@@ -128,38 +128,6 @@ def __init__(
128128 self .created = 1234567890
129129
130130
class MockEmbeddingData:
    """
    Test double for a single embedding entry of an embedding response.

    Exposes the three attributes (``embedding``, ``index``, ``object``) that
    ``MockEmbeddingResponse.model_dump`` reads from every element of its
    data list.
    """

    def __init__(self, embedding=None, index=0):
        """
        :param embedding: vector to expose on the entry; ``None`` selects a
            small three-element placeholder vector.
        :param index: position reported for this entry (defaults to 0).
        """
        # Compare against None instead of relying on truthiness so that an
        # explicitly supplied empty vector is kept rather than silently
        # replaced by the placeholder.
        self.embedding = [0.1, 0.2, 0.3] if embedding is None else embedding
        self.index = index
        self.object = "embedding"
136-
137-
class MockEmbeddingResponse:
    """
    Test double for an embedding response object.

    Carries ``model``, ``data``, ``usage`` and ``object`` attributes and can
    serialize itself to a plain dictionary through ``model_dump``.
    """

    def __init__(self, model="text-embedding-ada-002", data=None, usage=None):
        """
        :param model: model name reported by the response.
        :param data: list of entries exposing ``embedding``, ``index`` and
            ``object``; ``None`` selects a single default ``MockEmbeddingData``.
        :param usage: object exposing ``prompt_tokens``, ``completion_tokens``
            and ``total_tokens``; ``None`` selects a ``MockUsage`` with
            5 prompt tokens, 0 completion tokens and 5 total tokens.
        """
        self.model = model
        # Only fall back to defaults when the argument was omitted: an
        # explicit empty ``data`` list must stay empty so that tests can
        # model a response that carries no embeddings.
        self.data = [MockEmbeddingData()] if data is None else data
        self.usage = (
            MockUsage(prompt_tokens=5, completion_tokens=0, total_tokens=5)
            if usage is None
            else usage
        )
        self.object = "list"

    def model_dump(self):
        """
        Return the response as a nested dictionary of plain values.

        :returns: dict with the keys ``model``, ``data`` (one dict per entry),
            ``usage`` (the three token counters) and ``object``.
        """
        entries = [
            {"embedding": item.embedding, "index": item.index, "object": item.object}
            for item in self.data
        ]
        token_counts = {
            "prompt_tokens": self.usage.prompt_tokens,
            "completion_tokens": self.usage.completion_tokens,
            "total_tokens": self.usage.total_tokens,
        }
        return {
            "model": self.model,
            "data": entries,
            "usage": token_counts,
            "object": self.object,
        }
161-
162-
163131@pytest .mark .parametrize (
164132 "send_default_pii, include_prompts" ,
165133 [
@@ -313,7 +281,13 @@ def test_streaming_chat_completion(
313281 assert span ["data" ][SPANDATA .GEN_AI_RESPONSE_STREAMING ] is True
314282
315283
316- def test_embeddings_create (sentry_init , capture_events , clear_litellm_cache ):
284+ def test_embeddings_create (
285+ sentry_init ,
286+ capture_events ,
287+ get_model_response ,
288+ openai_embedding_model_response ,
289+ clear_litellm_cache ,
290+ ):
317291 """
318292 Test that litellm.embedding() calls are properly instrumented.
319293
@@ -327,20 +301,24 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
327301 )
328302 events = capture_events ()
329303
330- mock_response = MockEmbeddingResponse ( )
304+ client = OpenAI ( api_key = "test-key" )
331305
332- # Mock within the test to ensure proper ordering with cache clearing
333- with mock .patch (
334- "litellm.openai_chat_completions.make_sync_openai_embedding_request"
335- ) as mock_http :
336- # The function returns (headers, response)
337- mock_http .return_value = ({}, mock_response )
306+ model_response = get_model_response (
307+ openai_embedding_model_response ,
308+ serialize_pydantic = True ,
309+ request_headers = {"X-Stainless-Raw-Response" : "true" },
310+ )
338311
312+ with mock .patch .object (
313+ client .embeddings ._client ._client ,
314+ "send" ,
315+ return_value = model_response ,
316+ ):
339317 with start_transaction (name = "litellm test" ):
340318 response = litellm .embedding (
341319 model = "text-embedding-ada-002" ,
342320 input = "Hello, world!" ,
343- api_key = "test-key" , # Provide a fake API key to avoid authentication errors
321+ client = client ,
344322 )
345323 # Allow time for callbacks to complete (they may run in separate threads)
346324 time .sleep (0.1 )
@@ -351,8 +329,13 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
351329 (event ,) = events
352330
353331 assert event ["type" ] == "transaction"
354- assert len (event ["spans" ]) == 1
355- (span ,) = event ["spans" ]
332+ spans = list (
333+ x
334+ for x in event ["spans" ]
335+ if x ["op" ] == OP .GEN_AI_EMBEDDINGS and x ["origin" ] == "auto.ai.litellm"
336+ )
337+ assert len (spans ) == 1
338+ span = spans [0 ]
356339
357340 assert span ["op" ] == OP .GEN_AI_EMBEDDINGS
358341 assert span ["description" ] == "embeddings text-embedding-ada-002"
@@ -365,7 +348,11 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
365348
366349
367350def test_embeddings_create_with_list_input (
368- sentry_init , capture_events , clear_litellm_cache
351+ sentry_init ,
352+ capture_events ,
353+ get_model_response ,
354+ openai_embedding_model_response ,
355+ clear_litellm_cache ,
369356):
370357 """Test embedding with list input."""
371358 sentry_init (
@@ -375,20 +362,24 @@ def test_embeddings_create_with_list_input(
375362 )
376363 events = capture_events ()
377364
378- mock_response = MockEmbeddingResponse ( )
365+ client = OpenAI ( api_key = "test-key" )
379366
380- # Mock within the test to ensure proper ordering with cache clearing
381- with mock .patch (
382- "litellm.openai_chat_completions.make_sync_openai_embedding_request"
383- ) as mock_http :
384- # The function returns (headers, response)
385- mock_http .return_value = ({}, mock_response )
367+ model_response = get_model_response (
368+ openai_embedding_model_response ,
369+ serialize_pydantic = True ,
370+ request_headers = {"X-Stainless-Raw-Response" : "true" },
371+ )
386372
373+ with mock .patch .object (
374+ client .embeddings ._client ._client ,
375+ "send" ,
376+ return_value = model_response ,
377+ ):
387378 with start_transaction (name = "litellm test" ):
388379 response = litellm .embedding (
389380 model = "text-embedding-ada-002" ,
390381 input = ["First text" , "Second text" , "Third text" ],
391- api_key = "test-key" , # Provide a fake API key to avoid authentication errors
382+ client = client ,
392383 )
393384 # Allow time for callbacks to complete (they may run in separate threads)
394385 time .sleep (0.1 )
@@ -399,8 +390,13 @@ def test_embeddings_create_with_list_input(
399390 (event ,) = events
400391
401392 assert event ["type" ] == "transaction"
402- assert len (event ["spans" ]) == 1
403- (span ,) = event ["spans" ]
393+ spans = list (
394+ x
395+ for x in event ["spans" ]
396+ if x ["op" ] == OP .GEN_AI_EMBEDDINGS and x ["origin" ] == "auto.ai.litellm"
397+ )
398+ assert len (spans ) == 1
399+ span = spans [0 ]
404400
405401 assert span ["op" ] == OP .GEN_AI_EMBEDDINGS
406402 assert span ["data" ][SPANDATA .GEN_AI_OPERATION_NAME ] == "embeddings"
@@ -413,7 +409,13 @@ def test_embeddings_create_with_list_input(
413409 ]
414410
415411
416- def test_embeddings_no_pii (sentry_init , capture_events , clear_litellm_cache ):
412+ def test_embeddings_no_pii (
413+ sentry_init ,
414+ capture_events ,
415+ get_model_response ,
416+ openai_embedding_model_response ,
417+ clear_litellm_cache ,
418+ ):
417419 """Test that PII is not captured when disabled."""
418420 sentry_init (
419421 integrations = [LiteLLMIntegration (include_prompts = True )],
@@ -422,20 +424,24 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
422424 )
423425 events = capture_events ()
424426
425- mock_response = MockEmbeddingResponse ( )
427+ client = OpenAI ( api_key = "test-key" )
426428
427- # Mock within the test to ensure proper ordering with cache clearing
428- with mock .patch (
429- "litellm.openai_chat_completions.make_sync_openai_embedding_request"
430- ) as mock_http :
431- # The function returns (headers, response)
432- mock_http .return_value = ({}, mock_response )
429+ model_response = get_model_response (
430+ openai_embedding_model_response ,
431+ serialize_pydantic = True ,
432+ request_headers = {"X-Stainless-Raw-Response" : "true" },
433+ )
433434
435+ with mock .patch .object (
436+ client .embeddings ._client ._client ,
437+ "send" ,
438+ return_value = model_response ,
439+ ):
434440 with start_transaction (name = "litellm test" ):
435441 response = litellm .embedding (
436442 model = "text-embedding-ada-002" ,
437443 input = "Hello, world!" ,
438- api_key = "test-key" , # Provide a fake API key to avoid authentication errors
444+ client = client ,
439445 )
440446 # Allow time for callbacks to complete (they may run in separate threads)
441447 time .sleep (0.1 )
@@ -446,8 +452,13 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
446452 (event ,) = events
447453
448454 assert event ["type" ] == "transaction"
449- assert len (event ["spans" ]) == 1
450- (span ,) = event ["spans" ]
455+ spans = list (
456+ x
457+ for x in event ["spans" ]
458+ if x ["op" ] == OP .GEN_AI_EMBEDDINGS and x ["origin" ] == "auto.ai.litellm"
459+ )
460+ assert len (spans ) == 1
461+ span = spans [0 ]
451462
452463 assert span ["op" ] == OP .GEN_AI_EMBEDDINGS
453464 # Check that embeddings input is NOT captured when PII is disabled
0 commit comments