@@ -465,7 +465,9 @@ def test_embeddings_no_pii(
465465 assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
466466
467467
468- def test_exception_handling(sentry_init, capture_events):
468+ def test_exception_handling(
469+     reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response
470+ ):
469471     sentry_init(
470472         integrations=[LiteLLMIntegration()],
471473         traces_sample_rate=1.0,
@@ -474,19 +476,22 @@ def test_exception_handling(sentry_init, capture_events):
474476
475477     messages = [{"role": "user", "content": "Hello!"}]
476478
477-     with start_transaction(name="litellm test"):
478-         kwargs = {
479-             "model": "gpt-3.5-turbo",
480-             "messages": messages,
481-         }
479+     client = OpenAI(api_key="z")
482480
483-         _input_callback(kwargs)
484-         _failure_callback(
485-             kwargs,
486-             Exception("API rate limit reached"),
487-             datetime.now(),
488-             datetime.now(),
489-         )
481+     model_response = get_rate_limit_model_response()
482+
483+     with mock.patch.object(
484+         client.embeddings._client._client,
485+         "send",
486+         return_value=model_response,
487+     ):
488+         with start_transaction(name="litellm test"):
489+             with pytest.raises(litellm.RateLimitError):
490+                 litellm.completion(
491+                     model="gpt-3.5-turbo",
492+                     messages=messages,
493+                     client=client,
494+                 )
490495
491496 # Should have error event and transaction
492497     assert len(events) >= 1
0 commit comments