@@ -784,6 +784,34 @@ async def make_openai_chat_completion_request(
         except Exception as e:
             raise e
 
+    def make_sync_openai_chat_completion_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call chat.completions.with_raw_response.create when litellm.return_response_headers is True
+        - call chat.completions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.chat.completions.with_raw_response.create(
+                    **data, timeout=timeout
+                )
+
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model_response: ModelResponse,
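Both branches of the new helper return a `(headers, response)` pair, so call sites can unpack unconditionally; `headers` is simply `None` when `litellm.return_response_headers` is off. A minimal usage sketch, assuming the `OpenAIChatCompletion` handler class this file defines and an `OPENAI_API_KEY` in the environment (model, prompt, and import path are illustrative):

    import litellm
    from openai import OpenAI

    # Assumed import path for the handler class this diff modifies:
    from litellm.llms.openai import OpenAIChatCompletion

    litellm.return_response_headers = True
    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    handler = OpenAIChatCompletion()
    headers, response = handler.make_sync_openai_chat_completion_request(
        openai_client=client,
        data={
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "hi"}],
        },
        timeout=60.0,
    )
    if headers is not None:
        # OpenAI returns rate-limit metadata alongside each response
        print(headers.get("x-ratelimit-remaining-requests"))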
@@ -916,7 +944,15 @@ def completion(
                         },
                     )
 
-                    response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
+                    headers, response = (
+                        self.make_sync_openai_chat_completion_request(
+                            openai_client=openai_client,
+                            data=data,
+                            timeout=timeout,
+                        )
+                    )
+
+                    logging_obj.model_call_details["response_headers"] = headers
                     stringified_response = response.model_dump()
                     logging_obj.post_call(
                         input=messages,
@@ -927,6 +963,7 @@ def completion(
                     return convert_to_model_response_object(
                         response_object=stringified_response,
                         model_response_object=model_response,
+                        _response_headers=headers,
                     )
             except openai.UnprocessableEntityError as e:
                 ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
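The new `_response_headers` kwarg threads the captured headers onto the returned `ModelResponse`. The body of `convert_to_model_response_object` is outside this diff, so the following is only a sketch of the wiring it presumably performs; everything except the kwarg name is an assumption:

    # Hypothetical sketch of the wiring; not the function's actual body.
    def convert_to_model_response_object(
        response_object=None,
        model_response_object=None,
        response_type="completion",
        hidden_params=None,
        _response_headers=None,
    ):
        # ...field-by-field copy from response_object onto the
        # ModelResponse happens here (elided)...
        if _response_headers is not None:
            # expose the raw provider headers on the returned object
            setattr(model_response_object, "_response_headers", _response_headers)
        return model_response_object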
@@ -1043,6 +1080,25 @@ async def acompletion(
                 },
             )
 
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
+            )
+            stringified_response = response.model_dump()
+            logging_obj.post_call(
+                input=data["messages"],
+                api_key=api_key,
+                original_response=stringified_response,
+                additional_args={"complete_input_dict": data},
+            )
+            logging_obj.model_call_details["response_headers"] = headers
+            return convert_to_model_response_object(
+                response_object=stringified_response,
+                model_response_object=model_response,
+                hidden_params={"headers": headers},
+                _response_headers=headers,
+            )
+        except Exception as e:
+            raise e
             headers, response = await self.make_openai_chat_completion_request(
                 openai_aclient=openai_aclient, data=data, timeout=timeout
             )
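The async helper `make_openai_chat_completion_request` used here was added above this section, and its body is not shown in these hunks. It presumably mirrors the sync helper one-for-one on `AsyncOpenAI`; a sketch under that assumption:

    from typing import Union

    import httpx
    import litellm
    from openai import AsyncOpenAI


    class _AsyncHelperSketch:
        # Assumed shape of make_openai_chat_completion_request; the real
        # body lives outside this diff, so treat this as a sketch, not
        # litellm's actual code.
        async def make_openai_chat_completion_request(
            self,
            openai_aclient: AsyncOpenAI,
            data: dict,
            timeout: Union[float, httpx.Timeout],
        ):
            if litellm.return_response_headers is True:
                raw_response = (
                    await openai_aclient.chat.completions.with_raw_response.create(
                        **data, timeout=timeout
                    )
                )
                return dict(raw_response.headers), raw_response.parse()
            response = await openai_aclient.chat.completions.create(
                **data, timeout=timeout
            )
            return None, response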
@@ -1122,13 +1178,20 @@ def streaming(
                 "complete_input_dict": data,
             },
         )
-        response = openai_client.chat.completions.create(**data, timeout=timeout)
+        headers, response = self.make_sync_openai_chat_completion_request(
+            openai_client=openai_client,
+            data=data,
+            timeout=timeout,
+        )
+
+        logging_obj.model_call_details["response_headers"] = headers
         streamwrapper = CustomStreamWrapper(
             completion_stream=response,
             model=model,
             custom_llm_provider="openai",
             logging_obj=logging_obj,
             stream_options=data.get("stream_options", None),
+            _response_headers=headers,
         )
         return streamwrapper
 
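On the streaming path, headers are captured before any chunks are consumed: `with_raw_response` resolves the HTTP headers as soon as the response starts, while the parsed body is a chunk iterator when `stream=True`. An illustrative call, reusing the assumed handler class from the first sketch:

    import litellm
    from openai import OpenAI

    from litellm.llms.openai import OpenAIChatCompletion  # assumed path, as above

    litellm.return_response_headers = True
    client = OpenAI()
    handler = OpenAIChatCompletion()

    headers, stream = handler.make_sync_openai_chat_completion_request(
        openai_client=client,
        data={
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "count to 3"}],
            "stream": True,
        },
        timeout=60.0,
    )
    # headers are available up front; chunks arrive as they stream
    print(headers.get("x-request-id") if headers else "header capture disabled")
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            print(delta, end="")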
@@ -1170,8 +1233,30 @@ async def async_streaming(
                 },
             )
 
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
+            )
+            logging_obj.model_call_details["response_headers"] = headers
+            streamwrapper = CustomStreamWrapper(
+                completion_stream=response,
+                model=model,
+                custom_llm_provider="openai",
+                logging_obj=logging_obj,
+                stream_options=data.get("stream_options", None),
+                _response_headers=headers,
+            )
+            return streamwrapper
+        except (
+            Exception
+        ) as e:  # need to exception handle here. async exceptions don't get caught in sync functions.
+            if response is not None and hasattr(response, "text"):
+                raise OpenAIError(
+                    status_code=500,
+                    message=f"{str(e)}\n\nOriginal Response: {response.text}",
+                )
             headers, response = await self.make_openai_chat_completion_request(
                 openai_aclient=openai_aclient, data=data, timeout=timeout
+
             )
             logging_obj.model_call_details["response_headers"] = headers
             streamwrapper = CustomStreamWrapper(
@@ -1252,6 +1337,32 @@ async def make_openai_embedding_request(
         except Exception as e:
             raise e
 
+    def make_sync_openai_embedding_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call embeddings.with_raw_response.create when litellm.return_response_headers is True
+        - call embeddings.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.embeddings.with_raw_response.create(
+                    **data, timeout=timeout
+                )  # type: ignore
+
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     async def aembedding(
         self,
         input: list,
@@ -1286,7 +1397,12 @@ async def aembedding(
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(
+                response_object=stringified_response,
+                model_response_object=model_response,
+                response_type="embedding",
+                _response_headers=headers,
+            )  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
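The embedding paths follow the same pattern as chat completions. One practical use of the returned headers is client-side rate-limit accounting; a hedged example through litellm's public `embedding` entry point, where the header name is OpenAI's documented rate-limit header and the `_response_headers` attribute is assumed, per the earlier sketch:

    import litellm

    litellm.return_response_headers = True

    resp = litellm.embedding(
        model="text-embedding-ada-002",
        input=["hello world"],
    )
    headers = getattr(resp, "_response_headers", None) or {}
    remaining = headers.get("x-ratelimit-remaining-tokens")
    if remaining is not None and int(remaining) < 1000:
        # back off before the API starts rejecting requests
        print("approaching the token rate limit; slowing down")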
@@ -1347,17 +1463,26 @@ def embedding(
                 client=client,
             )
 
-            ## COMPLETION CALL
-            response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+            ## embedding CALL
+            headers: Optional[Dict] = None
+            headers, sync_embedding_response = self.make_sync_openai_embedding_request(
+                openai_client=openai_client, data=data, timeout=timeout
+            )  # type: ignore
+
             ## LOGGING
+            logging_obj.model_call_details["response_headers"] = headers
             logging_obj.post_call(
                 input=input,
                 api_key=api_key,
                 additional_args={"complete_input_dict": data},
-                original_response=response,
+                original_response=sync_embedding_response,
             )
-
-            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(
+                response_object=sync_embedding_response.model_dump(),
+                model_response_object=model_response,
+                _response_headers=headers,
+                response_type="embedding",
+            )  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
             raise e
@@ -1520,6 +1645,33 @@ async def make_openai_audio_transcriptions_request(
         except Exception as e:
             raise e
 
+    def make_sync_openai_audio_transcriptions_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_client.audio.transcriptions.with_raw_response.create when litellm.return_response_headers is True
+        - call openai_client.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    openai_client.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     def audio_transcriptions(
         self,
         model: str,
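The parenthesized assignment around the long attribute chain is only line-length wrapping; the contract matches the other two helpers: a `(headers, response)` pair, with `headers` set to `None` when the flag is off. An illustrative call, where the file name and handler instance are hypothetical:

    from openai import OpenAI

    from litellm.llms.openai import OpenAIChatCompletion  # assumed path, as above

    client = OpenAI()
    handler = OpenAIChatCompletion()

    # audio.transcriptions.create expects a binary file handle and a model name
    with open("speech.mp3", "rb") as audio_file:  # hypothetical local file
        headers, transcript = handler.make_sync_openai_audio_transcriptions_request(
            openai_client=client,
            data={"model": "whisper-1", "file": audio_file},
            timeout=600.0,
        )
    print(transcript.text)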
@@ -1555,8 +1707,10 @@ def audio_transcriptions(
                 timeout=timeout,
                 max_retries=max_retries,
             )
-            response = openai_client.audio.transcriptions.create(
-                **data, timeout=timeout  # type: ignore
+            headers, response = self.make_sync_openai_audio_transcriptions_request(
+                openai_client=openai_client,
+                data=data,
+                timeout=timeout,
             )
 
             if isinstance(response, BaseModel):
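Taken together, the commit makes one global flag govern header capture across completions, embeddings, and transcriptions. A test-style sketch of the end-to-end behavior, where the `_response_headers` attribute and the `x-request-id` header are assumptions, not guarantees from this diff:

    import litellm

    def test_completion_returns_response_headers():
        # End-to-end sketch of the behavior this commit wires up; the
        # _response_headers attribute is assumed (see the earlier
        # convert_to_model_response_object sketch).
        litellm.return_response_headers = True
        resp = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
        )
        headers = getattr(resp, "_response_headers", None)
        assert headers is not None
        assert "x-request-id" in headers  # header OpenAI commonly returns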