Skip to content

Commit ce29e47

Browse files
committed
fix(integrations): OpenAI input messages are now being converted to the schema we expect for the gen_ai.request.messages
1 parent 3d3ce5b commit ce29e47

File tree

1 file changed

+65
-1
lines changed

1 file changed

+65
-1
lines changed

sentry_sdk/integrations/openai.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
safe_serialize,
1919
)
2020

21-
from typing import TYPE_CHECKING
21+
from typing import TYPE_CHECKING, Dict
2222

2323
if TYPE_CHECKING:
2424
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
@@ -177,6 +177,68 @@ def _calculate_token_usage(
177177
)
178178

179179

180+
def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
181+
"""
182+
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
183+
e.g:
184+
{
185+
"role": "user",
186+
"content": [
187+
{
188+
"text": "How many ponies do you see in the image?",
189+
"type": "text"
190+
},
191+
{
192+
"type": "image_url",
193+
"image_url": {
194+
"url": "data:image/jpeg;base64,...",
195+
"detail": "high"
196+
}
197+
}
198+
]
199+
}
200+
becomes:
201+
{
202+
"role": "user",
203+
"content": [
204+
{
205+
"text": "How many ponies do you see in the image?",
206+
"type": "text"
207+
},
208+
{
209+
"type": "blob",
210+
"modality": "image",
211+
"mime_type": "image/jpeg",
212+
"content": "data:image/jpeg;base64,..."
213+
}
214+
]
215+
}
216+
"""
217+
218+
def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
219+
if item.get("type") == "image_url":
220+
image_url = item.get("image_url") or {}
221+
if image_url.get("url", "").startswith("data:"):
222+
return {
223+
"type": "blob",
224+
"modality": "image",
225+
"mime_type": item["image_url"]["url"].split(";base64,")[0],
226+
"content": item["image_url"]["url"].split(";base64,")[1],
227+
}
228+
else:
229+
return {
230+
"type": "uri",
231+
"uri": item["image_url"]["url"],
232+
}
233+
return item
234+
235+
for message in messages:
236+
content = message.get("content")
237+
if isinstance(content, list):
238+
message["content"] = [_map_item(item) for item in content]
239+
return messages
240+
241+
180242
def _set_input_data(
181243
span: "Span",
182244
kwargs: "dict[str, Any]",
@@ -198,6 +260,8 @@ def _set_input_data(
198260
and integration.include_prompts
199261
):
200262
normalized_messages = normalize_message_roles(messages)
263+
normalized_messages = _convert_message_parts(normalized_messages)
264+
201265
scope = sentry_sdk.get_current_scope()
202266
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
203267
if messages_data is not None:

0 commit comments

Comments
 (0)