Skip to content

Commit d0df24c

Browse files
chore(closes OPEN-8725): sanitize raw output when there are attachments
1 parent 4054b8c commit d0df24c

File tree

1 file changed

+78
-6
lines changed

1 file changed

+78
-6
lines changed

src/openlayer/lib/integrations/openai_tracer.py

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,18 @@ def handle_non_streaming_create(
398398
try:
399399
output_data = parse_non_streaming_output_data(response)
400400
processed_messages = extract_chat_completion_messages(kwargs["messages"])
401+
402+
# Check if response contains audio (to sanitize raw_output)
403+
has_audio = (
404+
hasattr(response.choices[0].message, "audio")
405+
and response.choices[0].message.audio is not None
406+
)
407+
408+
# Sanitize raw_output to remove heavy base64 data already uploaded as attachments
409+
raw_output = response.model_dump()
410+
if has_audio:
411+
raw_output = _sanitize_raw_output(raw_output, has_audio=True)
412+
401413
trace_args = create_trace_args(
402414
end_time=end_time,
403415
inputs={"prompt": processed_messages},
@@ -408,7 +420,7 @@ def handle_non_streaming_create(
408420
completion_tokens=response.usage.completion_tokens,
409421
model=response.model,
410422
model_parameters=get_model_parameters(kwargs),
411-
raw_output=response.model_dump(),
423+
raw_output=raw_output,
412424
id=inference_id,
413425
)
414426

@@ -823,6 +835,24 @@ def handle_responses_non_streaming_create(
823835
output_data = parse_responses_output_data(response)
824836
usage_data = extract_responses_usage(response)
825837

838+
# Check if response contains generated images (to sanitize raw_output)
839+
has_generated_images = False
840+
if hasattr(response, "output") and isinstance(response.output, list):
841+
has_generated_images = any(
842+
getattr(item, "type", None) == "image_generation_call"
843+
for item in response.output
844+
)
845+
846+
# Sanitize raw_output to remove heavy base64 data already uploaded as attachments
847+
if hasattr(response, "model_dump"):
848+
raw_output = response.model_dump()
849+
if has_generated_images:
850+
raw_output = _sanitize_raw_output(
851+
raw_output, has_generated_images=True
852+
)
853+
else:
854+
raw_output = str(response)
855+
826856
trace_args = create_trace_args(
827857
end_time=end_time,
828858
inputs=extract_responses_inputs(kwargs),
@@ -833,11 +863,7 @@ def handle_responses_non_streaming_create(
833863
completion_tokens=usage_data.get("completion_tokens", 0),
834864
model=getattr(response, "model", kwargs.get("model", "unknown")),
835865
model_parameters=get_responses_model_parameters(kwargs),
836-
raw_output=(
837-
response.model_dump()
838-
if hasattr(response, "model_dump")
839-
else str(response)
840-
),
866+
raw_output=raw_output,
841867
id=inference_id,
842868
)
843869

@@ -1155,6 +1181,52 @@ def get_responses_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]:
11551181
}
11561182

11571183

1184+
def _sanitize_raw_output(
1185+
raw_output: Dict[str, Any],
1186+
has_audio: bool = False,
1187+
has_generated_images: bool = False,
1188+
) -> Dict[str, Any]:
1189+
"""Remove heavy base64 data from raw_output that's already uploaded as attachments.
1190+
1191+
This prevents duplicating large binary data in the trace - the data is already
1192+
stored in blob storage via attachments, so we replace it with a placeholder.
1193+
1194+
Args:
1195+
raw_output: The raw model output dict (from response.model_dump())
1196+
has_audio: Whether the response contains audio data (Chat Completions API)
1197+
has_generated_images: Whether the response contains generated images (Responses API)
1198+
1199+
Returns:
1200+
A sanitized copy of raw_output with heavy data replaced by placeholders
1201+
"""
1202+
import copy
1203+
1204+
sanitized = copy.deepcopy(raw_output)
1205+
1206+
# Clear audio data from Chat Completions response
1207+
if has_audio:
1208+
try:
1209+
for choice in sanitized.get("choices", []):
1210+
message = choice.get("message", {})
1211+
if message and "audio" in message and message["audio"]:
1212+
if "data" in message["audio"]:
1213+
message["audio"]["data"] = "[UPLOADED_TO_STORAGE]"
1214+
except Exception as e:
1215+
logger.debug("Could not sanitize audio data from raw_output: %s", e)
1216+
1217+
# Clear image data from Responses API
1218+
if has_generated_images:
1219+
try:
1220+
for output_item in sanitized.get("output", []):
1221+
if output_item.get("type") == "image_generation_call":
1222+
if "result" in output_item:
1223+
output_item["result"] = "[UPLOADED_TO_STORAGE]"
1224+
except Exception as e:
1225+
logger.debug("Could not sanitize image data from raw_output: %s", e)
1226+
1227+
return sanitized
1228+
1229+
11581230
def parse_non_streaming_output_data(
11591231
response: "openai.types.chat.chat_completion.ChatCompletion",
11601232
) -> Union[str, List[ContentItem], Dict[str, Any], None]:

0 commit comments

Comments
 (0)