Skip to content

Commit 7992427

Browse files
Alex Reibman (with Cursor Agent)
authored
Create openai responses api test (#1156)
* Add o3 Responses API integration tests and documentation
* Remove o3 integration tests, update README, add o3 responses example
* responses fix
* yaml
* ruff
* Fix Ruff CI failures: remove unused variables and apply formatting
* auto start false
* safer tool args get

Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: alex <alex@agentops.ai>
Co-authored-by: Alex Reibman <reibs@Alexs-MBP.attlocal.net>
Co-authored-by: Alex Reibman <reibs@macbookpro.lan>
1 parent 83d333e commit 7992427

File tree

11 files changed

+965
-121
lines changed

11 files changed

+965
-121
lines changed

.github/workflows/examples-integration-test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ jobs:
3636
- { path: 'examples/openai/openai_example_async.py', name: 'OpenAI Async' }
3737
- { path: 'examples/openai/multi_tool_orchestration.py', name: 'OpenAI Multi-Tool' }
3838
- { path: 'examples/openai/web_search.py', name: 'OpenAI Web Search' }
39+
- { path: 'examples/openai/o3_responses_example.py', name: 'OpenAI o3 Responses' }
3940

4041
# Anthropic examples
4142
- { path: 'examples/anthropic/anthropic-example-sync.py', name: 'Anthropic Sync' }

agentops/instrumentation/providers/openai/stream_wrapper.py

Lines changed: 306 additions & 112 deletions
Large diffs are not rendered by default.

agentops/instrumentation/providers/openai/wrappers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
handle_run_stream_attributes,
1515
handle_messages_attributes,
1616
)
17+
from agentops.instrumentation.providers.openai.wrappers.responses import handle_responses_attributes
1718

1819
__all__ = [
1920
"handle_chat_attributes",
@@ -25,4 +26,5 @@
2526
"handle_run_retrieve_attributes",
2627
"handle_run_stream_attributes",
2728
"handle_messages_attributes",
29+
"handle_responses_attributes",
2830
]
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
"""Responses API wrapper for OpenAI instrumentation.
2+
3+
This module provides attribute extraction for OpenAI Responses API endpoints.
4+
"""
5+
6+
import json
7+
import logging
8+
from typing import Any, Dict, Optional, Tuple
9+
10+
from agentops.instrumentation.providers.openai.utils import is_openai_v1
11+
from agentops.instrumentation.providers.openai.wrappers.shared import (
12+
model_as_dict,
13+
should_send_prompts,
14+
)
15+
from agentops.instrumentation.common.attributes import AttributeMap
16+
from agentops.semconv import SpanAttributes, LLMRequestTypeValues
17+
18+
logger = logging.getLogger(__name__)
19+
20+
21+
def handle_responses_attributes(
    args: Optional[Tuple] = None,
    kwargs: Optional[Dict] = None,
    return_value: Optional[Any] = None,
) -> AttributeMap:
    """Extract span attributes from OpenAI Responses API calls.

    Args:
        args: Positional arguments of the wrapped call (currently unused).
        kwargs: Request keyword arguments (model, input, tools, stream, ...).
        return_value: The API response -- a pydantic model or a plain dict.

    Returns:
        A map of semantic-convention span attributes describing the call.
    """
    attributes: AttributeMap = {
        SpanAttributes.LLM_SYSTEM: "OpenAI",
        SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.CHAT.value,
    }

    if kwargs:
        _extract_request_attributes(kwargs, attributes)

    if return_value:
        _extract_response_attributes(return_value, attributes)

    return attributes


def _extract_request_attributes(kwargs: Dict, attributes: AttributeMap) -> None:
    """Populate *attributes* from the request keyword arguments."""
    if "model" in kwargs:
        attributes[SpanAttributes.LLM_REQUEST_MODEL] = kwargs["model"]

    # Scalar request parameters are copied verbatim when present.
    for key, attr in (
        ("max_tokens", SpanAttributes.LLM_REQUEST_MAX_TOKENS),
        ("temperature", SpanAttributes.LLM_REQUEST_TEMPERATURE),
        ("top_p", SpanAttributes.LLM_REQUEST_TOP_P),
        ("frequency_penalty", SpanAttributes.LLM_REQUEST_FREQUENCY_PENALTY),
        ("presence_penalty", SpanAttributes.LLM_REQUEST_PRESENCE_PENALTY),
        ("user", SpanAttributes.LLM_USER),
    ):
        if key in kwargs:
            attributes[attr] = kwargs[key]

    attributes[SpanAttributes.LLM_REQUEST_STREAMING] = kwargs.get("stream", False)

    # Input messages (only captured when prompt recording is enabled).
    if should_send_prompts() and "input" in kwargs:
        messages = kwargs["input"]
        if isinstance(messages, str):
            # The Responses API also accepts a bare string as input; record it
            # as a single user prompt instead of silently dropping it (the
            # previous code iterated the string char-by-char and captured
            # nothing).
            attributes[f"{SpanAttributes.LLM_PROMPTS}.0.role"] = "user"
            attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"] = messages
        else:
            for i, msg in enumerate(messages):
                prefix = f"{SpanAttributes.LLM_PROMPTS}.{i}"
                if isinstance(msg, dict):
                    if "role" in msg:
                        attributes[f"{prefix}.role"] = msg["role"]
                    if "content" in msg:
                        content = msg["content"]
                        if isinstance(content, list):
                            # Multi-part content blocks are serialized to JSON.
                            content = json.dumps(content)
                        attributes[f"{prefix}.content"] = content

    # Tool / function definitions supplied with the request.
    for i, tool in enumerate(kwargs.get("tools") or ()):
        if isinstance(tool, dict) and "function" in tool:
            function = tool["function"]
            prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}"
            if "name" in function:
                attributes[f"{prefix}.name"] = function["name"]
            if "description" in function:
                attributes[f"{prefix}.description"] = function["description"]
            if "parameters" in function:
                attributes[f"{prefix}.parameters"] = json.dumps(function["parameters"])


def _extract_response_attributes(return_value: Any, attributes: AttributeMap) -> None:
    """Populate *attributes* from the API response object."""
    # Normalize the response to a plain dict regardless of SDK version /
    # return shape. The __iter__ check keeps dict-likes out of model_as_dict.
    response_dict: Dict = {}
    if hasattr(return_value, "__dict__") and not hasattr(return_value, "__iter__"):
        response_dict = model_as_dict(return_value)
    elif isinstance(return_value, dict):
        response_dict = return_value
    elif hasattr(return_value, "model_dump"):
        response_dict = return_value.model_dump()

    if "id" in response_dict:
        attributes[SpanAttributes.LLM_RESPONSE_ID] = response_dict["id"]
    if "model" in response_dict:
        attributes[SpanAttributes.LLM_RESPONSE_MODEL] = response_dict["model"]

    usage = response_dict.get("usage", {})
    if usage:
        if is_openai_v1() and hasattr(usage, "__dict__"):
            usage = usage.__dict__
        if "total_tokens" in usage:
            attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] = usage["total_tokens"]
        # The Responses API reports input_tokens/output_tokens rather than the
        # Chat Completions prompt_tokens/completion_tokens names.
        if "input_tokens" in usage:
            attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] = usage["input_tokens"]
        if "output_tokens" in usage:
            attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] = usage["output_tokens"]

        # Reasoning tokens (reported by reasoning models such as o3).
        output_details = usage.get("output_tokens_details", {})
        if isinstance(output_details, dict) and "reasoning_tokens" in output_details:
            attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] = output_details["reasoning_tokens"]

    if should_send_prompts() and "output" in response_dict:
        _extract_output_attributes(response_dict["output"], attributes)


def _extract_output_attributes(output_items: Any, attributes: AttributeMap) -> None:
    """Record completion attributes for each item in the response ``output`` list."""
    completion_idx = 0
    for i, output_item in enumerate(output_items):
        # Normalize each item: dicts pass through, pydantic models are read
        # for their type and then converted to a dict view.
        if isinstance(output_item, dict):
            item_type = output_item.get("type")
        elif hasattr(output_item, "type"):
            item_type = output_item.type
            output_item_dict = model_as_dict(output_item)
            if output_item_dict and isinstance(output_item_dict, dict):
                output_item = output_item_dict
            else:
                continue
        else:
            continue

        if item_type == "message":
            if isinstance(output_item, dict):
                content = output_item.get("content", [])
                if isinstance(content, list):
                    # Aggregate all text parts into a single completion entry.
                    text_parts = [
                        part.get("text", "")
                        for part in content
                        if isinstance(part, dict) and part.get("type") == "text" and part.get("text")
                    ]
                    if text_parts:
                        attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.content"] = "".join(text_parts)
                        attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.role"] = "assistant"
                        completion_idx += 1
                elif isinstance(content, str):
                    # Simple string content.
                    attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.content"] = content
                    attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.role"] = "assistant"
                    completion_idx += 1

        elif item_type == "function_call" and isinstance(output_item, dict):
            # For function calls the arguments carry the model's output.
            args_str = output_item.get("arguments", "")
            if args_str:
                try:
                    parsed = json.loads(args_str)
                except json.JSONDecodeError:
                    parsed = None
                # isinstance guard fixes an uncaught AttributeError in the
                # original: arguments JSON that decodes to a non-dict (list,
                # string, number) has no .get(). Also avoids shadowing the
                # function's `args` parameter.
                if isinstance(parsed, dict):
                    # Extract reasoning if present (common in o3 models).
                    reasoning = parsed.get("reasoning", "")
                    if reasoning:
                        attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.content"] = reasoning
                        attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.role"] = "assistant"
                        completion_idx += 1

            # Tool-call details keep the positional index `i` to match the
            # output list ordering (as in the original implementation).
            attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{i}.tool_calls.0.id"] = output_item.get("id", "")
            attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{i}.tool_calls.0.name"] = output_item.get("name", "")
            attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{i}.tool_calls.0.arguments"] = args_str

        elif item_type == "reasoning" and isinstance(output_item, dict):
            # Reasoning items emitted by o3-style models.
            # NOTE(review): the API may return ``summary`` as a list of summary
            # parts rather than a string -- confirm; it is recorded as-is here.
            summary = output_item.get("summary", "")
            if summary:
                attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.content"] = summary
                attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.role"] = "assistant"
                attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{completion_idx}.type"] = "reasoning"
                completion_idx += 1

examples/agno/agno_async_operations.ipynb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
"\n",
4747
"import agentops\n",
4848
"from agno.agent import Agent\n",
49-
"from agno.team import Team\n",
5049
"from agno.models.openai import OpenAIChat"
5150
]
5251
},

examples/langgraph/langgraph_example.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
"from langgraph.graph import StateGraph, END\n",
4646
"from langgraph.graph.message import add_messages\n",
4747
"from langchain_openai import ChatOpenAI\n",
48-
"from langchain_core.messages import HumanMessage, AIMessage, ToolMessage\n",
48+
"from langchain_core.messages import HumanMessage, ToolMessage\n",
4949
"from langchain_core.tools import tool\n",
5050
"import agentops\n",
5151
"from dotenv import load_dotenv\n",

examples/mem0/mem0_memory_example.ipynb

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,6 @@
5555
"from mem0 import Memory, AsyncMemory\n",
5656
"import os\n",
5757
"import asyncio\n",
58-
"import logging\n",
59-
"from dotenv import load_dotenv\n",
6058
"import agentops"
6159
]
6260
},
@@ -189,7 +187,7 @@
189187
" print(f\"Delete all result: {delete_all_result}\")\n",
190188
"\n",
191189
" agentops.end_trace(end_state=\"success\")\n",
192-
" except Exception as e:\n",
190+
" except Exception:\n",
193191
" agentops.end_trace(end_state=\"error\")"
194192
]
195193
},
@@ -263,7 +261,7 @@
263261
"\n",
264262
" agentops.end_trace(end_state=\"success\")\n",
265263
"\n",
266-
" except Exception as e:\n",
264+
" except Exception:\n",
267265
" agentops.end_trace(end_state=\"error\")"
268266
]
269267
},

examples/mem0/mem0_memoryclient_example.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@
199199
" delete_all_result = client.delete_all(user_id=user_id)\n",
200200
" print(f\"Delete all result: {delete_all_result}\")\n",
201201
" agentops.end_trace(end_state=\"success\")\n",
202-
" except Exception as e:\n",
202+
" except Exception:\n",
203203
" agentops.end_trace(end_state=\"error\")"
204204
]
205205
},
@@ -279,7 +279,7 @@
279279
"\n",
280280
" agentops.end_trace(end_state=\"success\")\n",
281281
"\n",
282-
" except Exception as e:\n",
282+
" except Exception:\n",
283283
" agentops.end_trace(end_state=\"error\")"
284284
]
285285
},

examples/openai/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ Example: `web_search`
3939
This example demonstrates:
4040
- Web search functionality
4141

42+
### 5. o3 Responses API
43+
44+
Example: `o3_responses_example`
45+
46+
This example demonstrates:
47+
- OpenAI's o3 reasoning model with the Responses API
48+
- Tool calls and structured reasoning
49+
- Complex decision-making scenarios
50+
- AgentOps integration with reasoning models
51+
4252
## AgentOps Integration
4353

4454
These examples show how to use AgentOps to monitor and analyze your AI applications. AgentOps automatically instruments your OpenAI calls to provide insights into performance, usage patterns, and model behavior.

0 commit comments

Comments
 (0)