Skip to content

Commit f465307

Browse files
test(langchain): Add tool execution test (#5739)
Add test for LangChain v1.0 functionality using sample Responses API output with a tool call request reused from `openai-agents` tests.
1 parent b1f8a42 commit f465307

File tree

3 files changed

+316
-129
lines changed

3 files changed

+316
-129
lines changed

tests/conftest.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,65 @@ def nonstreaming_responses_model_response():
11021102
)
11031103

11041104

1105+
@pytest.fixture
def responses_tool_call_model_responses():
    """Factory fixture for a two-turn Responses API exchange.

    The returned callable is a generator function yielding two
    ``openai.types.responses.Response`` objects: first a function-call
    request for ``tool_name`` with the given ``arguments``, then a final
    assistant message containing ``response_text``. Response ids and
    usage objects are drawn from the supplied iterators, one per turn.
    """

    def inner(
        tool_name: str,
        arguments: str,
        response_model: str,
        response_text: str,
        response_ids: "Iterator[str]",
        usages: "Iterator[openai.types.responses.ResponseUsage]",
    ):
        def _response(output):
            # Common envelope for both turns; id is consumed before usage,
            # matching the per-turn iterator order callers expect.
            return openai.types.responses.Response(
                id=next(response_ids),
                output=output,
                parallel_tool_calls=False,
                tool_choice="none",
                tools=[],
                created_at=10000000,
                model=response_model,
                object="response",
                usage=next(usages),
            )

        # Turn 1: the model asks for a tool invocation.
        yield _response(
            [
                openai.types.responses.ResponseFunctionToolCall(
                    id="call_123",
                    call_id="call_123",
                    name=tool_name,
                    type="function_call",
                    arguments=arguments,
                )
            ]
        )

        # Turn 2: the model emits its final text answer.
        yield _response(
            [
                openai.types.responses.ResponseOutputMessage(
                    id="msg_final",
                    type="message",
                    status="completed",
                    content=[
                        openai.types.responses.ResponseOutputText(
                            text=response_text,
                            type="output_text",
                            annotations=[],
                        )
                    ],
                    role="assistant",
                )
            ]
        )

    return inner
11051164
class MockServerRequestHandler(BaseHTTPRequestHandler):
11061165
def do_GET(self): # noqa: N802
11071166
# Process an HTTP GET request and return a response.

tests/integrations/langchain/test_langchain.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@
5454
CompletionUsage,
5555
)
5656

57+
from openai.types.responses import (
58+
ResponseUsage,
59+
)
60+
from openai.types.responses.response_usage import (
61+
InputTokensDetails,
62+
OutputTokensDetails,
63+
)
64+
5765
LANGCHAIN_VERSION = package_version("langchain")
5866

5967

@@ -206,6 +214,175 @@ def test_langchain_create_agent(
206214
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {})
207215

208216

217+
@pytest.mark.skipif(
    LANGCHAIN_VERSION < (1,),
    reason="LangChain 1.0+ required (ONE AGENT refactor)",
)
@pytest.mark.parametrize(
    "send_default_pii, include_prompts",
    [
        (True, True),
        (True, False),
        (False, True),
        (False, False),
    ],
)
def test_tool_execution_span(
    sentry_init,
    capture_events,
    send_default_pii,
    include_prompts,
    get_model_response,
    responses_tool_call_model_responses,
):
    """An agent run with one tool call emits two gen_ai.chat spans and one
    gen_ai.execute_tool span; tool input/output and tool-call data appear
    only when both send_default_pii and include_prompts are enabled.
    """
    sentry_init(
        integrations=[
            LangchainIntegration(
                include_prompts=include_prompts,
            )
        ],
        traces_sample_rate=1.0,
        send_default_pii=send_default_pii,
    )
    events = capture_events()

    # Two canned Responses API payloads: a tool-call request, then the
    # final assistant message. Usage values below are asserted verbatim
    # against the resulting chat spans.
    responses = responses_tool_call_model_responses(
        tool_name="get_word_length",
        arguments='{"word": "eudca"}',
        response_model="gpt-4-0613",
        response_text="The word eudca has 5 letters.",
        response_ids=iter(["resp_1", "resp_2"]),
        usages=iter(
            [
                ResponseUsage(
                    input_tokens=142,
                    input_tokens_details=InputTokensDetails(
                        cached_tokens=0,
                    ),
                    output_tokens=50,
                    output_tokens_details=OutputTokensDetails(
                        reasoning_tokens=0,
                    ),
                    total_tokens=192,
                ),
                ResponseUsage(
                    input_tokens=89,
                    input_tokens_details=InputTokensDetails(
                        cached_tokens=0,
                    ),
                    output_tokens=28,
                    output_tokens_details=OutputTokensDetails(
                        reasoning_tokens=0,
                    ),
                    total_tokens=117,
                ),
            ]
        ),
    )
    tool_response = get_model_response(
        next(responses),
        serialize_pydantic=True,
        request_headers={
            "X-Stainless-Raw-Response": "True",
        },
    )
    final_response = get_model_response(
        next(responses),
        serialize_pydantic=True,
        request_headers={
            "X-Stainless-Raw-Response": "True",
        },
    )

    llm = ChatOpenAI(
        model_name="gpt-4",
        temperature=0,
        openai_api_key="badkey",
        use_responses_api=True,
    )
    agent = create_agent(
        model=llm,
        tools=[get_word_length],
        name="word_length_agent",
    )

    # Patch the underlying HTTP transport so no real network request is
    # made; the two mock responses are served in order.
    with patch.object(
        llm.client._client._client,
        "send",
        side_effect=[tool_response, final_response],
    ):
        with start_transaction():
            agent.invoke(
                {
                    "messages": [
                        HumanMessage(content="How many letters in the word eudca"),
                    ],
                },
            )

    tx = events[0]
    assert tx["type"] == "transaction"
    assert tx["contexts"]["trace"]["origin"] == "manual"

    # One chat span per model round-trip: tool request + final answer.
    chat_spans = [x for x in tx["spans"] if x["op"] == "gen_ai.chat"]
    assert len(chat_spans) == 2

    tool_exec_spans = [x for x in tx["spans"] if x["op"] == "gen_ai.execute_tool"]
    assert len(tool_exec_spans) == 1
    tool_exec_span = tool_exec_spans[0]

    assert chat_spans[0]["origin"] == "auto.ai.langchain"
    assert chat_spans[1]["origin"] == "auto.ai.langchain"
    assert tool_exec_span["origin"] == "auto.ai.langchain"

    # Token usage must match the canned ResponseUsage values above.
    assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142
    assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50
    assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192

    assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89
    assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28
    assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117

    if send_default_pii and include_prompts:
        assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]

        assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]

        # Verify tool calls are recorded when PII is enabled
        assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_spans[0].get("data", {}), (
            "Tool calls should be recorded when send_default_pii=True and include_prompts=True"
        )
        tool_calls_data = chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]
        assert isinstance(tool_calls_data, str)
        assert "get_word_length" in tool_calls_data
    else:
        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {})
        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {})
        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {})
        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {})
        assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {})
        assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {})

        # Verify tool calls are NOT recorded when PII is disabled
        assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get(
            "data", {}
        ), (
            f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} "
            f"and include_prompts={include_prompts}"
        )
        assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[1].get(
            "data", {}
        ), (
            f"Tool calls should NOT be recorded when send_default_pii={send_default_pii} "
            f"and include_prompts={include_prompts}"
        )

    # Verify that available tools are always recorded regardless of PII settings
    for chat_span in chat_spans:
        tools_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS]
        assert "get_word_length" in tools_data
209386
@pytest.mark.parametrize(
210387
"send_default_pii, include_prompts",
211388
[

0 commit comments

Comments
 (0)