Skip to content

Commit 40a5b93

Browse files
authored
tests(ai): Fix streaming+truncation integration tests across AI integrations (#20326)
OpenAI span streaming tests (with truncation enabled) were failing; this fixes that. Anthropic and google-genai already used separate scenario files for these tests, which is the better pattern in this case, so this change aligns the integration tests for the remaining gen_ai integrations accordingly. Closes #20322, closes #20321, closes #20323, closes #20320.
1 parent 068305b commit 40a5b93

File tree

10 files changed

+214
-28
lines changed

10 files changed

+214
-28
lines changed

dev-packages/node-integration-tests/suites/tracing/anthropic/scenario-streaming.mjs renamed to dev-packages/node-integration-tests/suites/tracing/anthropic/scenario-span-streaming.mjs

File renamed without changes.

dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -847,7 +847,7 @@ describe('Anthropic integration', () => {
847847

848848
const streamingLongContent = 'A'.repeat(50_000);
849849

850-
createEsmAndCjsTests(__dirname, 'scenario-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
850+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
851851
test('automatically disables truncation when span streaming is enabled', async () => {
852852
await createRunner()
853853
.expect({
@@ -867,7 +867,7 @@ describe('Anthropic integration', () => {
867867

868868
createEsmAndCjsTests(
869869
__dirname,
870-
'scenario-streaming.mjs',
870+
'scenario-span-streaming.mjs',
871871
'instrument-streaming-with-truncation.mjs',
872872
(createRunner, test) => {
873873
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
import { ChatAnthropic } from '@langchain/anthropic';
import * as Sentry from '@sentry/node';
import express from 'express';

// Boots a throwaway Express app that imitates the Anthropic Messages endpoint
// and answers every request with the same canned, non-streaming completion.
function startMockAnthropicServer() {
  const mockApp = express();
  mockApp.use(express.json({ limit: '10mb' }));

  mockApp.post('/v1/messages', (request, response) => {
    const payload = {
      id: 'msg_span_streaming_test',
      type: 'message',
      role: 'assistant',
      content: [{ type: 'text', text: 'Response' }],
      model: request.body.model,
      stop_reason: 'end_turn',
      stop_sequence: null,
      usage: { input_tokens: 10, output_tokens: 5 },
    };
    response.json(payload);
  });

  // Listen on an ephemeral port; resolve with the server once it is accepting connections.
  return new Promise(resolve => {
    const httpServer = mockApp.listen(0, () => resolve(httpServer));
  });
}

// Drives a single ChatAnthropic call against the mock server inside a root span,
// then flushes so streamed spans leave the process before it exits.
async function run() {
  const httpServer = await startMockAnthropicServer();
  const baseUrl = `http://localhost:${httpServer.address().port}`;

  await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
    const chat = new ChatAnthropic({
      model: 'claude-3-5-sonnet-20241022',
      apiKey: 'mock-api-key',
      clientOptions: {
        baseURL: baseUrl,
      },
    });

    // One oversized user message so truncation, when active, has to crop it.
    const oversizedPrompt = 'A'.repeat(50_000);
    await chat.invoke([{ role: 'user', content: oversizedPrompt }]);
  });

  await Sentry.flush(2000);

  httpServer.close();
}

run();

dev-packages/node-integration-tests/suites/tracing/langchain/test.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,7 @@ describe('LangChain integration', () => {
585585

586586
const streamingLongContent = 'A'.repeat(50_000);
587587

588-
createEsmAndCjsTests(__dirname, 'scenario-no-truncation.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
588+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
589589
test('automatically disables truncation when span streaming is enabled', async () => {
590590
await createRunner()
591591
.expect({
@@ -605,7 +605,7 @@ describe('LangChain integration', () => {
605605

606606
createEsmAndCjsTests(
607607
__dirname,
608-
'scenario-no-truncation.mjs',
608+
'scenario-span-streaming.mjs',
609609
'instrument-streaming-with-truncation.mjs',
610610
(createRunner, test) => {
611611
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -614,13 +614,14 @@ describe('LangChain integration', () => {
614614
span: container => {
615615
const spans = container.items;
616616

617-
// With explicit enableTruncation: true, truncation keeps only the last message
618-
// and drops the long content. The result should NOT contain the full 50k 'A' string.
617+
// With explicit enableTruncation: true, content should be truncated despite streaming.
619618
const chatSpan = spans.find(s =>
620-
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.includes('Follow-up question'),
619+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
621620
);
622621
expect(chatSpan).toBeDefined();
623-
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value).not.toContain(streamingLongContent);
622+
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
623+
streamingLongContent.length,
624+
);
624625
},
625626
})
626627
.start()
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
import { END, MessagesAnnotation, START, StateGraph } from '@langchain/langgraph';
import * as Sentry from '@sentry/node';

// Runs a one-node LangGraph workflow with a stubbed LLM inside a root span,
// then flushes so the recorded spans are delivered before the process exits.
async function run() {
  await Sentry.startSpan({ op: 'function', name: 'langgraph-test' }, async () => {
    // Stand-in for a real model node: always emits one fixed assistant
    // message carrying mock usage metadata.
    const fakeModelNode = () => ({
      messages: [
        {
          role: 'assistant',
          content: 'Mock LLM response',
          response_metadata: {
            model_name: 'mock-model',
            finish_reason: 'stop',
            tokenUsage: {
              promptTokens: 20,
              completionTokens: 10,
              totalTokens: 30,
            },
          },
        },
      ],
    });

    const workflow = new StateGraph(MessagesAnnotation)
      .addNode('agent', fakeModelNode)
      .addEdge(START, 'agent')
      .addEdge('agent', END)
      .compile({ name: 'weather_assistant' });

    // One oversized user message so truncation, when active, has to crop it.
    const oversizedPrompt = 'A'.repeat(50_000);
    await workflow.invoke({
      messages: [{ role: 'user', content: oversizedPrompt }],
    });
  });

  await Sentry.flush(2000);
}

run();

dev-packages/node-integration-tests/suites/tracing/langgraph/test.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ describe('LangGraph integration', () => {
401401

402402
const streamingLongContent = 'A'.repeat(50_000);
403403

404-
createEsmAndCjsTests(__dirname, 'scenario-no-truncation.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
404+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
405405
test('automatically disables truncation when span streaming is enabled', async () => {
406406
await createRunner()
407407
.expect({
@@ -421,7 +421,7 @@ describe('LangGraph integration', () => {
421421

422422
createEsmAndCjsTests(
423423
__dirname,
424-
'scenario-no-truncation.mjs',
424+
'scenario-span-streaming.mjs',
425425
'instrument-streaming-with-truncation.mjs',
426426
(createRunner, test) => {
427427
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -430,13 +430,14 @@ describe('LangGraph integration', () => {
430430
span: container => {
431431
const spans = container.items;
432432

433-
// With explicit enableTruncation: true, truncation keeps only the last message
434-
// and drops the long content. The result should NOT contain the full 50k 'A' string.
433+
// With explicit enableTruncation: true, content should be truncated despite streaming.
435434
const chatSpan = spans.find(s =>
436-
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.includes('Follow-up question'),
435+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
437436
);
438437
expect(chatSpan).toBeDefined();
439-
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value).not.toContain(streamingLongContent);
438+
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
439+
streamingLongContent.length,
440+
);
440441
},
441442
})
442443
.start()
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
import * as Sentry from '@sentry/node';
import express from 'express';
import OpenAI from 'openai';

/**
 * Starts a local Express server that mocks the OpenAI Chat Completions
 * (`/openai/chat/completions`) and Responses (`/openai/responses`) endpoints,
 * replying with fixed non-streaming payloads that echo back the requested model.
 *
 * @returns a promise resolving to the listening server (bound to an ephemeral port)
 */
function startMockServer() {
  const app = express();
  // Raise the body limit so the 50k-character test payloads are accepted.
  app.use(express.json({ limit: '10mb' }));

  app.post('/openai/chat/completions', (req, res) => {
    res.send({
      id: 'chatcmpl-mock123',
      object: 'chat.completion',
      created: 1677652288,
      model: req.body.model,
      choices: [
        {
          index: 0,
          message: { role: 'assistant', content: 'Hello!' },
          finish_reason: 'stop',
        },
      ],
      usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
    });
  });

  app.post('/openai/responses', (req, res) => {
    res.send({
      id: 'resp_mock456',
      object: 'response',
      created_at: 1677652290,
      model: req.body.model,
      output: [
        {
          type: 'message',
          id: 'msg_mock_output_1',
          status: 'completed',
          role: 'assistant',
          content: [{ type: 'output_text', text: 'Response text', annotations: [] }],
        },
      ],
      output_text: 'Response text',
      status: 'completed',
      usage: { input_tokens: 5, output_tokens: 3, total_tokens: 8 },
    });
  });

  return new Promise(resolve => {
    const server = app.listen(0, () => {
      resolve(server);
    });
  });
}

/**
 * Exercises both OpenAI APIs with oversized inputs inside a root span so the
 * integration test can observe how truncation interacts with span streaming.
 */
async function run() {
  const server = await startMockServer();

  await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
    const client = new OpenAI({
      baseURL: `http://localhost:${server.address().port}/openai`,
      apiKey: 'mock-api-key',
    });

    // Single long message for chat completions
    const longContent = 'A'.repeat(50_000);
    await client.chat.completions.create({
      model: 'gpt-4',
      messages: [{ role: 'user', content: longContent }],
    });

    // Responses API with long string input
    const longStringInput = 'B'.repeat(50_000);
    await client.responses.create({
      model: 'gpt-4',
      input: longStringInput,
    });
  });

  // Flush is required when span streaming is enabled to ensure streamed spans
  // are sent before the process exits. Pass an explicit 2s timeout — matching
  // the anthropic/langchain scenario files — so the test process cannot hang
  // indefinitely if the transport never drains.
  await Sentry.flush(2000);
  server.close();
}

run();

dev-packages/node-integration-tests/suites/tracing/openai/test.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ describe('OpenAI integration', () => {
10271027
const streamingLongContent = 'A'.repeat(50_000);
10281028
const streamingLongString = 'B'.repeat(50_000);
10291029

1030-
createEsmAndCjsTests(__dirname, 'scenario-no-truncation.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
1030+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
10311031
test('automatically disables truncation when span streaming is enabled', async () => {
10321032
await createRunner()
10331033
.expect({
@@ -1052,7 +1052,7 @@ describe('OpenAI integration', () => {
10521052

10531053
createEsmAndCjsTests(
10541054
__dirname,
1055-
'scenario-no-truncation.mjs',
1055+
'scenario-span-streaming.mjs',
10561056
'instrument-streaming-with-truncation.mjs',
10571057
(createRunner, test) => {
10581058
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -1062,14 +1062,23 @@ describe('OpenAI integration', () => {
10621062
const spans = container.items;
10631063

10641064
// With explicit enableTruncation: true, content should be truncated despite streaming.
1065-
// Find the chat span by matching the start of the truncated content (the 'A' repeated messages).
1065+
// Truncation keeps only the last message (50k 'A's) and crops it to the byte limit.
10661066
const chatSpan = spans.find(s =>
10671067
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
10681068
);
10691069
expect(chatSpan).toBeDefined();
10701070
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
10711071
streamingLongContent.length,
10721072
);
1073+
1074+
// The responses API string input (50k 'B's) should also be truncated.
1075+
const responsesSpan = spans.find(s =>
1076+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('BBB'),
1077+
);
1078+
expect(responsesSpan).toBeDefined();
1079+
expect(responsesSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
1080+
streamingLongString.length,
1081+
);
10731082
},
10741083
})
10751084
.start()

dev-packages/node-integration-tests/suites/tracing/vercelai/scenario-streaming.mjs renamed to dev-packages/node-integration-tests/suites/tracing/vercelai/scenario-span-streaming.mjs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { MockLanguageModelV1 } from 'ai/test';
44

55
async function run() {
66
await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
7+
// Single long message so truncation must crop it
78
const longContent = 'A'.repeat(50_000);
89
await generateText({
910
experimental_telemetry: { isEnabled: true },
@@ -15,11 +16,7 @@ async function run() {
1516
text: 'Response',
1617
}),
1718
}),
18-
messages: [
19-
{ role: 'user', content: longContent },
20-
{ role: 'assistant', content: 'Some reply' },
21-
{ role: 'user', content: 'Follow-up question' },
22-
],
19+
messages: [{ role: 'user', content: longContent }],
2320
});
2421
});
2522

dev-packages/node-integration-tests/suites/tracing/vercelai/test.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,7 @@ describe('Vercel AI integration', () => {
986986

987987
const streamingLongContent = 'A'.repeat(50_000);
988988

989-
createEsmAndCjsTests(__dirname, 'scenario-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
989+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
990990
test('automatically disables truncation when span streaming is enabled', async () => {
991991
await createRunner()
992992
.expect({
@@ -1006,7 +1006,7 @@ describe('Vercel AI integration', () => {
10061006

10071007
createEsmAndCjsTests(
10081008
__dirname,
1009-
'scenario-streaming.mjs',
1009+
'scenario-span-streaming.mjs',
10101010
'instrument-streaming-with-truncation.mjs',
10111011
(createRunner, test) => {
10121012
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -1015,13 +1015,14 @@ describe('Vercel AI integration', () => {
10151015
span: container => {
10161016
const spans = container.items;
10171017

1018-
// With explicit enableTruncation: true, truncation keeps only the last message
1019-
// and drops the long content. The result should NOT contain the full 50k 'A' string.
1018+
// With explicit enableTruncation: true, content should be truncated despite streaming.
10201019
const chatSpan = spans.find(s =>
1021-
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.includes('Follow-up question'),
1020+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
10221021
);
10231022
expect(chatSpan).toBeDefined();
1024-
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value).not.toContain(streamingLongContent);
1023+
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
1024+
streamingLongContent.length,
1025+
);
10251026
},
10261027
})
10271028
.start()

0 commit comments

Comments
 (0)