Skip to content

Commit 40a5b93

Browse files
authored
tests(ai): Fix streaming+truncation integration tests across AI integrations (#20326)
OpenAI span streaming tests (with truncation enabled) were failing; this fixes that. Anthropic and google-genai already used separate scenario files for these tests, which is the better pattern in this case, so this change aligns the integration tests for the remaining gen_ai integrations accordingly. Closes #20322, closes #20321, closes #20323, closes #20320.
1 parent 068305b commit 40a5b93

File tree

10 files changed

+214
-28
lines changed

10 files changed

+214
-28
lines changed

dev-packages/node-integration-tests/suites/tracing/anthropic/scenario-streaming.mjs renamed to dev-packages/node-integration-tests/suites/tracing/anthropic/scenario-span-streaming.mjs

File renamed without changes.

dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -847,7 +847,7 @@ describe('Anthropic integration', () => {
847847

848848
const streamingLongContent = 'A'.repeat(50_000);
849849

850-
createEsmAndCjsTests(__dirname, 'scenario-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
850+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
851851
test('automatically disables truncation when span streaming is enabled', async () => {
852852
await createRunner()
853853
.expect({
@@ -867,7 +867,7 @@ describe('Anthropic integration', () => {
867867

868868
createEsmAndCjsTests(
869869
__dirname,
870-
'scenario-streaming.mjs',
870+
'scenario-span-streaming.mjs',
871871
'instrument-streaming-with-truncation.mjs',
872872
(createRunner, test) => {
873873
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
import { ChatAnthropic } from '@langchain/anthropic';
import * as Sentry from '@sentry/node';
import express from 'express';

// Boots a throwaway Express app that imitates the Anthropic Messages endpoint
// and answers every request with the same canned, non-streaming completion.
function startMockAnthropicServer() {
  const mockApp = express();
  mockApp.use(express.json({ limit: '10mb' }));

  mockApp.post('/v1/messages', (request, response) => {
    const payload = {
      id: 'msg_span_streaming_test',
      type: 'message',
      role: 'assistant',
      content: [{ type: 'text', text: 'Response' }],
      model: request.body.model,
      stop_reason: 'end_turn',
      stop_sequence: null,
      usage: { input_tokens: 10, output_tokens: 5 },
    };
    response.json(payload);
  });

  // Listen on an ephemeral port; resolve with the server once it is accepting connections.
  return new Promise(resolve => {
    const httpServer = mockApp.listen(0, () => resolve(httpServer));
  });
}

// Drives a single ChatAnthropic call against the mock server inside a root span,
// then flushes so streamed spans leave the process before it exits.
async function run() {
  const httpServer = await startMockAnthropicServer();
  const baseUrl = `http://localhost:${httpServer.address().port}`;

  await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
    const chat = new ChatAnthropic({
      model: 'claude-3-5-sonnet-20241022',
      apiKey: 'mock-api-key',
      clientOptions: {
        baseURL: baseUrl,
      },
    });

    // One oversized user message so truncation, when active, has to crop it.
    const oversizedPrompt = 'A'.repeat(50_000);
    await chat.invoke([{ role: 'user', content: oversizedPrompt }]);
  });

  await Sentry.flush(2000);

  httpServer.close();
}

run();

dev-packages/node-integration-tests/suites/tracing/langchain/test.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,7 @@ describe('LangChain integration', () => {
585585

586586
const streamingLongContent = 'A'.repeat(50_000);
587587

588-
createEsmAndCjsTests(__dirname, 'scenario-no-truncation.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
588+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
589589
test('automatically disables truncation when span streaming is enabled', async () => {
590590
await createRunner()
591591
.expect({
@@ -605,7 +605,7 @@ describe('LangChain integration', () => {
605605

606606
createEsmAndCjsTests(
607607
__dirname,
608-
'scenario-no-truncation.mjs',
608+
'scenario-span-streaming.mjs',
609609
'instrument-streaming-with-truncation.mjs',
610610
(createRunner, test) => {
611611
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -614,13 +614,14 @@ describe('LangChain integration', () => {
614614
span: container => {
615615
const spans = container.items;
616616

617-
// With explicit enableTruncation: true, truncation keeps only the last message
618-
// and drops the long content. The result should NOT contain the full 50k 'A' string.
617+
// With explicit enableTruncation: true, content should be truncated despite streaming.
619618
const chatSpan = spans.find(s =>
620-
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.includes('Follow-up question'),
619+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
621620
);
622621
expect(chatSpan).toBeDefined();
623-
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value).not.toContain(streamingLongContent);
622+
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
623+
streamingLongContent.length,
624+
);
624625
},
625626
})
626627
.start()
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
import { END, MessagesAnnotation, START, StateGraph } from '@langchain/langgraph';
import * as Sentry from '@sentry/node';

// Runs a one-node LangGraph workflow with a stubbed LLM inside a root span,
// then flushes so the recorded spans are delivered before the process exits.
async function run() {
  await Sentry.startSpan({ op: 'function', name: 'langgraph-test' }, async () => {
    // Stand-in for a real model node: always emits one fixed assistant
    // message carrying mock usage metadata.
    const fakeModelNode = () => ({
      messages: [
        {
          role: 'assistant',
          content: 'Mock LLM response',
          response_metadata: {
            model_name: 'mock-model',
            finish_reason: 'stop',
            tokenUsage: {
              promptTokens: 20,
              completionTokens: 10,
              totalTokens: 30,
            },
          },
        },
      ],
    });

    const workflow = new StateGraph(MessagesAnnotation)
      .addNode('agent', fakeModelNode)
      .addEdge(START, 'agent')
      .addEdge('agent', END)
      .compile({ name: 'weather_assistant' });

    // One oversized user message so truncation, when active, has to crop it.
    const oversizedPrompt = 'A'.repeat(50_000);
    await workflow.invoke({
      messages: [{ role: 'user', content: oversizedPrompt }],
    });
  });

  await Sentry.flush(2000);
}

run();

dev-packages/node-integration-tests/suites/tracing/langgraph/test.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ describe('LangGraph integration', () => {
401401

402402
const streamingLongContent = 'A'.repeat(50_000);
403403

404-
createEsmAndCjsTests(__dirname, 'scenario-no-truncation.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
404+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
405405
test('automatically disables truncation when span streaming is enabled', async () => {
406406
await createRunner()
407407
.expect({
@@ -421,7 +421,7 @@ describe('LangGraph integration', () => {
421421

422422
createEsmAndCjsTests(
423423
__dirname,
424-
'scenario-no-truncation.mjs',
424+
'scenario-span-streaming.mjs',
425425
'instrument-streaming-with-truncation.mjs',
426426
(createRunner, test) => {
427427
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -430,13 +430,14 @@ describe('LangGraph integration', () => {
430430
span: container => {
431431
const spans = container.items;
432432

433-
// With explicit enableTruncation: true, truncation keeps only the last message
434-
// and drops the long content. The result should NOT contain the full 50k 'A' string.
433+
// With explicit enableTruncation: true, content should be truncated despite streaming.
435434
const chatSpan = spans.find(s =>
436-
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.includes('Follow-up question'),
435+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
437436
);
438437
expect(chatSpan).toBeDefined();
439-
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value).not.toContain(streamingLongContent);
438+
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
439+
streamingLongContent.length,
440+
);
440441
},
441442
})
442443
.start()
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
import * as Sentry from '@sentry/node';
import express from 'express';
import OpenAI from 'openai';

/**
 * Starts a local Express server that mocks the OpenAI Chat Completions
 * (`/openai/chat/completions`) and Responses (`/openai/responses`) endpoints,
 * replying with fixed non-streaming payloads that echo back the requested model.
 *
 * @returns a promise resolving to the listening server (bound to an ephemeral port)
 */
function startMockServer() {
  const app = express();
  // Raise the body limit so the 50k-character test payloads are accepted.
  app.use(express.json({ limit: '10mb' }));

  app.post('/openai/chat/completions', (req, res) => {
    res.send({
      id: 'chatcmpl-mock123',
      object: 'chat.completion',
      created: 1677652288,
      model: req.body.model,
      choices: [
        {
          index: 0,
          message: { role: 'assistant', content: 'Hello!' },
          finish_reason: 'stop',
        },
      ],
      usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
    });
  });

  app.post('/openai/responses', (req, res) => {
    res.send({
      id: 'resp_mock456',
      object: 'response',
      created_at: 1677652290,
      model: req.body.model,
      output: [
        {
          type: 'message',
          id: 'msg_mock_output_1',
          status: 'completed',
          role: 'assistant',
          content: [{ type: 'output_text', text: 'Response text', annotations: [] }],
        },
      ],
      output_text: 'Response text',
      status: 'completed',
      usage: { input_tokens: 5, output_tokens: 3, total_tokens: 8 },
    });
  });

  return new Promise(resolve => {
    const server = app.listen(0, () => {
      resolve(server);
    });
  });
}

/**
 * Exercises both OpenAI APIs with oversized inputs inside a root span so the
 * integration test can observe how truncation interacts with span streaming.
 */
async function run() {
  const server = await startMockServer();

  await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
    const client = new OpenAI({
      baseURL: `http://localhost:${server.address().port}/openai`,
      apiKey: 'mock-api-key',
    });

    // Single long message for chat completions
    const longContent = 'A'.repeat(50_000);
    await client.chat.completions.create({
      model: 'gpt-4',
      messages: [{ role: 'user', content: longContent }],
    });

    // Responses API with long string input
    const longStringInput = 'B'.repeat(50_000);
    await client.responses.create({
      model: 'gpt-4',
      input: longStringInput,
    });
  });

  // Flush is required when span streaming is enabled to ensure streamed spans
  // are sent before the process exits. Pass an explicit 2s timeout — matching
  // the anthropic/langchain scenario files — so the test process cannot hang
  // indefinitely if the transport never drains.
  await Sentry.flush(2000);
  server.close();
}

run();

dev-packages/node-integration-tests/suites/tracing/openai/test.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ describe('OpenAI integration', () => {
10271027
const streamingLongContent = 'A'.repeat(50_000);
10281028
const streamingLongString = 'B'.repeat(50_000);
10291029

1030-
createEsmAndCjsTests(__dirname, 'scenario-no-truncation.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
1030+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
10311031
test('automatically disables truncation when span streaming is enabled', async () => {
10321032
await createRunner()
10331033
.expect({
@@ -1052,7 +1052,7 @@ describe('OpenAI integration', () => {
10521052

10531053
createEsmAndCjsTests(
10541054
__dirname,
1055-
'scenario-no-truncation.mjs',
1055+
'scenario-span-streaming.mjs',
10561056
'instrument-streaming-with-truncation.mjs',
10571057
(createRunner, test) => {
10581058
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -1062,14 +1062,23 @@ describe('OpenAI integration', () => {
10621062
const spans = container.items;
10631063

10641064
// With explicit enableTruncation: true, content should be truncated despite streaming.
1065-
// Find the chat span by matching the start of the truncated content (the 'A' repeated messages).
1065+
// Truncation keeps only the last message (50k 'A's) and crops it to the byte limit.
10661066
const chatSpan = spans.find(s =>
10671067
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
10681068
);
10691069
expect(chatSpan).toBeDefined();
10701070
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
10711071
streamingLongContent.length,
10721072
);
1073+
1074+
// The responses API string input (50k 'B's) should also be truncated.
1075+
const responsesSpan = spans.find(s =>
1076+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('BBB'),
1077+
);
1078+
expect(responsesSpan).toBeDefined();
1079+
expect(responsesSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
1080+
streamingLongString.length,
1081+
);
10731082
},
10741083
})
10751084
.start()

dev-packages/node-integration-tests/suites/tracing/vercelai/scenario-streaming.mjs renamed to dev-packages/node-integration-tests/suites/tracing/vercelai/scenario-span-streaming.mjs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { MockLanguageModelV1 } from 'ai/test';
44

55
async function run() {
66
await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
7+
// Single long message so truncation must crop it
78
const longContent = 'A'.repeat(50_000);
89
await generateText({
910
experimental_telemetry: { isEnabled: true },
@@ -15,11 +16,7 @@ async function run() {
1516
text: 'Response',
1617
}),
1718
}),
18-
messages: [
19-
{ role: 'user', content: longContent },
20-
{ role: 'assistant', content: 'Some reply' },
21-
{ role: 'user', content: 'Follow-up question' },
22-
],
19+
messages: [{ role: 'user', content: longContent }],
2320
});
2421
});
2522

dev-packages/node-integration-tests/suites/tracing/vercelai/test.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,7 @@ describe('Vercel AI integration', () => {
986986

987987
const streamingLongContent = 'A'.repeat(50_000);
988988

989-
createEsmAndCjsTests(__dirname, 'scenario-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
989+
createEsmAndCjsTests(__dirname, 'scenario-span-streaming.mjs', 'instrument-streaming.mjs', (createRunner, test) => {
990990
test('automatically disables truncation when span streaming is enabled', async () => {
991991
await createRunner()
992992
.expect({
@@ -1006,7 +1006,7 @@ describe('Vercel AI integration', () => {
10061006

10071007
createEsmAndCjsTests(
10081008
__dirname,
1009-
'scenario-streaming.mjs',
1009+
'scenario-span-streaming.mjs',
10101010
'instrument-streaming-with-truncation.mjs',
10111011
(createRunner, test) => {
10121012
test('respects explicit enableTruncation: true even when span streaming is enabled', async () => {
@@ -1015,13 +1015,14 @@ describe('Vercel AI integration', () => {
10151015
span: container => {
10161016
const spans = container.items;
10171017

1018-
// With explicit enableTruncation: true, truncation keeps only the last message
1019-
// and drops the long content. The result should NOT contain the full 50k 'A' string.
1018+
// With explicit enableTruncation: true, content should be truncated despite streaming.
10201019
const chatSpan = spans.find(s =>
1021-
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.includes('Follow-up question'),
1020+
s.attributes?.[GEN_AI_INPUT_MESSAGES_ATTRIBUTE]?.value?.startsWith('[{"role":"user","content":"AAAA'),
10221021
);
10231022
expect(chatSpan).toBeDefined();
1024-
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value).not.toContain(streamingLongContent);
1023+
expect(chatSpan!.attributes[GEN_AI_INPUT_MESSAGES_ATTRIBUTE].value.length).toBeLessThan(
1024+
streamingLongContent.length,
1025+
);
10251026
},
10261027
})
10271028
.start()

0 commit comments

Comments
 (0)