fix(core): Truncate content array format in Vercel (#19911)

nicohrubec · web-flow · commit 51e2cee5a1a2 · 2026-03-24T15:04:27.000+01:00
Add truncation for content array messages, i.e. messages that have a `content` key where `content` is an array of objects, e.g. `{"type": "text", "text": "some string"}`. Previously these were returned as-is, bypassing the truncation logic. This PR makes sure these messages get truncated as well. We already handled parts array messages, which have essentially the same format but use a `parts` key, so I generalized `truncatePartsMessage` (now `truncateArrayMessage`) to also handle the content array format. Note: After switching to the Span V2 protocol we will get rid of truncation in the SDK altogether, but for now we should make sure to properly truncate all formats. Closes #19919 (added automatically)
diff --git a/packages/core/src/tracing/ai/messageTruncation.ts b/packages/core/src/tracing/ai/messageTruncation.ts
@@ -14,15 +14,20 @@ type ContentMessage = {
   content: string;
 };
 
+/**
+ * One block inside OpenAI / Anthropic `content: [...]` arrays (text, image_url, etc.).
+ */
+type ContentArrayBlock = {
+  [key: string]: unknown;
+  type: string;
+};
+
 /**
  * Message format used by OpenAI and Anthropic APIs for media.
  */
 type ContentArrayMessage = {
   [key: string]: unknown;
-  content: {
-    [key: string]: unknown;
-    type: string;
-  }[];
+  content: ContentArrayBlock[];
 };
 
 /**
@@ -47,6 +52,11 @@ type MediaPart = {
   content: string;
 };
 
+/**
+ * One element of an array-based message: OpenAI/Anthropic `content[]` or Google `parts`.
+ */
+type ArrayMessageItem = TextPart | MediaPart | ContentArrayBlock;
+
 /**
  * Calculate the UTF-8 byte length of a string.
  */
@@ -95,31 +105,33 @@ function truncateTextByBytes(text: string, maxBytes: number): string {
 }
 
 /**
- * Extract text content from a Google GenAI message part.
- * Parts are either plain strings or objects with a text property.
+ * Extract text content from a message item.
+ * Handles plain strings and objects with a text property.
  *
  * @returns The text content
  */
-function getPartText(part: TextPart | MediaPart): string {
-  if (typeof part === 'string') {
-    return part;
+function getItemText(item: ArrayMessageItem): string {
+  if (typeof item === 'string') {
+    return item;
+  }
+  if ('text' in item && typeof item.text === 'string') {
+    return item.text;
   }
-  if ('text' in part) return part.text;
   return '';
 }
 
 /**
- * Create a new part with updated text content while preserving the original structure.
+ * Create a new item with updated text content while preserving the original structure.
  *
- * @param part - Original part (string or object)
+ * @param item - Original item (string or object)
  * @param text - New text content
- * @returns New part with updated text
+ * @returns New item with updated text
  */
-function withPartText(part: TextPart | MediaPart, text: string): TextPart {
-  if (typeof part === 'string') {
+function withItemText(item: ArrayMessageItem, text: string): ArrayMessageItem {
+  if (typeof item === 'string') {
     return text;
   }
-  return { ...part, text };
+  return { ...item, text };
 }
 
 /**
@@ -176,56 +188,78 @@ function truncateContentMessage(message: ContentMessage, maxBytes: number): unkn
 }
 
 /**
- * Truncate a message with `parts: [...]` format (Google GenAI).
- * Keeps as many complete parts as possible, only truncating the first part if needed.
+ * Extracts the array items and their key from an array-based message.
+ * Returns `null` key if neither `parts` nor `content` is a valid array.
+ */
+function getArrayItems(message: PartsMessage | ContentArrayMessage): {
+  key: 'parts' | 'content' | null;
+  items: ArrayMessageItem[];
+} {
+  if ('parts' in message && Array.isArray(message.parts)) {
+    return { key: 'parts', items: message.parts };
+  }
+  if ('content' in message && Array.isArray(message.content)) {
+    return { key: 'content', items: message.content };
+  }
+  return { key: null, items: [] };
+}
+
+/**
+ * Truncate a message with an array-based format.
+ * Handles both `parts: [...]` (Google GenAI) and `content: [...]` (OpenAI/Anthropic multimodal).
+ * Keeps as many complete items as possible, only truncating the first item if needed.
  *
- * @param message - Message with parts array
+ * @param message - Message with parts or content array
  * @param maxBytes - Maximum byte limit
  * @returns Array with truncated message, or empty array if it doesn't fit
  */
-function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[] {
-  const { parts } = message;
+function truncateArrayMessage(message: PartsMessage | ContentArrayMessage, maxBytes: number): unknown[] {
+  const { key, items } = getArrayItems(message);
 
-  // Calculate overhead by creating empty text parts
-  const emptyParts = parts.map(part => withPartText(part, ''));
-  const overhead = jsonBytes({ ...message, parts: emptyParts });
+  if (key === null || items.length === 0) {
+    return [];
+  }
+
+  // Calculate overhead by creating empty text items
+  const emptyItems = items.map(item => withItemText(item, ''));
+  const overhead = jsonBytes({ ...message, [key]: emptyItems });
   let remainingBytes = maxBytes - overhead;
 
   if (remainingBytes <= 0) {
     return [];
   }
 
-  // Include parts until we run out of space
-  const includedParts: (TextPart | MediaPart)[] = [];
+  // Include items until we run out of space
+  const includedItems: ArrayMessageItem[] = [];
 
-  for (const part of parts) {
-    const text = getPartText(part);
+  for (const item of items) {
+    const text = getItemText(item);
     const textSize = utf8Bytes(text);
 
     if (textSize <= remainingBytes) {
-      // Part fits: include it as-is
-      includedParts.push(part);
+      // Item fits: include it as-is
+      includedItems.push(item);
       remainingBytes -= textSize;
-    } else if (includedParts.length === 0) {
-      // First part doesn't fit: truncate it
+    } else if (includedItems.length === 0) {
+      // First item doesn't fit: truncate it
       const truncated = truncateTextByBytes(text, remainingBytes);
       if (truncated) {
-        includedParts.push(withPartText(part, truncated));
+        includedItems.push(withItemText(item, truncated));
       }
       break;
     } else {
-      // Subsequent part doesn't fit: stop here
+      // Subsequent item doesn't fit: stop here
       break;
     }
   }
 
   /* c8 ignore start
    * for type safety only, algorithm guarantees SOME text included */
-  if (includedParts.length <= 0) {
+  if (includedItems.length <= 0) {
     return [];
   } else {
     /* c8 ignore stop */
-    return [{ ...message, parts: includedParts }];
+    return [{ ...message, [key]: includedItems }];
   }
 }
 
@@ -258,13 +292,8 @@ function truncateSingleMessage(message: unknown, maxBytes: number): unknown[] {
     return truncateContentMessage(message, maxBytes);
   }
 
-  if (isContentArrayMessage(message)) {
-    // Content array messages are returned as-is without truncation
-    return [message];
-  }
-
-  if (isPartsMessage(message)) {
-    return truncatePartsMessage(message, maxBytes);
+  if (isContentArrayMessage(message) || isPartsMessage(message)) {
+    return truncateArrayMessage(message, maxBytes);
   }
 
   // Unknown message format: cannot truncate safely
diff --git a/packages/core/test/lib/tracing/ai-message-truncation.test.ts b/packages/core/test/lib/tracing/ai-message-truncation.test.ts
@@ -547,5 +547,66 @@ describe('message truncation utilities', () => {
         },
       ]);
     });
+
+    it('truncates content array message when first text item does not fit', () => {
+      const messages = [
+        {
+          role: 'user',
+          content: [{ type: 'text', text: `2 ${humongous}` }],
+        },
+      ];
+      const result = truncateGenAiMessages(messages);
+      const truncLen =
+        20_000 -
+        2 -
+        JSON.stringify({
+          role: 'user',
+          content: [{ type: 'text', text: '' }],
+        }).length;
+      expect(result).toStrictEqual([
+        {
+          role: 'user',
+          content: [{ type: 'text', text: `2 ${humongous}`.substring(0, truncLen) }],
+        },
+      ]);
+    });
+
+    it('drops subsequent content array items that do not fit', () => {
+      const messages = [
+        {
+          role: 'assistant',
+          content: [
+            { type: 'text', text: `1 ${big}` },
+            { type: 'image_url', url: 'https://example.com/img.png' },
+            { type: 'text', text: `2 ${big}` },
+            { type: 'text', text: `3 ${big}` },
+            { type: 'text', text: `4 ${giant}` },
+            { type: 'text', text: `5 ${giant}` },
+          ],
+        },
+      ];
+      const result = truncateGenAiMessages(messages);
+      expect(result).toStrictEqual([
+        {
+          role: 'assistant',
+          content: [
+            { type: 'text', text: `1 ${big}` },
+            { type: 'image_url', url: 'https://example.com/img.png' },
+            { type: 'text', text: `2 ${big}` },
+            { type: 'text', text: `3 ${big}` },
+          ],
+        },
+      ]);
+    });
+
+    it('drops content array message if overhead is too large', () => {
+      const messages = [
+        {
+          some_other_field: humongous,
+          content: [{ type: 'text', text: 'hello' }],
+        },
+      ];
+      expect(truncateGenAiMessages(messages)).toStrictEqual([]);
+    });
   });
 });