diff --git a/.github/screenshots/google-adk-trace-detail-messages.png b/.github/screenshots/google-adk-trace-detail-messages.png
new file mode 100644
index 00000000000..ebc34fe64a6
Binary files /dev/null and b/.github/screenshots/google-adk-trace-detail-messages.png differ
diff --git a/.github/screenshots/google-adk-trace-full-page.png b/.github/screenshots/google-adk-trace-full-page.png
new file mode 100644
index 00000000000..05299c06484
Binary files /dev/null and b/.github/screenshots/google-adk-trace-full-page.png differ
diff --git a/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/detector.ts b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/detector.ts
new file mode 100644
index 00000000000..eb14a5ca153
--- /dev/null
+++ b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/detector.ts
@@ -0,0 +1,91 @@
+import { FormatDetector } from "../../types";
+
+interface GooglePart {
+  text?: string;
+  inline_data?: {
+    data?: string;
+    mime_type?: string;
+  };
+  function_call?: unknown;
+  function_response?: unknown;
+}
+
+interface GoogleContent {
+  role?: string;
+  parts?: GooglePart[];
+}
+
+interface GoogleCandidate {
+  content?: GoogleContent;
+  finish_reason?: string;
+}
+
+/** Roles a Content item may declare; any other role fails detection. */
+const VALID_ROLES = ["user", "model", "function", "system"];
+
+/** Narrows a value to a non-null object so its properties can be read. */
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  typeof value === "object" && value !== null;
+
+/**
+ * Checks if an object looks like a Google GenAI Content item: `parts` must be
+ * an array of non-null objects and `role`, when present, must be a known role.
+ */
+const isGoogleContent = (item: unknown): item is GoogleContent => {
+  if (!isRecord(item)) return false;
+
+  // The mapper reads fields off every part, so a null or primitive entry has
+  // to fail detection here instead of throwing while the message is rendered.
+  if (!Array.isArray(item.parts) || !item.parts.every(isRecord)) return false;
+
+  return item.role === undefined || VALID_ROLES.includes(item.role as string);
+};
+
+/**
+ * Detects Google GenAI input format:
+ * { contents: [{ role: "user", parts: [{ text: "..." } | { inline_data: {...} }] }] }
+ */
+const hasGoogleInputFormat = (data: unknown): boolean => {
+  if (!data || typeof data !== "object") return false;
+  const d = data as Record<string, unknown>;
+
+  if (!Array.isArray(d.contents)) return false;
+  if (d.contents.length === 0) return false;
+
+  return d.contents.every(isGoogleContent);
+};
+
+/**
+ * Detects Google GenAI output format:
+ * { candidates: [{ content: { role: "model", parts: [...] } }] }
+ */
+const hasGoogleOutputFormat = (data: unknown): boolean => {
+  if (!data || typeof data !== "object") return false;
+  const d = data as Record<string, unknown>;
+
+  if (!Array.isArray(d.candidates)) return false;
+  if (d.candidates.length === 0) return false;
+
+  return d.candidates.every((candidate: unknown) => {
+    if (!candidate || typeof candidate !== "object") return false;
+    const c = candidate as GoogleCandidate;
+    return c.content === undefined || isGoogleContent(c.content);
+  });
+};
+
+/**
+ * Detects if the provided data is in Google GenAI (ADK) format.
+ */
+export const detectGoogleFormat: FormatDetector = (data, prettifyConfig) => {
+  if (!data) return false;
+
+  const isInput = prettifyConfig?.fieldType === "input";
+  const isOutput = prettifyConfig?.fieldType === "output";
+
+  if (!isInput && !isOutput) return false;
+
+  if (isInput && hasGoogleInputFormat(data)) return true;
+  if (isOutput && hasGoogleOutputFormat(data)) return true;
+
+  return false;
+};
diff --git a/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/index.ts b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/index.ts
new file mode 100644
index 00000000000..2fea3eedccc
--- /dev/null
+++ b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/index.ts
@@ -0,0 +1,12 @@
+import { LLMMessageFormatImplementation } from "../../types";
+import { detectGoogleFormat } from "./detector";
+import { mapGoogleMessages } from "./mapper";
+
+/** Google GenAI format entry: pairs the detector with its message mapper. */
+export const googleFormat: LLMMessageFormatImplementation = {
+  name: "google",
+  detector: detectGoogleFormat,
+  mapper: mapGoogleMessages,
+};
+
+export { detectGoogleFormat, mapGoogleMessages };
diff --git a/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/mapper.ts b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/mapper.ts
new file mode 100644
index 00000000000..aec26b3efb7
--- /dev/null
+++ b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/google/mapper.ts
@@ -0,0 +1,213 @@
+import PrettyLLMMessage from "@/shared/PrettyLLMMessage";
+import {
+  FormatMapper,
+  LLMMessageDescriptor,
+  LLMBlockDescriptor,
+  LLMMapperResult,
+} from "../../types";
+import { MessageRole } from "@/shared/PrettyLLMMessage/types";
+import { isPlaceholder } from "../../utils";
+
+interface GoogleInlineData {
+  data?: string;
+  mime_type?: string;
+}
+
+interface GooglePart {
+  text?: string;
+  inline_data?: GoogleInlineData;
+  function_call?: {
+    name?: string;
+    args?: Record<string, unknown>;
+  };
+  function_response?: {
+    name?: string;
+    response?: unknown;
+  };
+}
+
+interface GoogleContent {
+  role?: string;
+  parts?: GooglePart[];
+}
+
+interface GoogleCandidate {
+  content?: GoogleContent;
+  finish_reason?: string;
+}
+
+interface GoogleInputData {
+  contents: GoogleContent[];
+}
+
+interface GoogleOutputData {
+  candidates: GoogleCandidate[];
+  usage_metadata?: {
+    prompt_token_count?: number;
+    candidates_token_count?: number;
+    total_token_count?: number;
+  };
+}
+
+/** Builds a message id such as "input-0" from a prefix and position. */
+const generateMessageId = (index: number, prefix: string): string =>
+  `${prefix}-${index}`;
+
+/**
+ * Normalizes Google role names to our internal MessageRole type:
+ * "model" becomes "assistant", "function" becomes "tool", a missing or empty
+ * role becomes "user", and any other role is passed through unchanged.
+ */
+const normalizeRole = (role: string | undefined): MessageRole => {
+  if (role === "model") return "assistant";
+  if (role === "function") return "tool";
+  return (role as MessageRole) || "user";
+};
+
+/**
+ * Maps an array of Google parts to block descriptors.
+ *
+ * Parts are visited in order. A part is handled by the first field it carries
+ * (text, inline_data, function_call, function_response). Inline images are
+ * queued and emitted as one image block right before the next text or
+ * function block, or at the end, so blocks follow the order of the parts.
+ * Parts carrying none of these fields are skipped.
+ */
+const mapParts = (
+  parts: GooglePart[],
+  role: MessageRole,
+): LLMBlockDescriptor[] => {
+  const blocks: LLMBlockDescriptor[] = [];
+  const images: Array<{ url: string; name: string }> = [];
+
+  // Emits the queued images (if any) as one block and empties the queue.
+  const flushImages = () => {
+    if (images.length === 0) return;
+    blocks.push({
+      blockType: "image",
+      component: PrettyLLMMessage.ImageBlock,
+      props: { images: [...images] },
+    });
+    images.length = 0;
+  };
+
+  parts.forEach((part, index) => {
+    if (part.text !== undefined) {
+      flushImages();
+      blocks.push({
+        blockType: "text",
+        component: PrettyLLMMessage.TextBlock,
+        props: { children: part.text, role, showMoreButton: true },
+      });
+    } else if (part.inline_data) {
+      const data = part.inline_data.data;
+      if (data && data.length > 0) {
+        images.push({
+          url: data,
+          name: isPlaceholder(data) ? data : `Image ${index + 1}`,
+        });
+      }
+    } else if (part.function_call) {
+      const name = part.function_call.name || "function_call";
+      const args = part.function_call.args
+        ? JSON.stringify(part.function_call.args, null, 2)
+        : "";
+      flushImages();
+      blocks.push({
+        blockType: "code",
+        component: PrettyLLMMessage.CodeBlock,
+        props: { code: args, label: name },
+      });
+    } else if (part.function_response) {
+      const name = part.function_response.name || "function_response";
+      const payload = part.function_response.response;
+      // Only a missing payload renders empty; 0, false and "" are real values.
+      const response =
+        payload === undefined || payload === null
+          ? ""
+          : JSON.stringify(payload, null, 2);
+      flushImages();
+      blocks.push({
+        blockType: "code",
+        component: PrettyLLMMessage.CodeBlock,
+        props: { code: response, label: name },
+      });
+    }
+  });
+
+  // Images trailing the last text/function part still need their block.
+  flushImages();
+
+  return blocks;
+};
+
+/**
+ * Maps a GoogleContent object to an LLMMessageDescriptor.
+ */
+const mapGoogleContent = (
+  content: GoogleContent,
+  index: number,
+  prefix: string,
+): LLMMessageDescriptor => {
+  const role = normalizeRole(content.role);
+  const blocks = content.parts ? mapParts(content.parts, role) : [];
+
+  return { id: generateMessageId(index, prefix), role, blocks };
+};
+
+/**
+ * Maps Google GenAI input format to LLMMapperResult.
+ */
+const mapGoogleInput = (data: GoogleInputData): LLMMapperResult => {
+  const messages = data.contents.map((content, index) =>
+    mapGoogleContent(content, index, "input"),
+  );
+  return { messages };
+};
+
+/**
+ * Maps Google GenAI output format to LLMMapperResult.
+ */
+const mapGoogleOutput = (data: GoogleOutputData): LLMMapperResult => {
+  const messages: LLMMessageDescriptor[] = [];
+
+  data.candidates.forEach((candidate, index) => {
+    if (!candidate.content) return;
+
+    const message = mapGoogleContent(candidate.content, index, "output");
+    if (candidate.finish_reason) {
+      message.finishReason = candidate.finish_reason;
+    }
+    messages.push(message);
+  });
+
+  const usage = data.usage_metadata
+    ? {
+        prompt_tokens: data.usage_metadata.prompt_token_count,
+        completion_tokens: data.usage_metadata.candidates_token_count,
+        total_tokens: data.usage_metadata.total_token_count,
+      }
+    : undefined;
+
+  return { messages, usage };
+};
+
+/**
+ * Maps Google GenAI format data to normalized LLMMapperResult.
+ */
+export const mapGoogleMessages: FormatMapper = (data, prettifyConfig) => {
+  // Only non-null objects can carry `contents` / `candidates`.
+  if (!data || typeof data !== "object") return { messages: [] };
+
+  const fieldType = prettifyConfig?.fieldType;
+
+  // Input fields are mapped from the `contents` key.
+  if (fieldType === "input" && "contents" in data) {
+    return mapGoogleInput(data as GoogleInputData);
+  }
+  // Output fields are mapped from the `candidates` key.
+  if (fieldType === "output" && "candidates" in data) {
+    return mapGoogleOutput(data as GoogleOutputData);
+  }
+  return { messages: [] };
+};
diff --git a/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/index.ts b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/index.ts
index f80ceb3e260..9f10f23f003 100644
--- a/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/index.ts
+++ b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/index.ts
@@ -1,2 +1,3 @@
 export { openaiFormat, detectOpenAIFormat, mapOpenAIMessages } from "./openai";
+export { googleFormat, detectGoogleFormat, mapGoogleMessages } from "./google";
 export { getFormat, getAllFormats } from "./registry";
diff --git a/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/registry.ts b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/registry.ts
index a47c293ff9b..7e75a9edb8e 100644
--- a/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/registry.ts
+++ b/apps/opik-frontend/src/shared/PrettyLLMMessage/llmMessages/providers/registry.ts
@@ -1,6 +1,7 @@
 import { LLMMessageFormat, LLMMessageFormatImplementation } from "../types";
 import { openaiFormat } from "./openai";
 import { langchainFormat } from "./langchain";
+import { googleFormat } from "./google";
 
 const FORMAT_REGISTRY: Record<
   LLMMessageFormat,
@@ -9,7 +10,7 @@ const FORMAT_REGISTRY: Record<
   openai: openaiFormat,
   langchain: langchainFormat,
   anthropic: null,
-  google: null,
+  google: googleFormat,
 };
 
 export const getFormat = (
diff --git 
a/apps/opik-frontend/src/v2/pages-shared/llm/PromptMessagesReadonly/PromptMessagesReadonly.tsx b/apps/opik-frontend/src/v2/pages-shared/llm/PromptMessagesReadonly/PromptMessagesReadonly.tsx index 944a56d2532..e1d0aa631b5 100644 --- a/apps/opik-frontend/src/v2/pages-shared/llm/PromptMessagesReadonly/PromptMessagesReadonly.tsx +++ b/apps/opik-frontend/src/v2/pages-shared/llm/PromptMessagesReadonly/PromptMessagesReadonly.tsx @@ -25,6 +25,7 @@ type MediaItem = { video_url?: { url: string }; audio_url?: { url: string }; input_audio?: { data: string }; + inline_data?: { data: string; mime_type?: string }; }; const getRoleLabel = (role: string): string => { @@ -76,6 +77,12 @@ const getTextAndMedia = ( if (item.type === "audio_url" || item.type === "input_audio") { const url = item.audio_url?.url || item.url; if (url) audios.push(url); + return; + } + + // Google GenAI inline_data (images via ADK) + if (item.inline_data?.data) { + images.push(item.inline_data.data); } }); diff --git a/sdks/python/src/opik/api_objects/attachment/attachments_extractor.py b/sdks/python/src/opik/api_objects/attachment/attachments_extractor.py index 6e590418574..7f4bac1cbf6 100644 --- a/sdks/python/src/opik/api_objects/attachment/attachments_extractor.py +++ b/sdks/python/src/opik/api_objects/attachment/attachments_extractor.py @@ -33,12 +33,13 @@ def __init__(self, min_attachment_size: int): self.decoder = decoder_base64.Base64AttachmentDecoder() # Pattern to match base64 strings (can be embedded in text) + # Matches both standard base64 (+/) and URL-safe base64 (-_) # Requires at least min_attachment_size characters to reduce false positives min_base64_groups = int(min_attachment_size / 4) BASE64_PATTERN = ( - r"(?:[A-Za-z0-9+/]{4}){" + r"(?:[A-Za-z0-9+/\-_]{4}){" + str(min_base64_groups) - + ",}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?" + + r",}(?:[A-Za-z0-9+/\-_]{2}==|[A-Za-z0-9+/\-_]{3}=)?" 
) self.pattern = re.compile(BASE64_PATTERN) diff --git a/sdks/python/src/opik/api_objects/attachment/decoder_base64.py b/sdks/python/src/opik/api_objects/attachment/decoder_base64.py index 21cb39a162c..740fdddca0b 100644 --- a/sdks/python/src/opik/api_objects/attachment/decoder_base64.py +++ b/sdks/python/src/opik/api_objects/attachment/decoder_base64.py @@ -35,8 +35,12 @@ def decode( return None try: - # Decode base64 string to bytes - decoded_bytes = base64.b64decode(raw_data, validate=True) + # Decode base64 string to bytes. + # Try standard base64 first; fall back to URL-safe base64 (- and _ chars). + try: + decoded_bytes = base64.b64decode(raw_data, validate=True) + except (ValueError, binascii.Error): + decoded_bytes = base64.urlsafe_b64decode(raw_data + "==") # Detect MIME type from content mime_type = decoder_helpers.detect_mime_type(decoded_bytes)