|
31 | 31 | event_from_exception, |
32 | 32 | safe_serialize, |
33 | 33 | ) |
34 | | -from google.genai.types import GenerateContentConfig, Part, Content |
| 34 | +from google.genai.types import GenerateContentConfig, Part, Content, PartDict |
35 | 35 | from itertools import chain |
36 | 36 |
|
37 | 37 | if TYPE_CHECKING: |
|
47 | 47 | ContentUnion, |
48 | 48 | ) |
49 | 49 |
|
# Pillow is optional: attempt the import once at module load and record the
# outcome, so later isinstance checks against PILImage.Image can be guarded
# by a plain boolean instead of repeating the import.
try:
    from PIL import Image as PILImage  # type: ignore[import-not-found]
except ImportError:
    _is_PIL_available = False
else:
    _is_PIL_available = True

# Key names used to decide whether a user-supplied dict is Part-like
# (as opposed to a Content or multi-turn conversation entry).
_PART_DICT_KEYS = PartDict.__optional_keys__
| 61 | + |
50 | 62 |
|
51 | 63 | class UsageData(TypedDict): |
52 | 64 | """Structure for token usage data.""" |
@@ -169,12 +181,23 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A |
169 | 181 | if isinstance(contents, str): |
170 | 182 | return [{"role": "user", "content": contents}] |
171 | 183 |
|
172 | | - # Handle list case - process each item (non-recursive, flatten at top level) |
| 184 | + # Handle list case |
173 | 185 | if isinstance(contents, list): |
174 | | - for item in contents: |
175 | | - item_messages = extract_contents_messages(item) |
176 | | - messages.extend(item_messages) |
177 | | - return messages |
| 186 | + if contents and all(_is_part_like(item) for item in contents): |
| 187 | + # All items are parts — merge into a single multi-part user message |
| 188 | + content_parts = [] |
| 189 | + for item in contents: |
| 190 | + part = _extract_part_from_item(item) |
| 191 | + if part is not None: |
| 192 | + content_parts.append(part) |
| 193 | + |
| 194 | + return [{"role": "user", "content": content_parts}] |
| 195 | + else: |
| 196 | + # Multi-turn conversation or mixed content types |
| 197 | + for item in contents: |
| 198 | + item_messages = extract_contents_messages(item) |
| 199 | + messages.extend(item_messages) |
| 200 | + return messages |
178 | 201 |
|
179 | 202 | # Handle dictionary case (ContentDict) |
180 | 203 | if isinstance(contents, dict): |
@@ -206,13 +229,23 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A |
206 | 229 | # Add tool messages |
207 | 230 | messages.extend(tool_messages) |
208 | 231 | elif "text" in contents: |
209 | | - # Simple text in dict |
210 | 232 | messages.append( |
211 | 233 | { |
212 | | - "role": role or "user", |
| 234 | + "role": role, |
213 | 235 | "content": [{"text": contents["text"], "type": "text"}], |
214 | 236 | } |
215 | 237 | ) |
| 238 | + elif "inline_data" in contents: |
| 239 | + # The "data" will always be bytes (or bytes within a string), |
| 240 | + # so if this is present, it's safe to automatically substitute with the placeholder |
| 241 | + messages.append( |
| 242 | + { |
| 243 | + "inline_data": { |
| 244 | + "mime_type": contents["inline_data"].get("mime_type", ""), |
| 245 | + "data": BLOB_DATA_SUBSTITUTE, |
| 246 | + } |
| 247 | + } |
| 248 | + ) |
216 | 249 |
|
217 | 250 | return messages |
218 | 251 |
|
@@ -248,15 +281,10 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A |
248 | 281 | return [{"role": "user", "content": [part_result]}] |
249 | 282 |
|
250 | 283 | # Handle PIL.Image.Image |
251 | | - try: |
252 | | - from PIL import Image as PILImage # type: ignore[import-not-found] |
253 | | - |
254 | | - if isinstance(contents, PILImage.Image): |
255 | | - blob_part = _extract_pil_image(contents) |
256 | | - if blob_part: |
257 | | - return [{"role": "user", "content": [blob_part]}] |
258 | | - except ImportError: |
259 | | - pass |
| 284 | + if _is_PIL_available and isinstance(contents, PILImage.Image): |
| 285 | + blob_part = _extract_pil_image(contents) |
| 286 | + if blob_part: |
| 287 | + return [{"role": "user", "content": [blob_part]}] |
260 | 288 |
|
261 | 289 | # Handle File object |
262 | 290 | if hasattr(contents, "uri") and hasattr(contents, "mime_type"): |
@@ -310,11 +338,9 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": |
310 | 338 | if result is not None: |
311 | 339 | # For inline_data with bytes data, substitute the content |
312 | 340 | if "inline_data" in part: |
313 | | - inline_data = part["inline_data"] |
314 | | - if isinstance(inline_data, dict) and isinstance( |
315 | | - inline_data.get("data"), bytes |
316 | | - ): |
317 | | - result["content"] = BLOB_DATA_SUBSTITUTE |
| 341 | + # inline_data.data will always be bytes, or a string containing base64-encoded bytes, |
| 342 | + # so can automatically substitute without further checks |
| 343 | + result["content"] = BLOB_DATA_SUBSTITUTE |
318 | 344 | return result |
319 | 345 |
|
320 | 346 | return None |
@@ -357,18 +383,11 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": |
357 | 383 | if mime_type is None: |
358 | 384 | mime_type = "" |
359 | 385 |
|
360 | | - # Handle both bytes (binary data) and str (base64-encoded data) |
361 | | - if isinstance(data, bytes): |
362 | | - content = BLOB_DATA_SUBSTITUTE |
363 | | - else: |
364 | | - # For non-bytes data (e.g., base64 strings), use as-is |
365 | | - content = data |
366 | | - |
367 | 386 | return { |
368 | 387 | "type": "blob", |
369 | 388 | "modality": get_modality_from_mime_type(mime_type), |
370 | 389 | "mime_type": mime_type, |
371 | | - "content": content, |
| 390 | + "content": BLOB_DATA_SUBSTITUTE, |
372 | 391 | } |
373 | 392 |
|
374 | 393 | return None |
@@ -429,25 +448,78 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": |
429 | 448 |
|
def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]":
    """Build a blob content part describing a PIL image.

    Returns ``None`` when Pillow is not importable or ``image`` is not a
    ``PIL.Image.Image``. Otherwise returns a dict with ``type``, ``modality``,
    ``mime_type`` and ``content`` keys; ``content`` is always the blob
    placeholder, so no pixel data is read from the image.
    """
    if _is_PIL_available and isinstance(image, PILImage.Image):
        # Images without a recorded format are reported as JPEG.
        subtype = (image.format or "JPEG").lower()
        mime_type = f"image/{subtype}"
        return {
            "type": "blob",
            "modality": get_modality_from_mime_type(mime_type),
            "mime_type": mime_type,
            "content": BLOB_DATA_SUBSTITUTE,
        }

    return None
437 | 465 |
|
438 | | - # Get format, default to JPEG |
439 | | - format_str = image.format or "JPEG" |
440 | | - suffix = format_str.lower() |
441 | | - mime_type = f"image/{suffix}" |
442 | 466 |
|
def _is_part_like(item: "Any") -> bool:
    """Tell whether ``item`` is a single part rather than a Content/multi-turn entry.

    Strings and ``Part`` instances count as parts; lists and ``Content``
    instances do not. A dict counts as a part only when it has neither a
    ``"role"`` nor a ``"parts"`` key and shares at least one key with
    ``_PART_DICT_KEYS``. Any other object counts as a part when it exposes a
    ``uri`` attribute or is a PIL image.
    """
    if isinstance(item, (str, Part)):
        return True

    if isinstance(item, (list, Content)):
        return False

    if isinstance(item, dict):
        # "role"/"parts" mark a Content-style dict; otherwise the dict must
        # carry at least one Part key to qualify.
        looks_like_content = "role" in item or "parts" in item
        return not looks_like_content and bool(_PART_DICT_KEYS & item.keys())

    # Remaining candidates: objects with a `uri` attribute (File objects)
    # and PIL images (only checkable when Pillow was importable).
    return hasattr(item, "uri") or (
        _is_PIL_available and isinstance(item, PILImage.Image)
    )
| 485 | + |
| 486 | + |
| 487 | +def _extract_part_from_item(item: "Any") -> "Optional[dict[str, Any]]": |
| 488 | + """Convert a single part-like item to a content part dict.""" |
| 489 | + if isinstance(item, str): |
| 490 | + return {"text": item, "type": "text"} |
| 491 | + |
| 492 | + # Handle bare inline_data dicts directly to preserve the raw format |
| 493 | + if isinstance(item, dict) and "inline_data" in item: |
443 | 494 | return { |
444 | | - "type": "blob", |
445 | | - "modality": get_modality_from_mime_type(mime_type), |
446 | | - "mime_type": mime_type, |
447 | | - "content": BLOB_DATA_SUBSTITUTE, |
| 495 | + "inline_data": { |
| 496 | + "mime_type": item["inline_data"].get("mime_type", ""), |
| 497 | + "data": BLOB_DATA_SUBSTITUTE, |
| 498 | + } |
448 | 499 | } |
449 | | - except Exception: |
450 | | - return None |
| 500 | + |
| 501 | + # For other dicts and Part objects, use existing _extract_part_content |
| 502 | + result = _extract_part_content(item) |
| 503 | + if result is not None: |
| 504 | + return result |
| 505 | + |
| 506 | + # PIL.Image |
| 507 | + if _is_PIL_available and isinstance(item, PILImage.Image): |
| 508 | + return _extract_pil_image(item) |
| 509 | + |
| 510 | + # File objects |
| 511 | + if hasattr(item, "uri") and hasattr(item, "mime_type"): |
| 512 | + file_uri = getattr(item, "uri", None) |
| 513 | + mime_type = getattr(item, "mime_type", None) or "" |
| 514 | + if file_uri is not None: |
| 515 | + return { |
| 516 | + "type": "uri", |
| 517 | + "modality": get_modality_from_mime_type(mime_type), |
| 518 | + "mime_type": mime_type, |
| 519 | + "uri": file_uri, |
| 520 | + } |
| 521 | + |
| 522 | + return None |
451 | 523 |
|
452 | 524 |
|
453 | 525 | def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]": |
|
0 commit comments