From 4109da9cee40613421040c56bc9daf9ae023753f Mon Sep 17 00:00:00 2001 From: Yizuki_Ame Date: Tue, 7 Apr 2026 15:47:41 +0800 Subject: [PATCH] Fail fast when LiteLLM file_uri MIME type cannot be determined Signed-off-by: Yizuki_Ame --- src/google/adk/models/lite_llm.py | 40 +++++---- tests/unittests/models/test_litellm.py | 117 ++++++++++++++++--------- 2 files changed, 100 insertions(+), 57 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 7d13696c96..56ef145a98 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -233,10 +233,6 @@ def _get_provider_from_model(model: str) -> str: return "" -# Default MIME type when none can be inferred -_DEFAULT_MIME_TYPE = "application/octet-stream" - - def _infer_mime_type_from_uri(uri: str) -> Optional[str]: """Attempts to infer MIME type from a URI's path extension. @@ -811,6 +807,7 @@ async def _content_to_message_param( follow_up = await _content_to_message_param( types.Content(role=content.role, parts=non_tool_parts), provider=provider, + model=model, ) follow_up_messages = ( follow_up if isinstance(follow_up, list) else [follow_up] @@ -1081,27 +1078,27 @@ async def _get_content( }) continue - # Determine MIME type: use explicit value, infer from URI, or use default. + # Determine MIME type: use explicit value, infer from URI, or fail fast + # only when a file block still needs to be constructed. mime_type = part.file_data.mime_type if not mime_type: mime_type = _infer_mime_type_from_uri(part.file_data.file_uri) if not mime_type and part.file_data.display_name: guessed_mime_type, _ = mimetypes.guess_type(part.file_data.display_name) mime_type = guessed_mime_type - if not mime_type: - # LiteLLM's Vertex AI backend requires format for GCS URIs. - mime_type = _DEFAULT_MIME_TYPE - logger.debug( - "Could not determine MIME type for file_uri %s, using default: %s", - part.file_data.file_uri, - mime_type, - ) - mime_type = _normalize_mime_type(mime_type) + + normalized_mime_type = ( + _normalize_mime_type(mime_type) if mime_type else None + ) if provider in _FILE_ID_REQUIRED_PROVIDERS and _is_http_url( part.file_data.file_uri ): - url_content_type = _media_url_content_type(mime_type) + url_content_type = ( + _media_url_content_type(normalized_mime_type) + if normalized_mime_type + else None + ) if url_content_type: content_objects.append({ "type": url_content_type, @@ -1125,10 +1122,21 @@ async def _get_content( }) continue + if not normalized_mime_type: + logger.warning( + "Could not determine MIME type for file_uri %s", + part.file_data.file_uri, + ) + raise ValueError( + "Cannot determine MIME type for file_uri " + f"'{part.file_data.file_uri}'. Please set `file_data.mime_type` " + "explicitly or use a file URI with a recognizable extension." + ) + file_object: ChatCompletionFileUrlObject = { "file_id": part.file_data.file_uri, } - file_object["format"] = mime_type + file_object["format"] = normalized_mime_type content_objects.append({ "type": "file", "file": file_object, diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index ace08ad997..9dd555369b 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1882,15 +1882,7 @@ async def test_content_to_message_param_user_message_file_uri_only( @pytest.mark.asyncio async def test_content_to_message_param_user_message_file_uri_without_mime_type(): - """Test handling of file_data without mime_type (GcsArtifactService scenario). - - When using GcsArtifactService, artifacts may have file_uri (gs://...) but - without mime_type set. LiteLLM's Vertex AI backend requires the format - field to be present, so we infer MIME type from the URI extension or use - a default fallback to ensure compatibility. - - See: https://github.com/google/adk-python/issues/3787 - """ + """Test file_data without mime_type fails fast when inference is impossible.""" file_part = types.Part( file_data=types.FileData( file_uri="gs://agent-artifact-bucket/app/user/session/artifact/0" @@ -1904,22 +1896,11 @@ async def test_content_to_message_param_user_message_file_uri_without_mime_type( ], ) - message = await _content_to_message_param(content) - assert message == { - "role": "user", - "content": [ - {"type": "text", "text": "Analyze this file."}, - { - "type": "file", - "file": { - "file_id": ( - "gs://agent-artifact-bucket/app/user/session/artifact/0" - ), - "format": "application/octet-stream", - }, - }, - ], - } + with pytest.raises( + ValueError, + match="Cannot determine MIME type for file_uri", + ): + await _content_to_message_param(content) @pytest.mark.asyncio @@ -2027,6 +2008,50 @@ async def test_content_to_message_param_function_response_with_extra_parts(): ] +@pytest.mark.asyncio +async def test_content_to_message_param_function_response_preserves_model_for_file_uri(): + tool_part = types.Part( + function_response=types.FunctionResponse( + name="load_document", + response={"status": "success"}, + ) + ) + tool_part.function_response.id = "tool_call_1" + + file_part = types.Part( + file_data=types.FileData( + file_uri="gs://bucket/path/to/document.pdf", + mime_type="application/pdf", + ) + ) + content = types.Content(role="user", parts=[tool_part, file_part]) + + messages = await _content_to_message_param( + content, + provider="vertex_ai", + model="vertex_ai/gemini-2.5-flash", + ) + + assert isinstance(messages, list) + assert messages == [ + { + "role": "tool", + "tool_call_id": "tool_call_1", + "content": '{"status": "success"}', + }, + { + "role": "user", + "content": [{ + "type": "file", + "file": { + "file_id": "gs://bucket/path/to/document.pdf", + "format": "application/pdf", + }, + }], + }, + ] + + @pytest.mark.asyncio async def test_content_to_message_param_function_response_preserves_string(): """Tests that string responses are used directly without double-serialization. @@ -2999,6 +3024,26 @@ async def test_get_content_file_uri_anthropic_openai_file_id_falls_back_to_text( ] +@pytest.mark.asyncio +@pytest.mark.parametrize( + "provider,model", + [ + ("openai", "openai/gpt-4o"), + ("azure", "azure/gpt-4"), + ], +) +async def test_get_content_file_uri_file_id_required_missing_mime_falls_back_to_text( + provider, model +): + parts = [ + types.Part(file_data=types.FileData(file_uri="gs://bucket/artifact/0")) + ] + content = await _get_content(parts, provider=provider, model=model) + assert content == [ + {"type": "text", "text": '[File reference: "gs://bucket/artifact/0"]'} + ] + + @pytest.mark.asyncio async def test_get_content_file_uri_vertex_ai_non_gemini_falls_back_to_text(): parts = [ @@ -3097,27 +3142,17 @@ async def test_get_content_file_uri_infers_from_display_name(): @pytest.mark.asyncio async def test_get_content_file_uri_default_mime_type(): - """Test that file_uri without extension uses default MIME type. - - When file_data has a file_uri without a recognizable extension and no explicit - mime_type, a default MIME type should be used to ensure compatibility with - LiteLLM backends. - - See: https://github.com/google/adk-python/issues/3787 - """ + """Test that file_uri without extension raises a helpful ValueError.""" # Use Part constructor directly to create file_data without mime_type # (types.Part.from_uri requires a valid mime_type when it can't infer) parts = [ types.Part(file_data=types.FileData(file_uri="gs://bucket/artifact/0")) ] - content = await _get_content(parts) - assert content[0] == { - "type": "file", - "file": { - "file_id": "gs://bucket/artifact/0", - "format": "application/octet-stream", - }, - } + with pytest.raises( + ValueError, + match="Cannot determine MIME type for file_uri", + ): + await _get_content(parts) @pytest.mark.asyncio