Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 24 additions & 16 deletions src/google/adk/models/lite_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,6 @@ def _get_provider_from_model(model: str) -> str:
return ""


# Default MIME type when none can be inferred
_DEFAULT_MIME_TYPE = "application/octet-stream"


def _infer_mime_type_from_uri(uri: str) -> Optional[str]:
"""Attempts to infer MIME type from a URI's path extension.

Expand Down Expand Up @@ -811,6 +807,7 @@ async def _content_to_message_param(
follow_up = await _content_to_message_param(
types.Content(role=content.role, parts=non_tool_parts),
provider=provider,
model=model,
)
follow_up_messages = (
follow_up if isinstance(follow_up, list) else [follow_up]
Expand Down Expand Up @@ -1081,27 +1078,27 @@ async def _get_content(
})
continue

# Determine MIME type: use explicit value, infer from URI, or use default.
# Determine MIME type: use explicit value, infer from URI, or fail fast
# only when a file block still needs to be constructed.
mime_type = part.file_data.mime_type
if not mime_type:
mime_type = _infer_mime_type_from_uri(part.file_data.file_uri)
if not mime_type and part.file_data.display_name:
guessed_mime_type, _ = mimetypes.guess_type(part.file_data.display_name)
mime_type = guessed_mime_type
if not mime_type:
# LiteLLM's Vertex AI backend requires format for GCS URIs.
mime_type = _DEFAULT_MIME_TYPE
logger.debug(
"Could not determine MIME type for file_uri %s, using default: %s",
part.file_data.file_uri,
mime_type,
)
mime_type = _normalize_mime_type(mime_type)

normalized_mime_type = (
_normalize_mime_type(mime_type) if mime_type else None
)

if provider in _FILE_ID_REQUIRED_PROVIDERS and _is_http_url(
part.file_data.file_uri
):
url_content_type = _media_url_content_type(mime_type)
url_content_type = (
_media_url_content_type(normalized_mime_type)
if normalized_mime_type
else None
)
if url_content_type:
content_objects.append({
"type": url_content_type,
Expand All @@ -1125,10 +1122,21 @@ async def _get_content(
})
continue

if not normalized_mime_type:
logger.warning(
"Could not determine MIME type for file_uri %s",
part.file_data.file_uri,
)
raise ValueError(
"Cannot determine MIME type for file_uri "
f"'{part.file_data.file_uri}'. Please set `file_data.mime_type` "
"explicitly or use a file URI with a recognizable extension."
)

file_object: ChatCompletionFileUrlObject = {
"file_id": part.file_data.file_uri,
}
file_object["format"] = mime_type
file_object["format"] = normalized_mime_type
content_objects.append({
"type": "file",
"file": file_object,
Expand Down
117 changes: 76 additions & 41 deletions tests/unittests/models/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1882,15 +1882,7 @@ async def test_content_to_message_param_user_message_file_uri_only(

@pytest.mark.asyncio
async def test_content_to_message_param_user_message_file_uri_without_mime_type():
"""Test handling of file_data without mime_type (GcsArtifactService scenario).

When using GcsArtifactService, artifacts may have file_uri (gs://...) but
without mime_type set. LiteLLM's Vertex AI backend requires the format
field to be present, so we infer MIME type from the URI extension or use
a default fallback to ensure compatibility.

See: https://github.com/google/adk-python/issues/3787
"""
"""Test file_data without mime_type fails fast when inference is impossible."""
file_part = types.Part(
file_data=types.FileData(
file_uri="gs://agent-artifact-bucket/app/user/session/artifact/0"
Expand All @@ -1904,22 +1896,11 @@ async def test_content_to_message_param_user_message_file_uri_without_mime_type(
],
)

message = await _content_to_message_param(content)
assert message == {
"role": "user",
"content": [
{"type": "text", "text": "Analyze this file."},
{
"type": "file",
"file": {
"file_id": (
"gs://agent-artifact-bucket/app/user/session/artifact/0"
),
"format": "application/octet-stream",
},
},
],
}
with pytest.raises(
ValueError,
match="Cannot determine MIME type for file_uri",
):
await _content_to_message_param(content)


@pytest.mark.asyncio
Expand Down Expand Up @@ -2027,6 +2008,50 @@ async def test_content_to_message_param_function_response_with_extra_parts():
]


@pytest.mark.asyncio
async def test_content_to_message_param_function_response_preserves_model_for_file_uri():
tool_part = types.Part(
function_response=types.FunctionResponse(
name="load_document",
response={"status": "success"},
)
)
tool_part.function_response.id = "tool_call_1"

file_part = types.Part(
file_data=types.FileData(
file_uri="gs://bucket/path/to/document.pdf",
mime_type="application/pdf",
)
)
content = types.Content(role="user", parts=[tool_part, file_part])

messages = await _content_to_message_param(
content,
provider="vertex_ai",
model="vertex_ai/gemini-2.5-flash",
)

assert isinstance(messages, list)
assert messages == [
{
"role": "tool",
"tool_call_id": "tool_call_1",
"content": '{"status": "success"}',
},
{
"role": "user",
"content": [{
"type": "file",
"file": {
"file_id": "gs://bucket/path/to/document.pdf",
"format": "application/pdf",
},
}],
},
]


@pytest.mark.asyncio
async def test_content_to_message_param_function_response_preserves_string():
"""Tests that string responses are used directly without double-serialization.
Expand Down Expand Up @@ -2999,6 +3024,26 @@ async def test_get_content_file_uri_anthropic_openai_file_id_falls_back_to_text(
]


@pytest.mark.asyncio
@pytest.mark.parametrize(
"provider,model",
[
("openai", "openai/gpt-4o"),
("azure", "azure/gpt-4"),
],
)
async def test_get_content_file_uri_file_id_required_missing_mime_falls_back_to_text(
provider, model
):
parts = [
types.Part(file_data=types.FileData(file_uri="gs://bucket/artifact/0"))
]
content = await _get_content(parts, provider=provider, model=model)
assert content == [
{"type": "text", "text": '[File reference: "gs://bucket/artifact/0"]'}
]


@pytest.mark.asyncio
async def test_get_content_file_uri_vertex_ai_non_gemini_falls_back_to_text():
parts = [
Expand Down Expand Up @@ -3097,27 +3142,17 @@ async def test_get_content_file_uri_infers_from_display_name():

@pytest.mark.asyncio
async def test_get_content_file_uri_default_mime_type():
"""Test that file_uri without extension uses default MIME type.

When file_data has a file_uri without a recognizable extension and no explicit
mime_type, a default MIME type should be used to ensure compatibility with
LiteLLM backends.

See: https://github.com/google/adk-python/issues/3787
"""
"""Test that file_uri without extension raises a helpful ValueError."""
# Use Part constructor directly to create file_data without mime_type
# (types.Part.from_uri requires a valid mime_type when it can't infer)
parts = [
types.Part(file_data=types.FileData(file_uri="gs://bucket/artifact/0"))
]
content = await _get_content(parts)
assert content[0] == {
"type": "file",
"file": {
"file_id": "gs://bucket/artifact/0",
"format": "application/octet-stream",
},
}
with pytest.raises(
ValueError,
match="Cannot determine MIME type for file_uri",
):
await _get_content(parts)


@pytest.mark.asyncio
Expand Down
Loading