@@ -1716,7 +1716,7 @@ async def test_generate_content_async_with_usage_metadata(
17161716
17171717
17181718@pytest .mark .asyncio
1719- async def test_generate_content_async_ollama_chat_flattens_content (
1719+ async def test_generate_content_async_ollama_chat_preserves_multimodal_content (
17201720 mock_acompletion , mock_completion
17211721):
17221722 llm_client = MockLLMClient (mock_acompletion , mock_completion )
@@ -1748,12 +1748,26 @@ async def test_generate_content_async_ollama_chat_flattens_content(
17481748 )
17491749 _ , kwargs = mock_acompletion .call_args
17501750 message_content = kwargs ["messages" ][0 ]["content" ]
1751- assert isinstance (message_content , str )
1752- assert "Describe this image." in message_content
1751+ # Multimodal content (text + image) should be kept as a list so LiteLLM
1752+ # can convert it to Ollama's native images field.
1753+ assert isinstance (message_content , list )
1754+ text_blocks = [
1755+ b
1756+ for b in message_content
1757+ if isinstance (b , dict ) and b .get ("type" ) == "text"
1758+ ]
1759+ image_blocks = [
1760+ b
1761+ for b in message_content
1762+ if isinstance (b , dict ) and b .get ("type" ) == "image_url"
1763+ ]
1764+ assert len (text_blocks ) >= 1
1765+ assert "Describe this image." in text_blocks [0 ].get ("text" , "" )
1766+ assert len (image_blocks ) >= 1
17531767
17541768
17551769@pytest .mark .asyncio
1756- async def test_generate_content_async_custom_provider_flattens_content (
1770+ async def test_generate_content_async_custom_provider_preserves_multimodal (
17571771 mock_acompletion , mock_completion
17581772):
17591773 llm_client = MockLLMClient (mock_acompletion , mock_completion )
@@ -1784,8 +1798,14 @@ async def test_generate_content_async_custom_provider_flattens_content(
17841798 assert kwargs ["custom_llm_provider" ] == "ollama_chat"
17851799 assert kwargs ["model" ] == "qwen2.5:7b"
17861800 message_content = kwargs ["messages" ][0 ]["content" ]
1787- assert isinstance (message_content , str )
1788- assert "Describe this image." in message_content
1801+ # Multimodal content should be preserved as a list.
1802+ assert isinstance (message_content , list )
1803+ text_blocks = [
1804+ b
1805+ for b in message_content
1806+ if isinstance (b , dict ) and b .get ("type" ) == "text"
1807+ ]
1808+ assert any ("Describe this image." in b .get ("text" , "" ) for b in text_blocks )
17891809
17901810
17911811def test_flatten_ollama_content_accepts_tuple_blocks ():
@@ -1811,16 +1831,6 @@ def test_flatten_ollama_content_accepts_tuple_blocks():
18111831 ],
18121832 "first\n second" ,
18131833 ),
1814- (
1815- [
1816- {"type" : "text" , "text" : "Describe this image." },
1817- {
1818- "type" : "image_url" ,
1819- "image_url" : {"url" : "http://example.com" },
1820- },
1821- ],
1822- "Describe this image." ,
1823- ),
18241834 ],
18251835)
18261836def test_flatten_ollama_content_returns_str_or_none (content , expected ):
@@ -1831,15 +1841,58 @@ def test_flatten_ollama_content_returns_str_or_none(content, expected):
18311841 assert flattened is None or isinstance (flattened , str )
18321842
18331843
1834- def test_flatten_ollama_content_serializes_non_text_blocks_to_json ():
1844+ def test_flatten_ollama_content_preserves_image_url_blocks ():
1845+ """Media blocks should be kept as a list so LiteLLM can convert them."""
18351846 from google .adk .models .lite_llm import _flatten_ollama_content
18361847
18371848 blocks = [
1838- {"type" : "image_url" , "image_url" : {"url" : "http://example.com" }},
1849+ {"type" : "image_url" , "image_url" : {"url" : "http://example.com/img.png " }},
18391850 ]
1840- flattened = _flatten_ollama_content (blocks )
1841- assert isinstance (flattened , str )
1842- assert json .loads (flattened ) == blocks
1851+ result = _flatten_ollama_content (blocks )
1852+ assert isinstance (result , list )
1853+ assert result == blocks
1854+
1855+
1856+ def test_flatten_ollama_content_preserves_mixed_text_and_image ():
1857+ """Text + image_url should return the full list, not just the text."""
1858+ from google .adk .models .lite_llm import _flatten_ollama_content
1859+
1860+ blocks = [
1861+ {"type" : "text" , "text" : "Describe this image." },
1862+ {
1863+ "type" : "image_url" ,
1864+ "image_url" : {"url" : "data:image/png;base64,iVBORw0KGgo=" },
1865+ },
1866+ ]
1867+ result = _flatten_ollama_content (blocks )
1868+ assert isinstance (result , list )
1869+ assert len (result ) == 2
1870+ assert result [0 ]["type" ] == "text"
1871+ assert result [1 ]["type" ] == "image_url"
1872+
1873+
1874+ def test_flatten_ollama_content_preserves_video_url_blocks ():
1875+ from google .adk .models .lite_llm import _flatten_ollama_content
1876+
1877+ blocks = [
1878+ {"type" : "text" , "text" : "What happens in this clip?" },
1879+ {"type" : "video_url" , "video_url" : {"url" : "http://example.com/v.mp4" }},
1880+ ]
1881+ result = _flatten_ollama_content (blocks )
1882+ assert isinstance (result , list )
1883+ assert len (result ) == 2
1884+
1885+
1886+ def test_flatten_ollama_content_serializes_non_media_non_text_blocks_to_json ():
1887+ """Blocks with unknown types and no media should still serialize to JSON."""
1888+ from google .adk .models .lite_llm import _flatten_ollama_content
1889+
1890+ blocks = [
1891+ {"type" : "custom_block" , "data" : "something" },
1892+ ]
1893+ result = _flatten_ollama_content (blocks )
1894+ assert isinstance (result , str )
1895+ assert json .loads (result ) == blocks
18431896
18441897
18451898def test_flatten_ollama_content_serializes_dict_to_json ():
0 commit comments