diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index 54fc51f72..f3ed873e6 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -112,7 +112,7 @@ def parse_completion_response(response) Message.new( role: :assistant, - content: extract_text_parts(parts) || parse_content(data), + content: parse_content(data), thinking: Thinking.build( text: extract_thought_parts(parts), signature: extract_thought_signature(parts) @@ -140,21 +140,13 @@ def parse_content(data) candidate = data.dig('candidates', 0) return '' unless candidate - return '' if function_call?(candidate) - parts = candidate.dig('content', 'parts') return '' unless parts&.any? - non_thought_parts = parts.reject { |part| part['thought'] } - return '' unless non_thought_parts.any? - - build_response_content(non_thought_parts) - end + content_parts = parts.reject { |part| part['thought'] || part['functionCall'] } + return '' unless content_parts.any? - def extract_text_parts(parts) - text_parts = parts.reject { |p| p['thought'] } - content = text_parts.filter_map { |p| p['text'] }.join - content.empty? ? nil : content + build_response_content(content_parts) end def extract_thought_parts(parts) diff --git a/lib/ruby_llm/providers/gemini/media.rb b/lib/ruby_llm/providers/gemini/media.rb index 6a7dcbae3..b9353a201 100644 --- a/lib/ruby_llm/providers/gemini/media.rb +++ b/lib/ruby_llm/providers/gemini/media.rb @@ -71,7 +71,11 @@ def build_response_content(parts) # rubocop:disable Metrics/PerceivedComplexity text = nil if text.empty? return text if attachments.empty? - Content.new(text:, attachments:) + Content.new(text).tap do |content| + attachments.each do |attachment| + content.add_attachment(attachment.source, filename: attachment.filename) + end + end end def build_inline_attachment(inline_data, index) diff --git a/spec/ruby_llm/providers/gemini/chat_spec.rb b/spec/ruby_llm/providers/gemini/chat_spec.rb index ea4675262..52432ba47 100644 --- a/spec/ruby_llm/providers/gemini/chat_spec.rb +++ b/spec/ruby_llm/providers/gemini/chat_spec.rb @@ -600,6 +600,49 @@ expect(message.output_tokens).to eq(8) expect(message.cached_tokens).to eq(21) end + + it 'handles message where both text and attachments are present' do + raw_data = 'fake-image-bytes' + encoded_data = Base64.strict_encode64(raw_data) + response = Struct.new(:body, :env).new( + { + 'candidates' => [ + { + 'content' => { + 'parts' => [ + { + 'functionCall' => { + 'name' => 'lookup_weather', + 'args' => { 'city' => 'Paris' } + } + }, + { 'text' => 'Here is the result with an image.' }, + { + 'inlineData' => { + 'mimeType' => 'image/png', + 'data' => encoded_data + } + } + ] + } + } + ], + 'usageMetadata' => {} + }, + Struct.new(:url).new(Struct.new(:path).new('/v1/models/gemini-2.5-flash-image:generateContent')) + ) + + provider = RubyLLM::Providers::Gemini.new(RubyLLM.config) + message = provider.send(:parse_completion_response, response) + attachment = message.content.attachments.first + + expect(message.content).to be_a(RubyLLM::Content) + expect(message.content.text).to eq('Here is the result with an image.') + expect(message.content.attachments.size).to eq(1) + expect(attachment).to be_a(RubyLLM::Attachment) + expect(attachment.mime_type).to eq('image/png') + expect(attachment.content).to eq(raw_data) + end end it 'correctly sums candidatesTokenCount and thoughtsTokenCount' do