Populate Gemini cached token usage

crmne · crmne · commit 10949459d124 · 2026-03-04T12:25:03.000+01:00
Map usageMetadata.cachedContentTokenCount to cached_tokens for Gemini chat responses and streaming chunks, and add provider specs covering both paths.
diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb
@@ -120,6 +120,7 @@ def parse_completion_response(response)
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: calculate_output_tokens(data),
+            cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
             thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
             raw: response
diff --git a/lib/ruby_llm/providers/gemini/streaming.rb b/lib/ruby_llm/providers/gemini/streaming.rb
@@ -22,6 +22,7 @@ def build_chunk(data)
             ),
             input_tokens: extract_input_tokens(data),
             output_tokens: extract_output_tokens(data),
+            cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
             thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
             tool_calls: extract_tool_calls(data)
           )
diff --git a/spec/ruby_llm/providers/gemini/chat_spec.rb b/spec/ruby_llm/providers/gemini/chat_spec.rb
@@ -573,6 +573,33 @@
       expect(message.content).to eq('{"ok":true}')
       expect(message.thinking&.text).to eq('Reasoning trace')
     end
+
+    it 'captures cached token usage when present' do
+      response = Struct.new(:body, :env).new(
+        {
+          'candidates' => [
+            {
+              'content' => {
+                'parts' => [{ 'text' => 'Hi' }]
+              }
+            }
+          ],
+          'usageMetadata' => {
+            'promptTokenCount' => 42,
+            'candidatesTokenCount' => 8,
+            'cachedContentTokenCount' => 21
+          }
+        },
+        Struct.new(:url).new(Struct.new(:path).new('/v1/models/gemini-2.5-flash:generateContent'))
+      )
+
+      provider = RubyLLM::Providers::Gemini.new(RubyLLM.config)
+      message = provider.send(:parse_completion_response, response)
+
+      expect(message.input_tokens).to eq(42)
+      expect(message.output_tokens).to eq(8)
+      expect(message.cached_tokens).to eq(21)
+    end
   end
 
   it 'correctly sums candidatesTokenCount and thoughtsTokenCount' do
diff --git a/spec/ruby_llm/providers/gemini/streaming_spec.rb b/spec/ruby_llm/providers/gemini/streaming_spec.rb
@@ -5,6 +5,37 @@
 RSpec.describe RubyLLM::Providers::Gemini::Streaming do
   include_context 'with configured RubyLLM'
 
+  let(:test_obj) do
+    Object.new.tap do |obj|
+      obj.extend(RubyLLM::Providers::Gemini::Tools)
+      obj.extend(described_class)
+    end
+  end
+
+  it 'captures cached token usage on chunks when present' do
+    data = {
+      'candidates' => [
+        {
+          'content' => {
+            'parts' => [{ 'text' => 'hello' }]
+          }
+        }
+      ],
+      'usageMetadata' => {
+        'promptTokenCount' => 10,
+        'candidatesTokenCount' => 4,
+        'cachedContentTokenCount' => 6
+      },
+      'modelVersion' => 'gemini-2.5-flash'
+    }
+
+    chunk = test_obj.send(:build_chunk, data)
+
+    expect(chunk.input_tokens).to eq(10)
+    expect(chunk.output_tokens).to eq(4)
+    expect(chunk.cached_tokens).to eq(6)
+  end
+
   it 'correctly sums candidatesTokenCount and thoughtsTokenCount in streaming' do
     chat = RubyLLM.chat(model: 'gemini-2.5-flash', provider: :gemini)
 

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@ def build_chunk(data)`
`22`	`22`	`),`
`23`	`23`	`input_tokens: extract_input_tokens(data),`
`24`	`24`	`output_tokens: extract_output_tokens(data),`
	`25`	`+ cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),`
`25`	`26`	`thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),`
`26`	`27`	`tool_calls: extract_tool_calls(data)`
`27`	`28`	`)`