Skip to content

Commit 1094945

Browse files
committed
Populate Gemini cached token usage
Map usageMetadata.cachedContentTokenCount to cached_tokens for Gemini chat responses and streaming chunks, and add provider specs covering both paths.
1 parent beec837 commit 1094945

4 files changed

Lines changed: 60 additions & 0 deletions

File tree

lib/ruby_llm/providers/gemini/chat.rb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@ def parse_completion_response(response)
120120
tool_calls: tool_calls,
121121
input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
122122
output_tokens: calculate_output_tokens(data),
123+
cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
123124
thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
124125
model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
125126
raw: response

lib/ruby_llm/providers/gemini/streaming.rb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ def build_chunk(data)
2222
),
2323
input_tokens: extract_input_tokens(data),
2424
output_tokens: extract_output_tokens(data),
25+
cached_tokens: data.dig('usageMetadata', 'cachedContentTokenCount'),
2526
thinking_tokens: data.dig('usageMetadata', 'thoughtsTokenCount'),
2627
tool_calls: extract_tool_calls(data)
2728
)

spec/ruby_llm/providers/gemini/chat_spec.rb

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -573,6 +573,33 @@
573573
expect(message.content).to eq('{"ok":true}')
574574
expect(message.thinking&.text).to eq('Reasoning trace')
575575
end
576+
577+
it 'captures cached token usage when present' do
578+
response = Struct.new(:body, :env).new(
579+
{
580+
'candidates' => [
581+
{
582+
'content' => {
583+
'parts' => [{ 'text' => 'Hi' }]
584+
}
585+
}
586+
],
587+
'usageMetadata' => {
588+
'promptTokenCount' => 42,
589+
'candidatesTokenCount' => 8,
590+
'cachedContentTokenCount' => 21
591+
}
592+
},
593+
Struct.new(:url).new(Struct.new(:path).new('/v1/models/gemini-2.5-flash:generateContent'))
594+
)
595+
596+
provider = RubyLLM::Providers::Gemini.new(RubyLLM.config)
597+
message = provider.send(:parse_completion_response, response)
598+
599+
expect(message.input_tokens).to eq(42)
600+
expect(message.output_tokens).to eq(8)
601+
expect(message.cached_tokens).to eq(21)
602+
end
576603
end
577604

578605
it 'correctly sums candidatesTokenCount and thoughtsTokenCount' do

spec/ruby_llm/providers/gemini/streaming_spec.rb

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,37 @@
55
RSpec.describe RubyLLM::Providers::Gemini::Streaming do
66
include_context 'with configured RubyLLM'
77

8+
let(:test_obj) do
9+
Object.new.tap do |obj|
10+
obj.extend(RubyLLM::Providers::Gemini::Tools)
11+
obj.extend(described_class)
12+
end
13+
end
14+
15+
it 'captures cached token usage on chunks when present' do
16+
data = {
17+
'candidates' => [
18+
{
19+
'content' => {
20+
'parts' => [{ 'text' => 'hello' }]
21+
}
22+
}
23+
],
24+
'usageMetadata' => {
25+
'promptTokenCount' => 10,
26+
'candidatesTokenCount' => 4,
27+
'cachedContentTokenCount' => 6
28+
},
29+
'modelVersion' => 'gemini-2.5-flash'
30+
}
31+
32+
chunk = test_obj.send(:build_chunk, data)
33+
34+
expect(chunk.input_tokens).to eq(10)
35+
expect(chunk.output_tokens).to eq(4)
36+
expect(chunk.cached_tokens).to eq(6)
37+
end
38+
839
it 'correctly sums candidatesTokenCount and thoughtsTokenCount in streaming' do
940
chat = RubyLLM.chat(model: 'gemini-2.5-flash', provider: :gemini)
1041

0 commit comments

Comments
 (0)