Skip to content

Commit 4de62cb

Browse files
authored
Merge pull request #1024 from iceljc/features/refactor-llm-cost
Features/refactor llm cost
2 parents: dbf2aab + 3ab3edd; commit: 4de62cb

19 files changed

Lines changed: 244 additions & 108 deletions

File tree

src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,14 @@ public class TokenStatsModel
55
public string Provider { get; set; }
66
public string Model { get; set; }
77
public string Prompt { get; set; }
8-
public int PromptCount { get; set; }
9-
public int CachedPromptCount { get; set; }
10-
public int CompletionCount { get; set; }
8+
public int TextInputTokens { get; set; }
9+
public int CachedTextInputTokens { get; set; }
10+
public int AudioInputTokens { get; set; }
11+
public int CachedAudioInputTokens { get; set; }
12+
public int TextOutputTokens { get; set; }
13+
public int AudioOutputTokens { get; set; }
1114
public AgentLlmConfig LlmConfig { get; set; }
15+
16+
public int TotalInputTokens => TextInputTokens + CachedTextInputTokens + AudioInputTokens + CachedAudioInputTokens;
17+
public int TotalOutputTokens => TextOutputTokens + AudioOutputTokens;
1218
}

src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -42,35 +42,33 @@ public class LlmModelSetting
4242
/// </summary>
4343
public bool ImageGeneration { get; set; }
4444

45-
/// <summary>
46-
/// Prompt cost per 1K token
47-
/// </summary>
48-
public float PromptCost { get; set; }
49-
50-
/// <summary>
51-
/// Completion cost per 1K token
52-
/// </summary>
53-
public float CompletionCost { get; set; }
54-
5545
/// <summary>
5646
/// Embedding dimension
5747
/// </summary>
5848
public int Dimension { get; set; }
5949

60-
public LlmCost AdditionalCost { get; set; } = new();
50+
public LlmCost Cost { get; set; } = new();
6151

6252
public override string ToString()
6353
{
6454
return $"[{Type}] {Name} {Endpoint}";
6555
}
6656
}
6757

58+
/// <summary>
59+
/// Cost per 1K tokens
60+
/// </summary>
6861
public class LlmCost
6962
{
70-
public float CachedPromptCost { get; set; } = 0f;
71-
public float AudioPromptCost { get; set; } = 0f;
72-
public float ReasoningCompletionCost { get; } = 0f;
73-
public float AudioCompletionCost { get; } = 0f;
63+
// Input
64+
public float TextInputCost { get; set; } = 0f;
65+
public float CachedTextInputCost { get; set; } = 0f;
66+
public float AudioInputCost { get; set; } = 0f;
67+
public float CachedAudioInputCost { get; set; } = 0f;
68+
69+
// Output
70+
public float TextOutputCost { get; set; } = 0f;
71+
public float AudioOutputCost { get; set; } = 0f;
7472
}
7573

7674
public enum LlmModelType

src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -35,28 +35,33 @@ public TokenStatistics(IServiceProvider services, ILogger<TokenStatistics> logge
3535
public void AddToken(TokenStatsModel stats, RoleDialogModel message)
3636
{
3737
_model = stats.Model;
38-
_promptTokenCount += stats.PromptCount;
39-
_completionTokenCount += stats.CompletionCount;
38+
_promptTokenCount += stats.TotalInputTokens;
39+
_completionTokenCount += stats.TotalOutputTokens;
4040

4141
var settingsService = _services.GetRequiredService<ILlmProviderService>();
4242
var settings = settingsService.GetSetting(stats.Provider, _model);
4343

44-
var deltaPromptCost = (stats.PromptCount - stats.CachedPromptCount) / 1000f * settings.PromptCost;
45-
var deltaCachedPromptCost = stats.CachedPromptCount / 1000f * (settings.AdditionalCost?.CachedPromptCost ?? 0f);
46-
var deltaCompletionCost = stats.CompletionCount / 1000f * settings.CompletionCost;
44+
var deltaTextInputCost = stats.TextInputTokens / 1000f * (settings.Cost?.TextInputCost ?? 0f);
45+
var deltaCachedTextInputCost = stats.CachedTextInputTokens / 1000f * (settings.Cost?.CachedTextInputCost ?? 0f);
46+
var deltaAudioInputCost = stats.AudioInputTokens / 1000f * (settings.Cost?.AudioInputCost ?? 0f);
47+
var deltaCachedAudioInputCost = stats.CachedAudioInputTokens / 1000f * (settings.Cost?.CachedAudioInputCost ?? 0f);
4748

48-
var deltaTotal = deltaPromptCost + deltaCachedPromptCost + deltaCompletionCost;
49+
var deltaTextOutputCost = stats.TextOutputTokens / 1000f * (settings.Cost?.TextOutputCost ?? 0f);
50+
var deltaAudioOutputCost = stats.AudioOutputTokens / 1000f * (settings.Cost?.AudioOutputCost ?? 0f);
51+
52+
var deltaPromptCost = deltaTextInputCost + deltaCachedTextInputCost + deltaAudioInputCost + deltaCachedAudioInputCost;
53+
var deltaCompletionCost = deltaTextOutputCost + deltaAudioOutputCost;
54+
55+
var deltaTotal = deltaPromptCost + deltaCompletionCost;
4956
_promptCost += deltaPromptCost;
5057
_completionCost += deltaCompletionCost;
5158

5259
// Accumulated Token
5360
var stat = _services.GetRequiredService<IConversationStateService>();
5461
var inputCount = int.Parse(stat.GetState("prompt_total", "0"));
55-
stat.SetState("prompt_total", stats.PromptCount + inputCount, isNeedVersion: false, source: StateSource.Application);
62+
stat.SetState("prompt_total", stats.TotalInputTokens + inputCount, isNeedVersion: false, source: StateSource.Application);
5663
var outputCount = int.Parse(stat.GetState("completion_total", "0"));
57-
stat.SetState("completion_total", stats.CompletionCount + outputCount, isNeedVersion: false, source: StateSource.Application);
58-
var cachedCount = int.Parse(stat.GetState("cached_prompt_total", "0"));
59-
stat.SetState("cached_prompt_total", stats.CachedPromptCount + cachedCount, isNeedVersion: false, source: StateSource.Application);
64+
stat.SetState("completion_total", stats.TotalOutputTokens + outputCount, isNeedVersion: false, source: StateSource.Application);
6065

6166
// Total cost
6267
var total_cost = float.Parse(stat.GetState("llm_total_cost", "0"));
@@ -76,8 +81,8 @@ public void AddToken(TokenStatsModel stats, RoleDialogModel message)
7681
RecordTime = DateTime.UtcNow,
7782
IntervalType = StatsInterval.Day,
7883
Data = [
79-
new StatsKeyValuePair("prompt_token_count_total", stats.PromptCount),
80-
new StatsKeyValuePair("completion_token_count_total", stats.CompletionCount),
84+
new StatsKeyValuePair("prompt_token_count_total", stats.TotalInputTokens),
85+
new StatsKeyValuePair("completion_token_count_total", stats.TotalOutputTokens),
8186
new StatsKeyValuePair("prompt_cost_total", deltaPromptCost),
8287
new StatsKeyValuePair("completion_cost_total", deltaCompletionCost)
8388
]

src/Infrastructure/BotSharp.OpenAPI/Controllers/InstructModeController.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -551,8 +551,8 @@ public async Task<SpeechToTextViewModel> SpeechToText(IFormFile file,
551551

552552
try
553553
{
554-
var auditData = FileUtility.BuildFileDataFromFile(file);
555-
var content = await fileInstruct.SpeechToText(new InstructFileModel { FileData = auditData }, text, new InstructOptions
554+
var audioData = FileUtility.BuildFileDataFromFile(file);
555+
var content = await fileInstruct.SpeechToText(new InstructFileModel { FileData = audioData }, text, new InstructOptions
556556
{
557557
Provider = provider,
558558
Model = model,

src/Plugins/BotSharp.Plugin.AnthropicAI/Providers/ChatCompletionProvider.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,8 +81,8 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
8181
Prompt = prompt,
8282
Provider = Provider,
8383
Model = _model,
84-
PromptCount = response.Usage?.InputTokens ?? 0,
85-
CompletionCount = response.Usage?.OutputTokens ?? 0
84+
TextInputTokens = response.Usage?.InputTokens ?? 0,
85+
TextOutputTokens = response.Usage?.OutputTokens ?? 0
8686
});
8787
}
8888

src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
using Azure;
12
using BotSharp.Abstraction.Files.Utilities;
23
using OpenAI.Chat;
34
using System.ClientModel;
@@ -40,12 +41,13 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
4041
var chatClient = client.GetChatClient(_model);
4142
var (prompt, messages, options) = PrepareOptions(agent, conversations);
4243

44+
ClientResult<ChatCompletion>? response = null;
4345
ChatCompletion value = default;
4446
RoleDialogModel responseMessage;
4547

4648
try
4749
{
48-
var response = chatClient.CompleteChat(messages, options);
50+
response = chatClient.CompleteChat(messages, options);
4951
value = response.Value;
5052

5153
var reason = value.FinishReason;
@@ -101,6 +103,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
101103
};
102104
}
103105

106+
var tokenUsage = response?.Value?.Usage;
107+
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;
108+
104109
// After chat completion hook
105110
foreach (var hook in contentHooks)
106111
{
@@ -109,8 +114,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
109114
Prompt = prompt,
110115
Provider = Provider,
111116
Model = _model,
112-
PromptCount = value?.Usage?.InputTokenCount ?? 0,
113-
CompletionCount = value?.Usage?.OutputTokenCount ?? 0
117+
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
118+
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
119+
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
114120
});
115121
}
116122

@@ -146,6 +152,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent,
146152
RenderedInstruction = string.Join("\r\n", renderedInstructions)
147153
};
148154

155+
var tokenUsage = response?.Value?.Usage;
156+
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;
157+
149158
// After chat completion hook
150159
foreach (var hook in hooks)
151160
{
@@ -154,8 +163,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent,
154163
Prompt = prompt,
155164
Provider = Provider,
156165
Model = _model,
157-
PromptCount = response.Value?.Usage?.InputTokenCount ?? 0,
158-
CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0
166+
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
167+
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
168+
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
159169
});
160170
}
161171

src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Text/TextCompletionProvider.cs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -78,14 +78,15 @@ public async Task<string> GetCompletion(string text, string agentId, string mess
7878
CurrentAgentId = agentId,
7979
MessageId = messageId
8080
};
81+
8182
Task.WaitAll(contentHooks.Select(hook =>
8283
hook.AfterGenerated(responseMessage, new TokenStatsModel
8384
{
8485
Prompt = text,
8586
Provider = Provider,
8687
Model = _model,
87-
PromptCount = response.Usage?.PromptTokens ?? default,
88-
CompletionCount = response.Usage?.CompletionTokens ?? default
88+
TextInputTokens = response?.Usage?.PromptTokens ?? 0,
89+
TextOutputTokens = response?.Usage?.CompletionTokens ?? 0
8990
})).ToArray());
9091

9192
return completion.Trim();

src/Plugins/BotSharp.Plugin.DeepSeekAI/Providers/Chat/ChatCompletionProvider.cs

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
7373
};
7474
}
7575

76+
var tokenUsage = response?.Value?.Usage;
77+
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;
78+
7679
// After chat completion hook
7780
foreach (var hook in contentHooks)
7881
{
@@ -81,8 +84,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
8184
Prompt = prompt,
8285
Provider = Provider,
8386
Model = _model,
84-
PromptCount = response.Value?.Usage?.InputTokenCount ?? 0,
85-
CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0
87+
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
88+
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
89+
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
8690
});
8791
}
8892

@@ -115,6 +119,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
115119
RenderedInstruction = string.Join("\r\n", renderedInstructions)
116120
};
117121

122+
var tokenUsage = response?.Value?.Usage;
123+
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;
124+
118125
// After chat completion hook
119126
foreach (var hook in hooks)
120127
{
@@ -123,8 +130,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
123130
Prompt = prompt,
124131
Provider = Provider,
125132
Model = _model,
126-
PromptCount = response.Value?.Usage?.InputTokenCount ?? 0,
127-
CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0
133+
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
134+
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
135+
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
128136
});
129137
}
130138

src/Plugins/BotSharp.Plugin.DeepSeekAI/Providers/Text/TextCompletionProvider.cs

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,15 +61,19 @@ public async Task<string> GetCompletion(string text, string agentId, string mess
6161
MessageId = messageId
6262
};
6363

64+
var tokenUsage = response?.Value?.Usage;
65+
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;
66+
6467
foreach (var hook in contentHooks)
6568
{
6669
await hook.AfterGenerated(responseMessage, new TokenStatsModel
6770
{
6871
Prompt = text,
6972
Provider = Provider,
7073
Model = _model,
71-
PromptCount = response?.Value?.Usage?.InputTokenCount ?? default,
72-
CompletionCount = response?.Value?.Usage?.OutputTokenCount ?? default
74+
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
75+
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
76+
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
7377
});
7478
}
7579

src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -81,9 +81,8 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
8181
Prompt = prompt,
8282
Provider = Provider,
8383
Model = _model,
84-
PromptCount = response.UsageMetadata?.PromptTokenCount ?? 0,
85-
CachedPromptCount = response.UsageMetadata?.CachedContentTokenCount ?? 0,
86-
CompletionCount = response.UsageMetadata?.CandidatesTokenCount ?? 0
84+
TextInputTokens = response?.UsageMetadata?.PromptTokenCount ?? 0,
85+
TextOutputTokens = response?.UsageMetadata?.CandidatesTokenCount ?? 0
8786
});
8887
}
8988

@@ -124,9 +123,8 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
124123
Prompt = prompt,
125124
Provider = Provider,
126125
Model = _model,
127-
PromptCount = response?.UsageMetadata?.PromptTokenCount ?? 0,
128-
CachedPromptCount = response.UsageMetadata?.CachedContentTokenCount ?? 0,
129-
CompletionCount = response.UsageMetadata?.CandidatesTokenCount ?? 0
126+
TextInputTokens = response?.UsageMetadata?.PromptTokenCount ?? 0,
127+
TextOutputTokens = response?.UsageMetadata?.CandidatesTokenCount ?? 0
130128
});
131129
}
132130

0 commit comments

Comments
 (0)