Skip to content

Commit c24ebdd

Browse files
kunalkushwaha and claude committed
feat(trace): per-model cost estimation + fix token extraction
The trace fallback path read token attributes under keys the framework never emits (llm.usage.completion_tokens / llm.completion_tokens), so TotalTokens — and therefore the cost estimate — was always 0 for runs without a manifest.

- Add internal/pricing: a published-list-price table (per 1M tokens) with exact + longest-prefix model matching (handles dated/variant ids and provider prefixes); local/unknown models return (0, false). Unit tested.
- Read the real AgenticGoKit observability keys (agk.llm.tokens.input/output/total, agk.llm.model, agk.llm.cost.usd) plus legacy aliases for back-compat.
- Replace the hardcoded `tokens * 0.00001` estimate: prefer a directly-reported cost, else a price-table estimate, else 0 for unknown/local models.

Improves the accuracy of `agk trace list/view` and the `agk run` trace summary.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 705998d commit c24ebdd

3 files changed

Lines changed: 228 additions & 23 deletions

File tree

cmd/trace.go

Lines changed: 94 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"time"
1414

1515
"github.com/agenticgokit/agk/internal/audit"
16+
"github.com/agenticgokit/agk/internal/pricing"
1617
"github.com/agenticgokit/agk/internal/tui"
1718
tea "github.com/charmbracelet/bubbletea"
1819
"github.com/spf13/cobra"
@@ -637,7 +638,6 @@ func parseTraceFile(runPath string) (TraceRun, error) {
637638
}
638639

639640
durationSeconds := stats.LastSpan.Sub(stats.FirstSpan).Seconds()
640-
estimatedCost := float64(stats.TotalTokens) * 0.00001 // Rough estimate
641641

642642
return TraceRun{
643643
RunID: runID,
@@ -648,17 +648,22 @@ func parseTraceFile(runPath string) (TraceRun, error) {
648648
Duration: durationSeconds,
649649
SpanCount: stats.SpanCount,
650650
LLMCalls: stats.LLMCalls,
651-
TotalTokens: stats.TotalTokens,
652-
EstimatedCost: estimatedCost,
651+
TotalTokens: stats.Tokens(),
652+
EstimatedCost: stats.Cost(),
653653
}, nil
654654
}
655655

656656
type RunStats struct {
657-
SpanCount int
658-
LLMCalls int
659-
TotalTokens int
660-
FirstSpan time.Time
661-
LastSpan time.Time
657+
SpanCount int
658+
LLMCalls int
659+
InputTokens int
660+
OutputTokens int
661+
TotalTokens int
662+
Model string
663+
DirectCost float64 // summed from agk.llm.cost.usd attributes, if emitted
664+
HasDirectCost bool
665+
FirstSpan time.Time
666+
LastSpan time.Time
662667
}
663668

664669
func (s *RunStats) Update(span map[string]interface{}) {
@@ -671,34 +676,83 @@ func (s *RunStats) Update(span map[string]interface{}) {
671676
}
672677
}
673678

674-
// Extract token count from attributes
679+
// Extract token/model/cost data from attributes
675680
if attrs, ok := span["Attributes"].([]interface{}); ok {
676-
s.extractTokens(attrs)
681+
s.extractAttrs(attrs)
677682
}
678683

679684
// Extract start and end times
680685
s.updateTimes(span)
681686
}
682687

683-
func (s *RunStats) extractTokens(attrs []interface{}) {
688+
// extractAttrs pulls token, model, and cost data from a span's attributes.
689+
// It recognizes the AgenticGoKit observability keys (agk.llm.tokens.*, agk.llm.model,
690+
// agk.llm.cost.usd) plus a few legacy aliases for backward compatibility.
691+
func (s *RunStats) extractAttrs(attrs []interface{}) {
684692
for _, attr := range attrs {
685-
if attrMap, ok := attr.(map[string]interface{}); ok {
686-
if key, ok := attrMap["Key"].(string); ok {
687-
// Look for token-related attributes
688-
if key == "llm.usage.completion_tokens" || key == "llm.completion_tokens" {
689-
if val, ok := attrMap["Value"].(map[string]interface{}); ok {
690-
if tokenVal, ok := val["Value"]; ok {
691-
if tokenInt, err := toInt64(tokenVal); err == nil {
692-
s.TotalTokens += int(tokenInt)
693-
}
694-
}
695-
}
696-
}
693+
attrMap, ok := attr.(map[string]interface{})
694+
if !ok {
695+
continue
696+
}
697+
key, ok := attrMap["Key"].(string)
698+
if !ok {
699+
continue
700+
}
701+
val, ok := attrMap["Value"].(map[string]interface{})
702+
if !ok {
703+
continue
704+
}
705+
raw := val["Value"]
706+
707+
switch key {
708+
case "agk.llm.tokens.input", "agk.llm.tokens.prompt",
709+
"llm.usage.prompt_tokens", "llm.prompt_tokens":
710+
if n, err := toInt64(raw); err == nil {
711+
s.InputTokens += int(n)
712+
}
713+
case "agk.llm.tokens.output", "agk.llm.tokens.completion",
714+
"llm.usage.completion_tokens", "llm.completion_tokens":
715+
if n, err := toInt64(raw); err == nil {
716+
s.OutputTokens += int(n)
717+
}
718+
case "agk.llm.tokens.total", "llm.usage.total_tokens":
719+
if n, err := toInt64(raw); err == nil {
720+
s.TotalTokens += int(n)
721+
}
722+
case "agk.llm.cost.usd":
723+
if f, err := toFloat64(raw); err == nil {
724+
s.DirectCost += f
725+
s.HasDirectCost = true
726+
}
727+
case "agk.llm.model":
728+
if str, ok := raw.(string); ok && str != "" {
729+
s.Model = str
697730
}
698731
}
699732
}
700733
}
701734

735+
// Tokens returns the best available total token count, preferring an explicit
736+
// total and falling back to input+output.
737+
func (s *RunStats) Tokens() int {
738+
if s.TotalTokens > 0 {
739+
return s.TotalTokens
740+
}
741+
return s.InputTokens + s.OutputTokens
742+
}
743+
744+
// Cost returns the estimated USD cost: a directly-reported cost if present,
745+
// otherwise a price-table estimate, otherwise 0 for unknown/local models.
746+
func (s *RunStats) Cost() float64 {
747+
if s.HasDirectCost {
748+
return s.DirectCost
749+
}
750+
if cost, ok := pricing.Estimate(s.Model, s.InputTokens, s.OutputTokens); ok {
751+
return cost
752+
}
753+
return 0
754+
}
755+
702756
func (s *RunStats) updateTimes(span map[string]interface{}) {
703757
// Extract start and end times from span
704758
// Format: "2026-01-19T18:36:38.897+09:00"
@@ -741,6 +795,23 @@ func toInt64(v interface{}) (int64, error) {
741795
}
742796
}
743797

// toFloat64 converts a decoded attribute value to float64.
//
// Accepted inputs are float64/float32, the signed and unsigned integer kinds
// listed below, numeric strings, and any value that exposes a
// Float64() (float64, error) method (encoding/json.Number has that method, so
// values decoded with Decoder.UseNumber are handled without importing
// encoding/json here). Any other type, including nil, yields an error.
//
// On error the returned float is always 0 so callers never see a partially
// parsed value.
func toFloat64(v interface{}) (float64, error) {
	switch val := v.(type) {
	case float64:
		return val, nil
	case float32:
		return float64(val), nil
	case int:
		return float64(val), nil
	case int32:
		return float64(val), nil
	case int64:
		return float64(val), nil
	case uint:
		return float64(val), nil
	case uint32:
		return float64(val), nil
	case uint64:
		return float64(val), nil
	case interface{ Float64() (float64, error) }:
		// Named string types such as json.Number do not match `case string`,
		// so they are picked up here through their conversion method.
		f, err := val.Float64()
		if err != nil {
			return 0, err
		}
		return f, nil
	case string:
		f, err := strconv.ParseFloat(val, 64)
		if err != nil {
			return 0, err
		}
		return f, nil
	default:
		return 0, fmt.Errorf("cannot convert %T to float64", v)
	}
}
814+
744815
func getLatestRunID() string {
745816
entries, err := os.ReadDir(runsDirName)
746817
if err != nil {

internal/pricing/pricing.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Package pricing provides approximate USD cost estimates for LLM token usage.
2+
//
3+
// Prices are published list prices per 1,000,000 tokens and are intended for
4+
// rough cost reporting in traces, not billing. They drift over time; update the
5+
// table as providers change pricing. Local models (Ollama, etc.) are treated as
6+
// free and intentionally absent from the table.
7+
package pricing
8+
9+
import "strings"
10+
// ModelPrice is the USD price per 1,000,000 tokens, split by input and output.
type ModelPrice struct {
	InputPer1M  float64
	OutputPer1M float64
}

// table maps a normalized model key to its price. Lookups use exact match first,
// then the longest key that is followed by a variant separator, so dated/variant
// model ids (e.g. "gpt-4o-2024-08-06", "claude-sonnet-4-20250514") resolve to
// their base model.
var table = map[string]ModelPrice{
	// OpenAI
	"gpt-4o":        {InputPer1M: 2.50, OutputPer1M: 10.00},
	"gpt-4o-mini":   {InputPer1M: 0.15, OutputPer1M: 0.60},
	"gpt-4-turbo":   {InputPer1M: 10.00, OutputPer1M: 30.00},
	"gpt-4":         {InputPer1M: 30.00, OutputPer1M: 60.00},
	"gpt-3.5-turbo": {InputPer1M: 0.50, OutputPer1M: 1.50},
	"o1":            {InputPer1M: 15.00, OutputPer1M: 60.00},
	"o1-mini":       {InputPer1M: 1.10, OutputPer1M: 4.40},
	"o3-mini":       {InputPer1M: 1.10, OutputPer1M: 4.40},

	// Anthropic
	"claude-3-5-sonnet": {InputPer1M: 3.00, OutputPer1M: 15.00},
	"claude-3-5-haiku":  {InputPer1M: 0.80, OutputPer1M: 4.00},
	"claude-3-opus":     {InputPer1M: 15.00, OutputPer1M: 75.00},
	"claude-3-sonnet":   {InputPer1M: 3.00, OutputPer1M: 15.00},
	"claude-3-haiku":    {InputPer1M: 0.25, OutputPer1M: 1.25},
	"claude-sonnet-4":   {InputPer1M: 3.00, OutputPer1M: 15.00},
	"claude-opus-4":     {InputPer1M: 15.00, OutputPer1M: 75.00},
	"claude-haiku-4":    {InputPer1M: 1.00, OutputPer1M: 5.00},
}

// Estimate returns the estimated USD cost for the given token usage and whether
// the model was found in the price table. Unknown or local models return (0, false),
// letting callers decide how to report an unpriced run.
func Estimate(model string, inputTokens, outputTokens int) (float64, bool) {
	p, ok := lookup(model)
	if !ok {
		return 0, false
	}
	cost := float64(inputTokens)/1e6*p.InputPer1M + float64(outputTokens)/1e6*p.OutputPer1M
	return cost, true
}

// lookup resolves a model id to its price entry.
//
// The id is normalized first; an exact table hit wins. Otherwise the longest
// table key that the id extends *at a separator boundary* is used. Requiring
// the boundary keeps a different model family that merely shares leading
// characters (e.g. "gpt-4.1" vs "gpt-4") from being priced as the shorter key;
// such ids are reported as unknown instead.
func lookup(model string) (ModelPrice, bool) {
	key := normalize(model)
	if key == "" {
		return ModelPrice{}, false
	}
	if p, ok := table[key]; ok {
		return p, true
	}

	var (
		best    ModelPrice
		bestLen int
	)
	for base, price := range table {
		if len(base) > bestLen && isVariantOf(key, base) {
			best, bestLen = price, len(base)
		}
	}
	return best, bestLen > 0
}

// isVariantOf reports whether id is base followed by a variant separator
// ('-', ':' or '@') and at least that one extra character.
func isVariantOf(id, base string) bool {
	if len(id) <= len(base) || !strings.HasPrefix(id, base) {
		return false
	}
	switch id[len(base)] {
	case '-', ':', '@':
		return true
	default:
		return false
	}
}

// normalize trims surrounding whitespace, lowercases the model id, and strips
// everything up to and including the last '/' (e.g. "openai/gpt-4o" -> "gpt-4o").
func normalize(model string) string {
	m := strings.ToLower(strings.TrimSpace(model))
	if i := strings.LastIndex(m, "/"); i >= 0 {
		m = m[i+1:]
	}
	return m
}

internal/pricing/pricing_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package pricing
2+
3+
import "testing"
4+
5+
func TestEstimate(t *testing.T) {
6+
tests := []struct {
7+
name string
8+
model string
9+
in int
10+
out int
11+
want float64
12+
matched bool
13+
approxOK bool // compare with tolerance instead of exact
14+
}{
15+
{name: "gpt-4o exact", model: "gpt-4o", in: 1_000_000, out: 1_000_000, want: 12.50, matched: true},
16+
{name: "gpt-4o-mini", model: "gpt-4o-mini", in: 1_000_000, out: 0, want: 0.15, matched: true},
17+
{name: "gpt-4o dated suffix", model: "gpt-4o-2024-08-06", in: 1_000_000, out: 0, want: 2.50, matched: true},
18+
{name: "longest prefix prefers mini", model: "gpt-4o-mini-2024-07-18", in: 1_000_000, out: 0, want: 0.15, matched: true},
19+
{name: "provider prefix stripped", model: "openai/gpt-4o", in: 0, out: 1_000_000, want: 10.00, matched: true},
20+
{name: "claude sonnet 4 dated", model: "claude-sonnet-4-20250514", in: 1_000_000, out: 0, want: 3.00, matched: true},
21+
{name: "case insensitive", model: "GPT-4O", in: 1_000_000, out: 0, want: 2.50, matched: true},
22+
{name: "zero tokens still matched", model: "gpt-4o", in: 0, out: 0, want: 0, matched: true},
23+
{name: "local model unknown", model: "llama3.2", in: 1_000_000, out: 1_000_000, want: 0, matched: false},
24+
{name: "empty model", model: "", in: 100, out: 100, want: 0, matched: false},
25+
}
26+
27+
for _, tc := range tests {
28+
t.Run(tc.name, func(t *testing.T) {
29+
got, matched := Estimate(tc.model, tc.in, tc.out)
30+
if matched != tc.matched {
31+
t.Fatalf("matched = %v, want %v", matched, tc.matched)
32+
}
33+
if diff := got - tc.want; diff > 1e-9 || diff < -1e-9 {
34+
t.Fatalf("cost = %v, want %v", got, tc.want)
35+
}
36+
})
37+
}
38+
}
39+
40+
func TestEstimatePartialTokens(t *testing.T) {
41+
// 1,500 input + 500 output on gpt-4o:
42+
// 1500/1e6*2.50 + 500/1e6*10.00 = 0.00375 + 0.005 = 0.00875
43+
got, matched := Estimate("gpt-4o", 1500, 500)
44+
if !matched {
45+
t.Fatal("expected gpt-4o to match")
46+
}
47+
want := 0.00875
48+
if diff := got - want; diff > 1e-9 || diff < -1e-9 {
49+
t.Fatalf("cost = %v, want %v", got, want)
50+
}
51+
}

0 commit comments

Comments
 (0)