From 1909539dd3a961148234f3626d86d3163165d866 Mon Sep 17 00:00:00 2001 From: Kunal Kushwaha Date: Mon, 22 Jun 2026 00:06:43 +0900 Subject: [PATCH] feat(trace): per-model cost estimation + fix token extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The trace fallback path read token attributes under keys the framework never emits (llm.usage.completion_tokens / llm.completion_tokens), so TotalTokens — and therefore the cost estimate — was always 0 for runs without a manifest. - Add internal/pricing: a published-list-price table (per 1M tokens) with exact + longest-prefix model matching (handles dated/variant ids and provider prefixes); local/unknown models return (0, false). Unit tested. - Read the real AgenticGoKit observability keys (agk.llm.tokens.input/output/total, agk.llm.model, agk.llm.cost.usd) plus legacy aliases for back-compat. - Replace the hardcoded `tokens * 0.00001` estimate: prefer a directly-reported cost, else a price-table estimate, else 0 for unknown/local models. Improves the accuracy of `agk trace list/view` and the `agk run` trace summary. Co-Authored-By: Claude Opus 4.8 --- cmd/trace.go | 117 +++++++++++++++++++++++++------ internal/pricing/pricing.go | 83 ++++++++++++++++++++++ internal/pricing/pricing_test.go | 51 ++++++++++++++ 3 files changed, 228 insertions(+), 23 deletions(-) create mode 100644 internal/pricing/pricing.go create mode 100644 internal/pricing/pricing_test.go diff --git a/cmd/trace.go b/cmd/trace.go index b9e4450..a8e7d30 100644 --- a/cmd/trace.go +++ b/cmd/trace.go @@ -13,6 +13,7 @@ import ( "time" "github.com/agenticgokit/agk/internal/audit" + "github.com/agenticgokit/agk/internal/pricing" "github.com/agenticgokit/agk/internal/tui" tea "github.com/charmbracelet/bubbletea" "github.com/spf13/cobra" @@ -637,7 +638,6 @@ func parseTraceFile(runPath string) (TraceRun, error) { } durationSeconds := stats.LastSpan.Sub(stats.FirstSpan).Seconds() - estimatedCost := float64(stats.TotalTokens) * 0.00001 // Rough estimate return TraceRun{ RunID: runID, @@ -648,17 +648,22 @@ func parseTraceFile(runPath string) (TraceRun, error) { Duration: durationSeconds, SpanCount: stats.SpanCount, LLMCalls: stats.LLMCalls, - TotalTokens: stats.TotalTokens, - EstimatedCost: estimatedCost, + TotalTokens: stats.Tokens(), + EstimatedCost: stats.Cost(), }, nil } type RunStats struct { - SpanCount int - LLMCalls int - TotalTokens int - FirstSpan time.Time - LastSpan time.Time + SpanCount int + LLMCalls int + InputTokens int + OutputTokens int + TotalTokens int + Model string + DirectCost float64 // summed from agk.llm.cost.usd attributes, if emitted + HasDirectCost bool + FirstSpan time.Time + LastSpan time.Time } func (s *RunStats) Update(span map[string]interface{}) { @@ -671,34 +676,83 @@ func (s *RunStats) Update(span map[string]interface{}) { } } - // Extract token count from attributes + // Extract token/model/cost data from attributes if attrs, ok := span["Attributes"].([]interface{}); ok { - s.extractTokens(attrs) + s.extractAttrs(attrs) } // Extract start and end times s.updateTimes(span) } -func (s *RunStats) extractTokens(attrs []interface{}) { +// extractAttrs pulls token, model, and cost data from a span's attributes. +// It recognizes the AgenticGoKit observability keys (agk.llm.tokens.*, agk.llm.model, +// agk.llm.cost.usd) plus a few legacy aliases for backward compatibility. +func (s *RunStats) extractAttrs(attrs []interface{}) { for _, attr := range attrs { - if attrMap, ok := attr.(map[string]interface{}); ok { - if key, ok := attrMap["Key"].(string); ok { - // Look for token-related attributes - if key == "llm.usage.completion_tokens" || key == "llm.completion_tokens" { - if val, ok := attrMap["Value"].(map[string]interface{}); ok { - if tokenVal, ok := val["Value"]; ok { - if tokenInt, err := toInt64(tokenVal); err == nil { - s.TotalTokens += int(tokenInt) - } - } - } - } + attrMap, ok := attr.(map[string]interface{}) + if !ok { + continue + } + key, ok := attrMap["Key"].(string) + if !ok { + continue + } + val, ok := attrMap["Value"].(map[string]interface{}) + if !ok { + continue + } + raw := val["Value"] + + switch key { + case "agk.llm.tokens.input", "agk.llm.tokens.prompt", + "llm.usage.prompt_tokens", "llm.prompt_tokens": + if n, err := toInt64(raw); err == nil { + s.InputTokens += int(n) + } + case "agk.llm.tokens.output", "agk.llm.tokens.completion", + "llm.usage.completion_tokens", "llm.completion_tokens": + if n, err := toInt64(raw); err == nil { + s.OutputTokens += int(n) + } + case "agk.llm.tokens.total", "llm.usage.total_tokens": + if n, err := toInt64(raw); err == nil { + s.TotalTokens += int(n) + } + case "agk.llm.cost.usd": + if f, err := toFloat64(raw); err == nil { + s.DirectCost += f + s.HasDirectCost = true + } + case "agk.llm.model": + if str, ok := raw.(string); ok && str != "" { + s.Model = str } } } } +// Tokens returns the best available total token count, preferring an explicit +// total and falling back to input+output. +func (s *RunStats) Tokens() int { + if s.TotalTokens > 0 { + return s.TotalTokens + } + return s.InputTokens + s.OutputTokens +} + +// Cost returns the estimated USD cost: a directly-reported cost if present, +// otherwise a price-table estimate, otherwise 0 for unknown/local models. +func (s *RunStats) Cost() float64 { + if s.HasDirectCost { + return s.DirectCost + } + if cost, ok := pricing.Estimate(s.Model, s.InputTokens, s.OutputTokens); ok { + return cost + } + return 0 +} + func (s *RunStats) updateTimes(span map[string]interface{}) { // Extract start and end times from span // Format: "2026-01-19T18:36:38.897+09:00" @@ -741,6 +795,23 @@ func toInt64(v interface{}) (int64, error) { } } +// toFloat64 safely converts a value to float64 +func toFloat64(v interface{}) (float64, error) { + switch val := v.(type) { + case float64: + return val, nil + case int: + return float64(val), nil + case int64: + return float64(val), nil + case string: + f, err := strconv.ParseFloat(val, 64) + return f, err + default: + return 0, fmt.Errorf("cannot convert %T to float64", v) + } +} + func getLatestRunID() string { entries, err := os.ReadDir(runsDirName) if err != nil { diff --git a/internal/pricing/pricing.go b/internal/pricing/pricing.go new file mode 100644 index 0000000..5ca8a74 --- /dev/null +++ b/internal/pricing/pricing.go @@ -0,0 +1,83 @@ +// Package pricing provides approximate USD cost estimates for LLM token usage. +// +// Prices are published list prices per 1,000,000 tokens and are intended for +// rough cost reporting in traces, not billing. They drift over time; update the +// table as providers change pricing. Local models (Ollama, etc.) are treated as +// free and intentionally absent from the table. +package pricing + +import "strings" + +// ModelPrice is the USD price per 1,000,000 tokens, split by input and output. +type ModelPrice struct { + InputPer1M float64 + OutputPer1M float64 +} + +// table maps a normalized model key to its price. Lookups use exact match first, +// then the longest matching prefix, so dated/variant model ids (e.g. +// "gpt-4o-2024-08-06", "claude-sonnet-4-20250514") resolve to their base model. +var table = map[string]ModelPrice{ + // OpenAI + "gpt-4o": {InputPer1M: 2.50, OutputPer1M: 10.00}, + "gpt-4o-mini": {InputPer1M: 0.15, OutputPer1M: 0.60}, + "gpt-4-turbo": {InputPer1M: 10.00, OutputPer1M: 30.00}, + "gpt-4": {InputPer1M: 30.00, OutputPer1M: 60.00}, + "gpt-3.5-turbo": {InputPer1M: 0.50, OutputPer1M: 1.50}, + "o1": {InputPer1M: 15.00, OutputPer1M: 60.00}, + "o1-mini": {InputPer1M: 1.10, OutputPer1M: 4.40}, + "o3-mini": {InputPer1M: 1.10, OutputPer1M: 4.40}, + + // Anthropic + "claude-3-5-sonnet": {InputPer1M: 3.00, OutputPer1M: 15.00}, + "claude-3-5-haiku": {InputPer1M: 0.80, OutputPer1M: 4.00}, + "claude-3-opus": {InputPer1M: 15.00, OutputPer1M: 75.00}, + "claude-3-sonnet": {InputPer1M: 3.00, OutputPer1M: 15.00}, + "claude-3-haiku": {InputPer1M: 0.25, OutputPer1M: 1.25}, + "claude-sonnet-4": {InputPer1M: 3.00, OutputPer1M: 15.00}, + "claude-opus-4": {InputPer1M: 15.00, OutputPer1M: 75.00}, + "claude-haiku-4": {InputPer1M: 1.00, OutputPer1M: 5.00}, +} + +// Estimate returns the estimated USD cost for the given token usage and whether +// the model was found in the price table. Unknown or local models return (0, false), +// letting callers decide how to report an unpriced run. +func Estimate(model string, inputTokens, outputTokens int) (float64, bool) { + p, ok := lookup(model) + if !ok { + return 0, false + } + cost := float64(inputTokens)/1e6*p.InputPer1M + float64(outputTokens)/1e6*p.OutputPer1M + return cost, true +} + +func lookup(model string) (ModelPrice, bool) { + key := normalize(model) + if key == "" { + return ModelPrice{}, false + } + if p, ok := table[key]; ok { + return p, true + } + // Longest-prefix match handles dated/variant ids. + var best string + for k := range table { + if strings.HasPrefix(key, k) && len(k) > len(best) { + best = k + } + } + if best != "" { + return table[best], true + } + return ModelPrice{}, false +} + +// normalize lowercases the model id and strips any provider prefix +// (e.g. "openai/gpt-4o" -> "gpt-4o"). +func normalize(model string) string { + m := strings.ToLower(strings.TrimSpace(model)) + if i := strings.LastIndex(m, "/"); i >= 0 { + m = m[i+1:] + } + return m +} diff --git a/internal/pricing/pricing_test.go b/internal/pricing/pricing_test.go new file mode 100644 index 0000000..34cae4f --- /dev/null +++ b/internal/pricing/pricing_test.go @@ -0,0 +1,51 @@ +package pricing + +import "testing" + +func TestEstimate(t *testing.T) { + tests := []struct { + name string + model string + in int + out int + want float64 + matched bool + approxOK bool // compare with tolerance instead of exact + }{ + {name: "gpt-4o exact", model: "gpt-4o", in: 1_000_000, out: 1_000_000, want: 12.50, matched: true}, + {name: "gpt-4o-mini", model: "gpt-4o-mini", in: 1_000_000, out: 0, want: 0.15, matched: true}, + {name: "gpt-4o dated suffix", model: "gpt-4o-2024-08-06", in: 1_000_000, out: 0, want: 2.50, matched: true}, + {name: "longest prefix prefers mini", model: "gpt-4o-mini-2024-07-18", in: 1_000_000, out: 0, want: 0.15, matched: true}, + {name: "provider prefix stripped", model: "openai/gpt-4o", in: 0, out: 1_000_000, want: 10.00, matched: true}, + {name: "claude sonnet 4 dated", model: "claude-sonnet-4-20250514", in: 1_000_000, out: 0, want: 3.00, matched: true}, + {name: "case insensitive", model: "GPT-4O", in: 1_000_000, out: 0, want: 2.50, matched: true}, + {name: "zero tokens still matched", model: "gpt-4o", in: 0, out: 0, want: 0, matched: true}, + {name: "local model unknown", model: "llama3.2", in: 1_000_000, out: 1_000_000, want: 0, matched: false}, + {name: "empty model", model: "", in: 100, out: 100, want: 0, matched: false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, matched := Estimate(tc.model, tc.in, tc.out) + if matched != tc.matched { + t.Fatalf("matched = %v, want %v", matched, tc.matched) + } + if diff := got - tc.want; diff > 1e-9 || diff < -1e-9 { + t.Fatalf("cost = %v, want %v", got, tc.want) + } + }) + } +} + +func TestEstimatePartialTokens(t *testing.T) { + // 1,500 input + 500 output on gpt-4o: + // 1500/1e6*2.50 + 500/1e6*10.00 = 0.00375 + 0.005 = 0.00875 + got, matched := Estimate("gpt-4o", 1500, 500) + if !matched { + t.Fatal("expected gpt-4o to match") + } + want := 0.00875 + if diff := got - want; diff > 1e-9 || diff < -1e-9 { + t.Fatalf("cost = %v, want %v", got, want) + } +}