Skip to content

Commit d2e4f85

Browse files
abwuge and james-6-23 authored
Fix/fast pricing (#153)
* fix: bill fast requests by actual tier
* fix: use explicit priority prices for fast billing
* fix: preserve upstream billing tier

---------

Co-authored-by: KYX <kyxjames23@gmail.com>
1 parent 94b9dff commit d2e4f85

7 files changed

Lines changed: 261 additions & 107 deletions

File tree

database/billing.go

Lines changed: 22 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -41,12 +41,12 @@ var (
4141

4242
modelPricingRules = []modelPricingRule{
4343
{model: "gpt-5.5", pricing: ModelPricing{
44-
InputPricePerMToken: 5.0,
45-
InputPricePerMTokenPriority: 12.5,
46-
OutputPricePerMToken: 30.0,
47-
OutputPricePerMTokenPriority: 75.0,
48-
CacheReadPricePerMToken: 0.5,
49-
CacheReadPricePerMTokenPriority: 1.25,
44+
InputPricePerMToken: 5.0,
45+
InputPricePerMTokenPriority: 12.5,
46+
OutputPricePerMToken: 30.0,
47+
OutputPricePerMTokenPriority: 75.0,
48+
CacheReadPricePerMToken: 0.5,
49+
CacheReadPricePerMTokenPriority: 1.25,
5050
LongInputPricePerMToken: 10.0,
5151
LongInputPricePerMTokenPriority: 25.0,
5252
LongOutputPricePerMToken: 45.0,
@@ -55,10 +55,10 @@ var (
5555
LongCacheReadPricePerMTokenPriority: 2.5,
5656
}},
5757
{model: "gpt-5.5-pro", pricing: ModelPricing{
58-
InputPricePerMToken: 30.0,
59-
InputPricePerMTokenPriority: 75.0,
60-
OutputPricePerMToken: 180.0,
61-
OutputPricePerMTokenPriority: 450.0,
58+
InputPricePerMToken: 30.0,
59+
InputPricePerMTokenPriority: 75.0,
60+
OutputPricePerMToken: 180.0,
61+
OutputPricePerMTokenPriority: 450.0,
6262
LongInputPricePerMToken: 60.0,
6363
LongInputPricePerMTokenPriority: 150.0,
6464
LongOutputPricePerMToken: 270.0,
@@ -67,12 +67,12 @@ var (
6767
{model: "gpt-5.4-mini", pricing: ModelPricing{InputPricePerMToken: 0.75, OutputPricePerMToken: 4.5, CacheReadPricePerMToken: 0.075}},
6868
{model: "gpt-5.4-nano", pricing: ModelPricing{InputPricePerMToken: 0.2, OutputPricePerMToken: 1.25, CacheReadPricePerMToken: 0.02}},
6969
{model: "gpt-5.4", pricing: ModelPricing{
70-
InputPricePerMToken: 2.5,
71-
InputPricePerMTokenPriority: 5.0,
72-
OutputPricePerMToken: 15.0,
73-
OutputPricePerMTokenPriority: 30.0,
74-
CacheReadPricePerMToken: 0.25,
75-
CacheReadPricePerMTokenPriority: 0.5,
70+
InputPricePerMToken: 2.5,
71+
InputPricePerMTokenPriority: 5.0,
72+
OutputPricePerMToken: 15.0,
73+
OutputPricePerMTokenPriority: 30.0,
74+
CacheReadPricePerMToken: 0.25,
75+
CacheReadPricePerMTokenPriority: 0.5,
7676
LongInputPricePerMToken: 5.0,
7777
LongInputPricePerMTokenPriority: 10.0,
7878
LongOutputPricePerMToken: 22.5,
@@ -81,10 +81,10 @@ var (
8181
LongCacheReadPricePerMTokenPriority: 1.0,
8282
}},
8383
{model: "gpt-5.4-pro", pricing: ModelPricing{
84-
InputPricePerMToken: 30.0,
85-
InputPricePerMTokenPriority: 75.0,
86-
OutputPricePerMToken: 180.0,
87-
OutputPricePerMTokenPriority: 450.0,
84+
InputPricePerMToken: 30.0,
85+
InputPricePerMTokenPriority: 75.0,
86+
OutputPricePerMToken: 180.0,
87+
OutputPricePerMTokenPriority: 450.0,
8888
LongInputPricePerMToken: 60.0,
8989
LongInputPricePerMTokenPriority: 150.0,
9090
LongOutputPricePerMToken: 270.0,
@@ -326,7 +326,8 @@ func geminiFamilyPricing(model string) *ModelPricing {
326326
}
327327

328328
func usePriorityPricing(serviceTier string, pricing *ModelPricing) bool {
329-
if normalizeServiceTier(serviceTier) != "priority" {
329+
tier := normalizeServiceTier(serviceTier)
330+
if tier != "priority" && tier != "fast" {
330331
return false
331332
}
332333
return pricing.InputPricePerMTokenPriority > 0 ||
@@ -336,8 +337,6 @@ func usePriorityPricing(serviceTier string, pricing *ModelPricing) bool {
336337

337338
func serviceTierCostMultiplier(serviceTier string) float64 {
338339
switch normalizeServiceTier(serviceTier) {
339-
case "priority":
340-
return 2.0
341340
case "flex":
342341
return 0.5
343342
default:

database/billing_test.go

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,33 @@ func TestCalculateCostHandlesCachedTokensAndServiceTier(t *testing.T) {
100100
cachedTokens: 200,
101101
want: 0.0191,
102102
},
103+
{
104+
name: "uses priority prices for fast tier",
105+
model: "gpt-5.4",
106+
serviceTier: "fast",
107+
inputTokens: 1000,
108+
outputTokens: 500,
109+
cachedTokens: 200,
110+
want: 0.0191,
111+
},
112+
{
113+
name: "does not invent priority multiplier when priority price is unknown",
114+
model: "gpt-4o",
115+
serviceTier: "priority",
116+
inputTokens: 1000,
117+
outputTokens: 500,
118+
cachedTokens: 200,
119+
want: 0.0075,
120+
},
121+
{
122+
name: "fast tier falls back to standard pricing when priority price is unknown",
123+
model: "gpt-4o",
124+
serviceTier: "fast",
125+
inputTokens: 1000,
126+
outputTokens: 500,
127+
cachedTokens: 200,
128+
want: 0.0075,
129+
},
103130
{
104131
name: "applies flex multiplier",
105132
model: "gpt-5.4",
@@ -277,7 +304,7 @@ func TestCodexAutoReviewModelNormalizesToGPT54(t *testing.T) {
277304
func TestCodexAutoReviewLongContextPricing(t *testing.T) {
278305
// codex-auto-review maps to gpt-5.4 which has long context pricing.
279306
long := CalculateCostBreakdown(300000, 500, 100, "codex-auto-review", "")
280-
assertFloatEqual(t, long.InputPricePerMToken, 5.0) // long input price
307+
assertFloatEqual(t, long.InputPricePerMToken, 5.0) // long input price
281308
assertFloatEqual(t, long.OutputPricePerMToken, 22.5) // long output price
282309
assertFloatEqual(t, long.CacheReadPricePerMToken, 0.5) // long cache read price
283310
}

database/postgres.go

Lines changed: 40 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1543,8 +1543,13 @@ func (db *DB) InsertUsageLog(ctx context.Context, log *UsageLogInput) error {
15431543
billingModel = log.Model
15441544
}
15451545

1546-
// 计算账号计费金额(标准费用)
1547-
accountBilled := calculateCost(log.InputTokens, log.OutputTokens, log.CachedTokens, billingModel, log.ServiceTier)
1546+
billingServiceTier := log.BillingServiceTier
1547+
if billingServiceTier == "" {
1548+
billingServiceTier = log.ServiceTier
1549+
}
1550+
1551+
// 计算账号计费金额(基于上游实际 service tier)
1552+
accountBilled := calculateCost(log.InputTokens, log.OutputTokens, log.CachedTokens, billingModel, billingServiceTier)
15481553

15491554
// 用户计费金额与账号计费金额相同(简化版,未来可支持倍率)
15501555
userBilled := accountBilled
@@ -1598,38 +1603,39 @@ func (db *DB) InsertUsageLog(ctx context.Context, log *UsageLogInput) error {
15981603

15991604
// UsageLogInput 日志写入参数
16001605
type UsageLogInput struct {
1601-
AccountID int64
1602-
Endpoint string
1603-
Model string
1604-
EffectiveModel string
1605-
PromptTokens int
1606-
CompletionTokens int
1607-
TotalTokens int
1608-
StatusCode int
1609-
DurationMs int
1610-
InputTokens int
1611-
OutputTokens int
1612-
ReasoningTokens int
1613-
FirstTokenMs int
1614-
ReasoningEffort string
1615-
InboundEndpoint string
1616-
UpstreamEndpoint string
1617-
Stream bool
1618-
CachedTokens int
1619-
ServiceTier string
1620-
APIKeyID int64
1621-
APIKeyName string
1622-
APIKeyMasked string
1623-
ImageCount int
1624-
ImageWidth int
1625-
ImageHeight int
1626-
ImageBytes int
1627-
ImageFormat string
1628-
ImageSize string
1629-
IsRetryAttempt bool
1630-
AttemptIndex int
1631-
UpstreamErrorKind string
1632-
ErrorMessage string
1606+
AccountID int64
1607+
Endpoint string
1608+
Model string
1609+
EffectiveModel string
1610+
PromptTokens int
1611+
CompletionTokens int
1612+
TotalTokens int
1613+
StatusCode int
1614+
DurationMs int
1615+
InputTokens int
1616+
OutputTokens int
1617+
ReasoningTokens int
1618+
FirstTokenMs int
1619+
ReasoningEffort string
1620+
InboundEndpoint string
1621+
UpstreamEndpoint string
1622+
Stream bool
1623+
CachedTokens int
1624+
ServiceTier string
1625+
BillingServiceTier string
1626+
APIKeyID int64
1627+
APIKeyName string
1628+
APIKeyMasked string
1629+
ImageCount int
1630+
ImageWidth int
1631+
ImageHeight int
1632+
ImageBytes int
1633+
ImageFormat string
1634+
ImageSize string
1635+
IsRetryAttempt bool
1636+
AttemptIndex int
1637+
UpstreamErrorKind string
1638+
ErrorMessage string
16331639
}
16341640

16351641
func (l *UsageLog) populateBillingBreakdown() {

database/sqlite_test.go

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -737,6 +737,74 @@ func TestUsageLogsReturnBillingFields(t *testing.T) {
737737
}
738738
}
739739

740+
func TestUsageLogsBillFastByActualServiceTier(t *testing.T) {
741+
dbPath := filepath.Join(t.TempDir(), "codex2api.db")
742+
743+
db, err := New("sqlite", dbPath)
744+
if err != nil {
745+
t.Fatalf("New(sqlite) 返回错误: %v", err)
746+
}
747+
defer db.Close()
748+
749+
ctx := context.Background()
750+
if err := db.InsertUsageLog(ctx, &UsageLogInput{
751+
AccountID: 1,
752+
Endpoint: "/v1/responses",
753+
Model: "gpt-5.4",
754+
StatusCode: 200,
755+
InputTokens: 1000,
756+
OutputTokens: 500,
757+
CachedTokens: 200,
758+
ServiceTier: "fast",
759+
BillingServiceTier: "default",
760+
}); err != nil {
761+
t.Fatalf("InsertUsageLog 返回错误: %v", err)
762+
}
763+
if err := db.InsertUsageLog(ctx, &UsageLogInput{
764+
AccountID: 1,
765+
Endpoint: "/v1/responses",
766+
Model: "gpt-5.4",
767+
StatusCode: 200,
768+
InputTokens: 1000,
769+
OutputTokens: 500,
770+
CachedTokens: 200,
771+
ServiceTier: "fast",
772+
BillingServiceTier: "priority",
773+
}); err != nil {
774+
t.Fatalf("InsertUsageLog 返回错误: %v", err)
775+
}
776+
db.flushLogs()
777+
778+
logs, err := db.ListRecentUsageLogs(ctx, 10)
779+
if err != nil {
780+
t.Fatalf("ListRecentUsageLogs 返回错误: %v", err)
781+
}
782+
if len(logs) != 2 {
783+
t.Fatalf("len(logs) = %d, want 2", len(logs))
784+
}
785+
786+
wantPriority := calculateCost(1000, 500, 200, "gpt-5.4", "priority")
787+
wantDefault := calculateCost(1000, 500, 200, "gpt-5.4", "default")
788+
seenPriority := false
789+
seenDefault := false
790+
for _, log := range logs {
791+
if log.ServiceTier != "fast" {
792+
t.Fatalf("log tier = %q, want fast", log.ServiceTier)
793+
}
794+
switch log.AccountBilled {
795+
case wantPriority:
796+
seenPriority = true
797+
case wantDefault:
798+
seenDefault = true
799+
default:
800+
t.Fatalf("unexpected billed amount %.12f, want %.12f or %.12f", log.AccountBilled, wantPriority, wantDefault)
801+
}
802+
}
803+
if !seenPriority || !seenDefault {
804+
t.Fatalf("billing tiers seen priority=%v default=%v, want both", seenPriority, seenDefault)
805+
}
806+
}
807+
740808
func TestUsageLogsReturnErrorMessage(t *testing.T) {
741809
dbPath := filepath.Join(t.TempDir(), "codex2api.db")
742810

0 commit comments

Comments
 (0)