Skip to content

Commit ea7f3a2

Browse files
authored
Merge pull request #55 from kaitranntt/kai/fix/usage-detail-sanitization
fix: sanitize usage detail identifiers
2 parents 81f7e22 + 5743e5d commit ea7f3a2

3 files changed

Lines changed: 214 additions & 8 deletions

File tree

internal/usage/logger_plugin.go

Lines changed: 123 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,8 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record)
214214
s.updateAPIStats(stats, modelName, RequestDetail{
215215
Timestamp: timestamp,
216216
LatencyMs: normaliseLatency(record.Latency),
217-
Source: record.Source,
218-
AuthIndex: record.AuthIndex,
217+
Source: sanitizeUsageDetailSource(record.Source),
218+
AuthIndex: sanitizeUsageDetailAuthIndex(record.AuthIndex),
219219
Tokens: detail,
220220
Failed: failed,
221221
})
@@ -308,6 +308,7 @@ func (s *RequestStatistics) Snapshot() StatisticsSnapshot {
308308

309309
result.APIs = make(map[string]APISnapshot, len(s.apis))
310310
for apiName, stats := range s.apis {
311+
apiName = sanitizeUsageAPIIdentifier(apiName)
311312
apiSnapshot := APISnapshot{
312313
TotalRequests: stats.TotalRequests,
313314
TotalTokens: stats.TotalTokens,
@@ -317,6 +318,9 @@ func (s *RequestStatistics) Snapshot() StatisticsSnapshot {
317318
for modelName, modelStatsValue := range stats.Models {
318319
requestDetails := make([]RequestDetail, len(modelStatsValue.Details))
319320
copy(requestDetails, modelStatsValue.Details)
321+
for i := range requestDetails {
322+
requestDetails[i] = sanitizeRequestDetail(requestDetails[i])
323+
}
320324
apiSnapshot.Models[modelName] = ModelSnapshot{
321325
TotalRequests: modelStatsValue.TotalRequests,
322326
TotalTokens: modelStatsValue.TotalTokens,
@@ -386,6 +390,7 @@ func (s *RequestStatistics) RestoreSnapshot(snapshot StatisticsSnapshot) MergeRe
386390

387391
func (s *RequestStatistics) loadSnapshot(snapshot StatisticsSnapshot) {
388392
for apiName, apiSnapshot := range snapshot.APIs {
393+
apiName = sanitizeUsageAPIIdentifier(apiName)
389394
stats := s.apiStatsForKey(apiName)
390395
var apiRequests int64
391396
var apiTokens int64
@@ -449,6 +454,7 @@ func normaliseRequestDetails(details []RequestDetail) ([]RequestDetail, detailAg
449454
if detail.Timestamp.IsZero() {
450455
detail.Timestamp = now
451456
}
457+
detail = sanitizeRequestDetail(detail)
452458
out = append(out, detail)
453459
totals.requests++
454460
totals.tokens += detail.Tokens.TotalTokens
@@ -557,14 +563,127 @@ func normaliseUsageIdentifier(value string) string {
557563
return trimRunes(value, maxUsageIdentifierRunes)
558564
}
559565

566+
// sanitizeRequestDetail returns detail with its Source and AuthIndex fields
// replaced by the results of sanitizeUsageDetailSource and
// sanitizeUsageDetailAuthIndex. detail is received by value, so the caller's
// copy is not modified; every other field is returned unchanged.
func sanitizeRequestDetail(detail RequestDetail) RequestDetail {
567+
detail.Source = sanitizeUsageDetailSource(detail.Source)
568+
detail.AuthIndex = sanitizeUsageDetailAuthIndex(detail.AuthIndex)
569+
return detail
570+
}
571+
572+
// sanitizeUsageAPIIdentifier converts an API bucket name into a form that is
// safe to expose. The whitespace-trimmed input is passed through
// normaliseUsageIdentifier; if isSafeUsageIdentifier accepts the result it is
// returned as is. Otherwise the name is replaced by "api-key:" followed by
// shortUsageHash of the trimmed input.
func sanitizeUsageAPIIdentifier(value string) string {
573+
raw := strings.TrimSpace(value)
574+
normalized := normaliseUsageIdentifier(raw)
575+
if isSafeUsageIdentifier(normalized) {
576+
return normalized
577+
}
578+
// The hash is computed over the trimmed raw input, not the normalised form.
return "api-key:" + shortUsageHash(raw)
579+
}
580+
581+
// sanitizeUsageDetailSource converts a request detail's source into a form
// that is safe to expose. The whitespace-trimmed input is passed through
// normaliseUsageIdentifier; the result is returned unchanged when it equals
// unknownUsageBucket or is accepted by isSafeUsageIdentifier. Any other value
// is replaced by "source:" followed by shortUsageHash of the trimmed input.
func sanitizeUsageDetailSource(value string) string {
582+
raw := strings.TrimSpace(value)
583+
normalized := normaliseUsageIdentifier(raw)
584+
if normalized == unknownUsageBucket || isSafeUsageIdentifier(normalized) {
585+
return normalized
586+
}
587+
// The hash is computed over the trimmed raw input, not the normalised form.
return "source:" + shortUsageHash(raw)
588+
}
589+
590+
// sanitizeUsageDetailAuthIndex converts an auth index into a form that is safe
// to expose.
//
// An empty (or all-whitespace) input yields "". A value is returned as is
// (after trimRunes, with its original letter case) when, compared
// case-insensitively, it is either "auth:" followed by 12, 16 or 64 hex
// characters, or a bare string of 16 or 64 hex characters. Every other value
// is replaced by "auth:" followed by shortUsageHash of the trimmed input.
func sanitizeUsageDetailAuthIndex(value string) string {
591+
value = strings.TrimSpace(value)
592+
if value == "" {
593+
return ""
594+
}
595+
// NOTE(review): trimRunes presumably caps the value at
// maxUsageIdentifierRunes runes; confirm against its definition.
normalized := trimRunes(value, maxUsageIdentifierRunes)
596+
// Shape checks run on a lower-cased copy because isHexIdentifier only
// accepts lower-case hex digits.
lower := strings.ToLower(normalized)
597+
if strings.HasPrefix(lower, "auth:") {
598+
hash := strings.TrimPrefix(lower, "auth:")
599+
if isHexIdentifier(hash, 12) || isHexIdentifier(hash, 16) || isHexIdentifier(hash, 64) {
600+
return normalized
601+
}
602+
// An "auth:" prefix with a non-hash payload is not trusted; hash it.
return "auth:" + shortUsageHash(value)
603+
}
604+
if isHexIdentifier(lower, 16) || isHexIdentifier(lower, 64) {
605+
return normalized
606+
}
607+
return "auth:" + shortUsageHash(value)
608+
}
609+
610+
// isSafeUsageIdentifier reports whether value may be exposed without
// redaction. After trimming whitespace, a value is considered safe when it is:
//
//   - empty;
//   - equal (case-insensitively) to unknownUsageBucket or overflowUsageBucket;
//   - "api-key:" followed by 8, 12 or 64 hex characters;
//   - "source:" followed by 12 or 64 hex characters;
//   - accepted by isHTTPRoute or isSafePathIdentifier; or
//   - one of the fixed names listed in the switch below.
//
// A value that starts with "api-key:" or "source:" is judged solely on its
// hex payload; it does not fall through to the later checks.
func isSafeUsageIdentifier(value string) bool {
611+
value = strings.TrimSpace(value)
612+
if value == "" {
613+
return true
614+
}
615+
lower := strings.ToLower(value)
616+
if lower == unknownUsageBucket || lower == overflowUsageBucket {
617+
return true
618+
}
619+
if strings.HasPrefix(lower, "api-key:") {
620+
hash := strings.TrimPrefix(lower, "api-key:")
621+
return isHexIdentifier(hash, 8) || isHexIdentifier(hash, 12) || isHexIdentifier(hash, 64)
622+
}
623+
if strings.HasPrefix(lower, "source:") {
624+
hash := strings.TrimPrefix(lower, "source:")
625+
return isHexIdentifier(hash, 12) || isHexIdentifier(hash, 64)
626+
}
627+
// These two checks receive the trimmed value in its original letter case.
if isHTTPRoute(value) || isSafePathIdentifier(value) {
628+
return true
629+
}
630+
// NOTE(review): these look like provider/client labels; confirm the list
// against the callers that produce them.
switch lower {
631+
case "gemini", "gemini-cli", "aistudio", "vertex", "claude", "codex", "openai",
632+
"openai-compatibility", "openai-compatible", "antigravity", "github-copilot",
633+
"gitlab", "cursor", "kiro", "kilo", "kimi", "iflow", "codebuddy", "local":
634+
return true
635+
default:
636+
return false
637+
}
638+
}
639+
640+
// isSafePathIdentifier reports whether value looks like a URL path that is
// safe to expose: it must begin with "/", must not contain "?", and must not
// contain the substrings "key", "token" or "auth" in any letter case.
func isSafePathIdentifier(value string) bool {
641+
if !strings.HasPrefix(value, "/") {
642+
return false
643+
}
644+
lower := strings.ToLower(value)
645+
return !strings.Contains(value, "?") &&
646+
!strings.Contains(lower, "key") &&
647+
!strings.Contains(lower, "token") &&
648+
!strings.Contains(lower, "auth")
649+
}
650+
651+
// isHTTPRoute reports whether value has the shape "<METHOD> <path>": exactly
// two whitespace-separated fields, where the first is a recognised HTTP method
// (matched case-insensitively) and the second is a path beginning with "/".
//
// The path must not carry a query string. Query parameters are a common place
// for credentials (for example "?key=..."), and isSafePathIdentifier already
// rejects a bare path containing "?"; prefixing a method must not be a way
// around that check.
func isHTTPRoute(value string) bool {
	parts := strings.Fields(value)
	if len(parts) != 2 {
		return false
	}
	method, path := parts[0], parts[1]
	if !strings.HasPrefix(path, "/") || strings.Contains(path, "?") {
		return false
	}
	switch strings.ToUpper(method) {
	case "GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS":
		return true
	default:
		return false
	}
}
663+
664+
// isHexIdentifier reports whether value is exactly length bytes long and
// consists only of the characters 0-9 and a-f. Upper-case hex digits are
// rejected, so callers that want a case-insensitive match must lower-case
// value first.
func isHexIdentifier(value string, length int) bool {
665+
if len(value) != length {
666+
return false
667+
}
668+
for _, r := range value {
669+
if (r < '0' || r > '9') && (r < 'a' || r > 'f') {
670+
return false
671+
}
672+
}
673+
return true
674+
}
675+
560676
// secretUsageBucket returns a redacted bucket name for a secret value: "" when
// the whitespace-trimmed input is empty, otherwise "api-key:" followed by the
// first 8 characters of shortUsageHash of the trimmed input.
func secretUsageBucket(value string) string {
561677
value = strings.TrimSpace(value)
562678
if value == "" {
563679
return ""
564680
}
681+
// Only the first 8 characters of the hash are used for this bucket name.
return fmt.Sprintf("api-key:%s", shortUsageHash(value)[:8])
682+
}
683+
684+
func shortUsageHash(value string) string {
565685
sum := sha256.Sum256([]byte(value))
566-
hash := hex.EncodeToString(sum[:])[:8]
567-
return fmt.Sprintf("api-key:%s", hash)
686+
return hex.EncodeToString(sum[:])[:12]
568687
}
569688

570689
func trimRunes(value string, maxRunes int) string {

internal/usage/logger_plugin_test.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,77 @@ func TestRequestStatisticsRecordIncludesLatency(t *testing.T) {
4848
}
4949
}
5050

51+
// TestRequestStatisticsSanitizesDetailIdentifiers records a single usage entry
// carrying a raw API key, source and auth index, then checks that none of
// those raw values appear in the resulting Snapshot: the API bucket name must
// not contain the key, and the detail's Source and AuthIndex must pass
// assertNoRawUsageIdentifier.
func TestRequestStatisticsSanitizesDetailIdentifiers(t *testing.T) {
52+
// Enable statistics for this test and restore the previous setting afterwards.
prevStatsEnabled := StatisticsEnabled()
53+
SetStatisticsEnabled(true)
54+
t.Cleanup(func() {
55+
SetStatisticsEnabled(prevStatsEnabled)
56+
})
57+
58+
const rawAPIKey = "sk-test-client-secret"
59+
const rawSource = "person@example.com"
60+
const rawAuthIndex = "raw-auth-index-secret"
61+
62+
stats := NewRequestStatistics()
63+
stats.Record(context.Background(), coreusage.Record{
64+
APIKey: rawAPIKey,
65+
Model: "gpt-5.4",
66+
Source: rawSource,
67+
AuthIndex: rawAuthIndex,
68+
RequestedAt: time.Date(2026, 3, 20, 12, 0, 0, 0, time.UTC),
69+
})
70+
71+
snapshot := stats.Snapshot()
72+
apiName := onlyAPIName(t, snapshot)
73+
if strings.Contains(apiName, rawAPIKey) {
74+
t.Fatalf("api bucket leaked raw key: %q", apiName)
75+
}
76+
details := onlyAPISnapshot(t, snapshot).Models["gpt-5.4"].Details
77+
if len(details) != 1 {
78+
t.Fatalf("details len = %d, want 1", len(details))
79+
}
80+
assertNoRawUsageIdentifier(t, details[0].Source, rawSource)
81+
assertNoRawUsageIdentifier(t, details[0].AuthIndex, rawAuthIndex)
82+
}
83+
84+
// TestRequestStatisticsRestoreSanitizesImportedDetails restores a snapshot
// whose API name, Source and AuthIndex already carry the "api-key:",
// "source:" and "auth:" prefixes but with raw, non-hash payloads, then checks
// that a subsequent Snapshot exposes none of those raw values.
func TestRequestStatisticsRestoreSanitizesImportedDetails(t *testing.T) {
85+
// Enable statistics for this test and restore the previous setting afterwards.
prevStatsEnabled := StatisticsEnabled()
86+
SetStatisticsEnabled(true)
87+
t.Cleanup(func() {
88+
SetStatisticsEnabled(prevStatsEnabled)
89+
})
90+
91+
// A redaction prefix alone must not be enough for a value to be trusted.
const rawSource = "source:imported-person@example.com"
92+
const rawAuthIndex = "auth:imported-auth-secret"
93+
const rawAPIKey = "api-key:sk-imported-api-secret"
94+
95+
stats := NewRequestStatistics()
96+
stats.RestoreSnapshot(StatisticsSnapshot{
97+
APIs: map[string]APISnapshot{
98+
rawAPIKey: {
99+
Models: map[string]ModelSnapshot{
100+
"gpt-5.4": {
101+
Details: []RequestDetail{{
102+
Timestamp: time.Date(2026, 3, 20, 12, 0, 0, 0, time.UTC),
103+
Source: rawSource,
104+
AuthIndex: rawAuthIndex,
105+
}},
106+
},
107+
},
108+
},
109+
},
110+
})
111+
112+
snapshot := stats.Snapshot()
113+
assertNoRawUsageIdentifier(t, onlyAPIName(t, snapshot), rawAPIKey)
114+
details := onlyAPISnapshot(t, snapshot).Models["gpt-5.4"].Details
115+
if len(details) != 1 {
116+
t.Fatalf("details len = %d, want 1", len(details))
117+
}
118+
assertNoRawUsageIdentifier(t, details[0].Source, rawSource)
119+
assertNoRawUsageIdentifier(t, details[0].AuthIndex, rawAuthIndex)
120+
}
121+
51122
func TestRequestStatisticsUsesStableLoggingContext(t *testing.T) {
52123
prevStatsEnabled := StatisticsEnabled()
53124
SetStatisticsEnabled(true)
@@ -288,3 +359,16 @@ func requireSnapshotTotals(t *testing.T, got, want StatisticsSnapshot) {
288359
t.Fatalf("tokens by hour[12] = %d, want %d", got.TokensByHour["12"], want.TokensByHour["12"])
289360
}
290361
}
362+
363+
// assertNoRawUsageIdentifier fails the test when got is empty, when got equals
// or contains raw, or when got does not start with one of the redaction
// prefixes "source:", "auth:" or "api-key:". Any of the three prefixes is
// accepted regardless of which kind of identifier is being checked.
func assertNoRawUsageIdentifier(t *testing.T, got, raw string) {
364+
t.Helper()
365+
if got == "" {
366+
t.Fatalf("sanitized identifier is empty")
367+
}
368+
if got == raw || strings.Contains(got, raw) {
369+
t.Fatalf("identifier leaked raw value: got %q raw %q", got, raw)
370+
}
371+
if !strings.HasPrefix(got, "source:") && !strings.HasPrefix(got, "auth:") && !strings.HasPrefix(got, "api-key:") {
372+
t.Fatalf("identifier = %q, want stable redacted prefix", got)
373+
}
374+
}

test/usage_logging_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,10 @@ func TestGeminiExecutorRecordsSuccessfulZeroUsageInStatistics(t *testing.T) {
4242
})
4343

4444
model, source := executeGeminiZeroUsage(t, "stats")
45-
detail := waitForStatisticsDetail(t, "gemini", model, source)
45+
detail := waitForStatisticsDetail(t, "gemini", model)
46+
if detail.Source == source {
47+
t.Fatalf("detail source leaked raw account identifier")
48+
}
4649
if detail.Failed {
4750
t.Fatalf("detail failed = true, want false")
4851
}
@@ -121,7 +124,7 @@ func waitForQueuedUsageModelTotalTokens(t *testing.T, wantProvider, wantModel st
121124
t.Fatalf("timed out waiting for queued usage payload for provider=%q model=%q", wantProvider, wantModel)
122125
}
123126

124-
func waitForStatisticsDetail(t *testing.T, apiName, model, source string) internalusage.RequestDetail {
127+
func waitForStatisticsDetail(t *testing.T, apiName, model string) internalusage.RequestDetail {
125128
t.Helper()
126129

127130
deadline := time.Now().Add(2 * time.Second)
@@ -138,14 +141,14 @@ func waitForStatisticsDetail(t *testing.T, apiName, model, source string) intern
138141
continue
139142
}
140143
for _, detail := range modelSnapshot.Details {
141-
if detail.Source == source {
144+
if detail.Source != "" {
142145
return detail
143146
}
144147
}
145148
time.Sleep(10 * time.Millisecond)
146149
}
147150

148-
t.Fatalf("timed out waiting for statistics detail for api=%q model=%q source=%q", apiName, model, source)
151+
t.Fatalf("timed out waiting for statistics detail for api=%q model=%q", apiName, model)
149152
return internalusage.RequestDetail{}
150153
}
151154

0 commit comments

Comments
 (0)