/**
 * Returns `request.body` after removing NUL bytes from its string fields.
 *
 * The body is sanitized in place via `stripNulBytesInPlace`, so the returned
 * value is the same object the caller passed in on `request`. When at least
 * one field had to be cleaned, the affected field paths and the request kind
 * are reported through `logExceptInTest` so the source can be traced.
 *
 * @param request Gateway request whose body is about to be written to the
 *   request log.
 * @returns The (now sanitized) `request.body`.
 */
function sanitizedRequestBody(request: GatewayRequest): GatewayRequest['body'] {
  const { body } = request;
  const fieldsWithNul: string[] = [];

  stripNulBytesInPlace(body, fieldsWithNul);

  // Nothing was rewritten: skip the log line entirely.
  if (fieldsWithNul.length === 0) {
    return body;
  }

  logExceptInTest(
    'api_request_log request string field contained NUL bytes; sanitized before insert',
    {
      source: 'handleRequestLogging',
      fields: fieldsWithNul,
      kind: request.kind,
    }
  );
  return body;
}
verifyApproval } from '../../tests/helpers/approval.helper'; import { insertTestUser } from '../../tests/helpers/user.helper'; diff --git a/apps/web/src/lib/ai-gateway/processUsage.ts b/apps/web/src/lib/ai-gateway/processUsage.ts index 68787c08ef..dbb9acd586 100644 --- a/apps/web/src/lib/ai-gateway/processUsage.ts +++ b/apps/web/src/lib/ai-gateway/processUsage.ts @@ -57,6 +57,7 @@ import { computeOpenRouterCostFields, drainSseStream } from '@/lib/ai-gateway/pr import { isAnthropicModel } from '@/lib/ai-gateway/providers/anthropic'; import { isMinimaxModel } from '@/lib/ai-gateway/providers/minimax'; import type { KiloExclusiveModel } from '@/lib/ai-gateway/providers/kilo-exclusive-model'; +import { stripNulBytesInPlace } from '@/lib/ai-gateway/strip-nul-bytes'; const posthogClient = PostHogClient(); @@ -128,36 +129,6 @@ export function extractUsageContextInfo(usageContext: MicrodollarUsageContext) { }; } -/** - * Strip NUL bytes (\u0000) in place from every string-typed field on `obj`. - * - * Postgres `text` columns reject NUL bytes with `22021 invalid byte sequence - * for encoding "UTF8": 0x00`, which crashes the `microdollar_usage` CTE insert - * and leaves the request un-billed (see Sentry KILOCODE-WEB-1G3Z). - * - * NULs have been observed in client-populated fields on the LLM gateway hot - * path: HTTP headers from the VS Code extension (machine_id, session_id, - * http_user_agent) and prompt-derived fields (system_prompt_prefix, - * user_prompt_prefix). Sanitizing at the DB boundary is a safety net; once - * the upstream source is identified via the `console.warn` in - * `toInsertableDbUsageRecord` (queryable in Axiom), sanitize at the source - * and remove this. - * - * Any sanitized field names are appended to `dirtyFields` so the caller can - * log them for source attribution. 
/** The NUL character that Postgres `text` columns refuse to store. */
const NUL = '\u0000';

/**
 * Strip NUL bytes (\u0000) in place from every string value reachable from
 * `value` through plain objects and arrays.
 *
 * Postgres `text` columns reject NUL bytes with `22021 invalid byte sequence
 * for encoding "UTF8": 0x00`, which crashes inserts into affected tables.
 *
 * Strings are sanitized whether they are object properties or direct array
 * elements. Only values are rewritten; property names are left untouched.
 * Objects whose prototype is not `Object.prototype` (class instances, `Date`,
 * `Map`, ...) are not traversed.
 *
 * Any sanitized field paths are appended to `dirtyFields` so the caller can
 * log them for source attribution. Paths use dot notation for object keys and
 * `[index]` for array positions, e.g. `messages[0].content[1]`. Top-level
 * keys are reported as the bare key name.
 *
 * @param value Object or array to sanitize; it is mutated.
 * @param dirtyFields Output list that receives the path of each rewritten string.
 */
export function stripNulBytesInPlace(value: object, dirtyFields: string[]): void {
  stripNulBytesFromValue(value, dirtyFields, '');
}

/** Removes every NUL character from `text`. */
function withoutNul(text: string): string {
  // Using split/join rather than a regex avoids the no-control-regex lint
  // rule; the NUL byte is the intended match here.
  return text.split(NUL).join('');
}

/**
 * Recursive worker: sanitizes the string children of `value` and descends
 * into nested arrays and plain objects. `path` is the location of `value`
 * itself ('' for the root) and is used to build the reported field paths.
 */
function stripNulBytesFromValue(value: unknown, dirtyFields: string[], path: string): void {
  if (Array.isArray(value)) {
    const items: unknown[] = value;
    items.forEach((item, index) => {
      const itemPath = `${path}[${index}]`;
      if (typeof item === 'string') {
        // A string has no container of its own to recurse into, so direct
        // array elements must be rewritten here, through the array.
        if (item.includes(NUL)) {
          items[index] = withoutNul(item);
          dirtyFields.push(itemPath);
        }
        return;
      }
      stripNulBytesFromValue(item, dirtyFields, itemPath);
    });
    return;
  }

  if (!isPlainObject(value)) {
    return;
  }

  for (const [key, item] of Object.entries(value)) {
    const itemPath = path ? `${path}.${key}` : key;
    if (typeof item === 'string') {
      if (item.includes(NUL)) {
        value[key] = withoutNul(item);
        dirtyFields.push(itemPath);
      }
      continue;
    }

    stripNulBytesFromValue(item, dirtyFields, itemPath);
  }
}

/**
 * True only for non-null objects whose prototype is exactly
 * `Object.prototype` (object literals, `JSON.parse` output).
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return (
    typeof value === 'object' && value !== null && Object.getPrototypeOf(value) === Object.prototype
  );
}