Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion apps/web/src/lib/ai-gateway/handleRequestLogging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { createHash } from 'crypto';
import { redisSet } from '@/lib/redis';
import { requestLogRedisKey } from '@/lib/redis-keys';
import { detectToolCallArgumentErrors } from '@/lib/ai-gateway/api-request-log-errors';
import { stripNulBytesInPlace } from '@/lib/ai-gateway/strip-nul-bytes';

const users = [
'992891e9fe987b8960a05ed0bc9cc456979d1d71410d467f212e6233dbc0a523', // christiaan
Expand Down Expand Up @@ -54,7 +55,7 @@ export async function handleRequestLogging(params: {
status_code: clonedResponse.status,
model,
provider,
request: request.body,
request: sanitizedRequestBody(request),
response,
error,
})
Expand Down Expand Up @@ -92,3 +93,19 @@ export async function handleRequestLogging(params: {
}
});
}

/**
 * Returns the request body after removing NUL bytes from it for logging.
 *
 * The body is sanitized IN PLACE via `stripNulBytesInPlace`, so the object
 * returned is the same `request.body` reference the caller passed in, now
 * mutated. When at least one field was rewritten, the affected field names
 * are logged through `logExceptInTest` together with `request.kind`.
 *
 * @param request - Gateway request whose `body` is sanitized and returned.
 * @returns The (possibly mutated) `request.body`.
 */
function sanitizedRequestBody(request: GatewayRequest): GatewayRequest['body'] {
const dirtyFields: string[] = [];
stripNulBytesInPlace(request.body, dirtyFields);
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Nested request fields are still unsanitized

stripNulBytesInPlace only touches the top-level properties on request.body, but gateway payloads keep the actual prompt text inside nested structures like messages[].content, input[].content, and tool payloads. A NUL byte in one of those strings will still make the api_request_log.request jsonb insert fail, so this change does not fully cover the bug the PR is trying to fix.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in f56d7e5 by making stripNulBytesInPlace recurse through arrays and plain objects, so nested prompt/tool strings are sanitized before inserting api_request_log.request.

// Only log when something was actually rewritten; `dirtyFields` lists what changed.
if (dirtyFields.length > 0) {
logExceptInTest(
'api_request_log request string field contained NUL bytes; sanitized before insert',
{
source: 'handleRequestLogging',
fields: dirtyFields,
kind: request.kind,
}
);
}
return request.body;
}
2 changes: 1 addition & 1 deletion apps/web/src/lib/ai-gateway/processUsage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import {
mapToUsageStats,
logMicrodollarUsage,
processOpenRouterUsage,
stripNulBytesInPlace,
toInsertableDbUsageRecord,
} from './processUsage';
import { stripNulBytesInPlace } from '@/lib/ai-gateway/strip-nul-bytes';
import type { OpenRouterGeneration } from '@/lib/ai-gateway/providers/openrouter/types';
import { verifyApproval } from '../../tests/helpers/approval.helper';
import { insertTestUser } from '../../tests/helpers/user.helper';
Expand Down
31 changes: 1 addition & 30 deletions apps/web/src/lib/ai-gateway/processUsage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import { computeOpenRouterCostFields, drainSseStream } from '@/lib/ai-gateway/pr
import { isAnthropicModel } from '@/lib/ai-gateway/providers/anthropic';
import { isMinimaxModel } from '@/lib/ai-gateway/providers/minimax';
import type { KiloExclusiveModel } from '@/lib/ai-gateway/providers/kilo-exclusive-model';
import { stripNulBytesInPlace } from '@/lib/ai-gateway/strip-nul-bytes';

const posthogClient = PostHogClient();

Expand Down Expand Up @@ -128,36 +129,6 @@ export function extractUsageContextInfo(usageContext: MicrodollarUsageContext) {
};
}

/**
* Strip NUL bytes (\u0000) in place from every string-typed field on `obj`.
*
* Postgres `text` columns reject NUL bytes with `22021 invalid byte sequence
* for encoding "UTF8": 0x00`, which crashes the `microdollar_usage` CTE insert
* and leaves the request un-billed (see Sentry KILOCODE-WEB-1G3Z).
*
* NULs have been observed in client-populated fields on the LLM gateway hot
* path: HTTP headers from the VS Code extension (machine_id, session_id,
* http_user_agent) and prompt-derived fields (system_prompt_prefix,
* user_prompt_prefix). Sanitizing at the DB boundary is a safety net; once
* the upstream source is identified via the `console.warn` in
* `toInsertableDbUsageRecord` (queryable in Axiom), sanitize at the source
* and remove this.
*
* Any sanitized field names are appended to `dirtyFields` so the caller can
* log them for source attribution.
*/
export function stripNulBytesInPlace(obj: Record<string, unknown>, dirtyFields: string[]): void {
  const nul = '\u0000';
  Object.keys(obj).forEach(field => {
    const current = obj[field];
    // Only top-level string values are inspected; everything else is left as-is.
    if (typeof current !== 'string' || !current.includes(nul)) {
      return;
    }
    // split/join instead of a regex keeps the no-control-regex lint rule
    // quiet; matching the NUL character is intentional.
    obj[field] = current.split(nul).join('');
    dirtyFields.push(field);
  });
}

export function toInsertableDbUsageRecord(
usageStats: MicrodollarUsageStats,
usageContextInfo: UsageContextInfo
Expand Down
42 changes: 42 additions & 0 deletions apps/web/src/lib/ai-gateway/strip-nul-bytes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/**
* Strip NUL bytes (\u0000) in place from every string-typed field on `value`.
*
* Postgres `text` columns reject NUL bytes with `22021 invalid byte sequence
* for encoding "UTF8": 0x00`, which crashes inserts into affected tables.
*
* Any sanitized field paths are appended to `dirtyFields` so the caller can
* log them for source attribution.
*/
export function stripNulBytesInPlace(value: object, dirtyFields: string[]): void {
  // The root has no parent, so the walk starts with an empty path; top-level
  // keys are therefore reported without any prefix.
  const rootPath = '';
  stripNulBytesFromValue(value, dirtyFields, rootPath);
}

/**
 * Recursively removes NUL bytes (\u0000) in place from every string reachable
 * from `value` through arrays and plain objects.
 *
 * Strings are cleaned both when they are plain-object property values and
 * when they are direct array elements (e.g. `transforms: ['a\u0000b']`).
 * The array case matters: a string element has no container of its own, so
 * it must be rewritten through its parent array slot rather than by
 * recursing into it.
 *
 * Values that are neither arrays nor plain objects (primitives, `null`,
 * class instances, null-prototype objects) are left untouched. A bare string
 * passed as the root cannot be rewritten in place and is ignored.
 *
 * @param value - Node to sanitize.
 * @param dirtyFields - Receives the path of every string that was rewritten.
 * @param path - Path of `value` from the root (`''` for the root itself).
 *   Object keys are joined with `.`, array indices are appended as `[i]`.
 */
function stripNulBytesFromValue(value: unknown, dirtyFields: string[], path: string): void {
  if (Array.isArray(value)) {
    const items: unknown[] = value;
    for (let index = 0; index < items.length; index++) {
      const item = items[index];
      const itemPath = `${path}[${index}]`;
      if (typeof item === 'string') {
        if (item.includes('\u0000')) {
          items[index] = removeNulBytes(item);
          dirtyFields.push(itemPath);
        }
        continue;
      }
      stripNulBytesFromValue(item, dirtyFields, itemPath);
    }
    return;
  }

  if (!isPlainObject(value)) {
    return;
  }

  for (const [key, item] of Object.entries(value)) {
    const itemPath = path ? `${path}.${key}` : key;
    if (typeof item === 'string') {
      if (item.includes('\u0000')) {
        // `key` always comes from Object.entries(value), i.e. it is an
        // existing own enumerable property of this plain object, so this
        // write only replaces that property's value and never adds a new one.
        value[key] = removeNulBytes(item);
        dirtyFields.push(itemPath);
      }
      continue;
    }
    stripNulBytesFromValue(item, dirtyFields, itemPath);
  }
}

/**
 * Returns `text` with every NUL character removed.
 *
 * Uses split/join rather than a regex to avoid the no-control-regex lint
 * rule; the NUL character is the intended match.
 */
function removeNulBytes(text: string): string {
  return text.split('\u0000').join('');
}

/**
 * Type guard: true only for non-null objects whose prototype is exactly
 * `Object.prototype` (object literals, `JSON.parse` output). Arrays, class
 * instances and null-prototype objects return false.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return (
    typeof value === 'object' && value !== null && Object.getPrototypeOf(value) === Object.prototype
  );
}
Loading