Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions packages/host/app/services/card-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { v4 as uuidv4 } from 'uuid';

import {
formattedError,
isJsonContentType,
SupportedMimeType,
type CardDocument,
type SingleCardDocument,
Expand Down Expand Up @@ -179,6 +180,19 @@ export default class CardService extends Service {
throw err;
}
if (response.status !== 204) {
// A relationship link can point at a non-card URL (e.g. an image);
// gate on Content-Type so the binary body never reaches JSON.parse.
let contentType = response.headers.get('content-type');
if (!isJsonContentType(contentType)) {
let err = new Error(
`Expected a card document from ${urlString} but the response content type was ${
contentType ?? 'unknown'
}. If this is a relationship link, it likely points at a non-card URL (e.g. an image) rather than a card.`,
) as any;
err.status = response.status;
err.responseHeaders = response.headers;
throw err;
}
return await response.json();
}
return;
Expand Down Expand Up @@ -208,6 +222,7 @@ export default class CardService extends Service {
return {
status: response.status,
content: await response.text(),
contentType: response.headers.get('content-type'),
};
}

Expand Down
23 changes: 22 additions & 1 deletion packages/host/app/services/store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import {
rri,
logger,
formattedError,
isJsonContentType,
SupportedMimeType,
RealmPaths,
type Store as StoreInterface,
Expand Down Expand Up @@ -1880,7 +1881,27 @@ export default class StoreService extends Service implements StoreInterface {
vn.toURL(`${url}.json`),
);
if (result.status === 200) {
json = JSON.parse(result.content);
// A relationship link can point at a non-card URL (e.g. an
// image); gate on Content-Type so the binary body never
// reaches JSON.parse.
if (!isJsonContentType(result.contentType)) {
throw new Error(
`Could not load ${url} as a card: the response (content type ${
result.contentType ?? 'unknown'
}) is not a card document. If this is a relationship link, it likely points at a non-card URL (e.g. an image) rather than a card.`,
);
}
try {
json = JSON.parse(result.content);
} catch {
// Content-Type claimed JSON but the body didn't parse
// (e.g. truncated source) — still surface a clean error.
throw new Error(
`Could not load ${url} as a card: its source (content type ${
result.contentType ?? 'unknown'
}) is not valid JSON.`,
);
}
Comment thread
jurgenwerk marked this conversation as resolved.
} else {
throw new Error(
`Received non-200 status fetching instance source ${url}.json: ${result.content}`,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ module('Integration | ai-assistant-panel | codeblocks', function (hooks) {
if (url.toString() === 'https://example.com/component.gts') {
return {
status: 200,
contentType: 'application/vnd.card+source',
content: `import Component from '@glimmer/component';

export default class MyComponent extends Component {
Expand All @@ -107,6 +108,7 @@ export default class MyComponent extends Component {
}
return {
status: 404,
contentType: null,
content: '',
};
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ module('Integration | Component | FormattedAiBotMessage', function (hooks) {
cardService.getSource = async () => {
return Promise.resolve({
status: 200,
contentType: 'application/vnd.card+source',
content: 'let a = 1;\nlet b = 2;',
});
};
Expand Down
61 changes: 61 additions & 0 deletions packages/host/tests/unit/index-writer-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,67 @@ module('Unit | index-writer', function (hooks) {
);
});

test('strips jsonb-illegal code points from error_doc and diagnostics on write', async function (assert) {
await setupIndex(
adapter,
[{ realm_url: testRealmURL, current_version: 1 }],
[],
);
let batch = await indexWriter.createBatch(new URL(testRealmURL));
// A NUL and an unpaired surrogate in the error message + diagnostics:
// Postgres rejects both in jsonb, so without sanitization this write
// aborts the whole batch.
await batch.updateEntry(new URL(`${testRealmURL}1.json`), {
type: 'instance-error',
error: {
message: 'Unexpected token \u0000\uD800 JFIF is not valid JSON',
status: 500,
additionalErrors: [
{
message: 'nested \u0000 binary',
status: 500,
additionalErrors: [],
} as any,
],
},
diagnostics: { renderStage: 'load\u0000links' },
});
// Must not throw — the un-sanitized write rejected the upsert.
await batch.done();

let [row] = (await adapter.execute(
'SELECT * FROM boxel_index WHERE has_error = TRUE AND realm_version = 2',
{ coerceTypes },
)) as unknown as BoxelIndexTable[];
assert.ok(row?.error_doc, 'error row persisted despite binary in message');

let nul = String.fromCharCode(0);
let hasIllegalCodePoint = (s: string) =>
s.includes(nul) ||
/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/.test(
s,
);
assert.notOk(
hasIllegalCodePoint(row.error_doc!.message),
'illegal code points stripped from the error message',
);
assert.true(
row.error_doc!.message.includes('JFIF'),
'the readable remainder of the message is preserved',
);
assert.notOk(
hasIllegalCodePoint(
(row.error_doc!.additionalErrors as any[])[0].message,
),
'illegal code points stripped from nested additionalErrors too',
);
assert.strictEqual(
(row.diagnostics as any).renderStage,
'load\uFFFDlinks',
'illegal code points stripped from diagnostics strings too',
);
});

test('error entry does not include last known good state when not available', async function (assert) {
await setupIndex(
adapter,
Expand Down
2 changes: 2 additions & 0 deletions packages/realm-server/tests/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ const ALL_TEST_FILES: string[] = [
'./prerender-deadlock-test',
'./runtime-exception-capture-test',
'./clamp-serialized-error-test',
'./sanitize-for-jsonb-test',
'./is-json-content-type-test',
'./prerender-diagnostics-persistence-test',
'./prerender-proxy-test',
'./queue-test',
Expand Down
34 changes: 34 additions & 0 deletions packages/realm-server/tests/is-json-content-type-test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { module, test } from 'qunit';
import { basename } from 'path';
import { isJsonContentType } from '@cardstack/runtime-common';

module(basename(__filename), function () {
test('accepts application/json and JSON-suffix media types', function (assert) {
assert.true(isJsonContentType('application/json'));
assert.true(isJsonContentType('text/json'));
assert.true(isJsonContentType('application/vnd.api+json'));
assert.true(isJsonContentType('application/vnd.card+json'));
assert.true(isJsonContentType('application/vnd.card.file-meta+json'));
});

test('ignores parameters and is case-insensitive', function (assert) {
assert.true(isJsonContentType('application/json; charset=utf-8'));
assert.true(isJsonContentType('APPLICATION/VND.API+JSON'));
assert.true(isJsonContentType(' application/json '));
});

test('rejects binary and other non-JSON media types', function (assert) {
assert.false(isJsonContentType('image/jpeg'));
assert.false(isJsonContentType('application/pdf'));
assert.false(isJsonContentType('application/octet-stream'));
assert.false(isJsonContentType('text/html'));
// `+source` is a structured-suffix type but it is not JSON.
assert.false(isJsonContentType('application/vnd.card+source'));
});

test('rejects a missing or empty content type', function (assert) {
assert.false(isJsonContentType(null));
assert.false(isJsonContentType(undefined));
assert.false(isJsonContentType(''));
});
});
73 changes: 73 additions & 0 deletions packages/realm-server/tests/sanitize-for-jsonb-test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { module, test } from 'qunit';
import { basename } from 'path';
import { sanitizeForJsonb } from '@cardstack/runtime-common';

// `sanitizeForJsonb` replaces the code points Postgres rejects in a
// `jsonb` value (NUL, unpaired UTF-16 surrogates) while leaving clean
// values untouched.

const NUL = String.fromCharCode(0x0000);
const LONE_HIGH_SURROGATE = String.fromCharCode(0xd800);
const LONE_LOW_SURROGATE = String.fromCharCode(0xdc00);
const REPLACEMENT = String.fromCharCode(0xfffd);

function hasIllegalCodePoint(value: string): boolean {
// eslint-disable-next-line no-control-regex -- matching the NUL control character is the whole point
return /\u0000|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/.test(
value,
);
}

module(basename(__filename), function () {
test('returns clean values unchanged', function (assert) {
assert.strictEqual(sanitizeForJsonb('plain text'), 'plain text');
assert.strictEqual(sanitizeForJsonb(42 as unknown), 42);
assert.true(sanitizeForJsonb(true as unknown));
assert.strictEqual(sanitizeForJsonb(null as unknown), null);
assert.strictEqual(sanitizeForJsonb(undefined as unknown), undefined);
});

test('preserves well-formed multi-byte text and surrogate pairs', function (assert) {
// A valid astral-plane code point (😀 = U+1F600) is a *paired* high+
// low surrogate in UTF-16 and must survive untouched.
let input = 'café — 😀 — naïve';
assert.strictEqual(sanitizeForJsonb(input), input);
assert.notOk(hasIllegalCodePoint(sanitizeForJsonb(input)));
});

test('replaces a NUL character', function (assert) {
let result = sanitizeForJsonb(`load${NUL}links`);
assert.strictEqual(result, `load${REPLACEMENT}links`);
assert.notOk(hasIllegalCodePoint(result));
});

test('replaces unpaired surrogate halves', function (assert) {
let highOnly = sanitizeForJsonb(`a${LONE_HIGH_SURROGATE}b`);
let lowOnly = sanitizeForJsonb(`a${LONE_LOW_SURROGATE}b`);
assert.strictEqual(highOnly, `a${REPLACEMENT}b`);
assert.strictEqual(lowOnly, `a${REPLACEMENT}b`);
assert.notOk(hasIllegalCodePoint(highOnly));
assert.notOk(hasIllegalCodePoint(lowOnly));
});

test('recurses through nested objects, arrays, and keys', function (assert) {
let result = sanitizeForJsonb({
message: `Unexpected token ${NUL}${LONE_HIGH_SURROGATE} JFIF`,
additionalErrors: [{ detail: `nested ${NUL} binary` }],
[`key${NUL}`]: 'value',
}) as Record<string, any>;

assert.notOk(
hasIllegalCodePoint(JSON.stringify(result)),
'whole tree clean',
);
assert.true(result.message.includes('JFIF'), 'readable remainder kept');
assert.notOk(hasIllegalCodePoint(result.message));
assert.notOk(hasIllegalCodePoint(result.additionalErrors[0].detail));
assert.strictEqual(
result[`key${REPLACEMENT}`],
'value',
'illegal code points in object keys are sanitized too',
);
});
});
26 changes: 26 additions & 0 deletions packages/runtime-common/error.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,32 @@ export interface SerializedError {
// The budget is set to 8 MiB: 32× under the jsonb limit, plenty of
// debug headroom for healthy errors, and small enough to leave room
// for the rest of the row (`pristine_doc` / `search_doc` etc.).
// Postgres rejects the NUL character (`22P05`) and unpaired UTF-16
// surrogate halves inside a `jsonb` value's text. A single such code
// point — e.g. raw binary folded into an error message — aborts the
// whole upsert batch, so `sanitizeForJsonb` replaces them before write.
/* eslint-disable no-control-regex -- matching the NUL control character is the whole point */
const JSONB_ILLEGAL_CODE_POINTS =
/\u0000|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
/* eslint-enable no-control-regex */

export function sanitizeForJsonb<T>(value: T): T {
if (typeof value === 'string') {
return value.replace(JSONB_ILLEGAL_CODE_POINTS, '\uFFFD') as unknown as T;
}
if (Array.isArray(value)) {
return value.map((item) => sanitizeForJsonb(item)) as unknown as T;
}
if (value !== null && typeof value === 'object') {
let result: Record<string, unknown> = {};
for (let [key, val] of Object.entries(value)) {
result[sanitizeForJsonb(key)] = sanitizeForJsonb(val);
}
return result as T;
}
Comment on lines +81 to +87
return value;
}

export const ERROR_DOC_MAX_BYTES = 8 * 1024 * 1024;
export const ERROR_DOC_MAX_ADDITIONAL_ERRORS = 200;
const ENTRY_STACK_BUDGET = 64 * 1024;
Expand Down
13 changes: 9 additions & 4 deletions packages/runtime-common/index-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ import {
dbExpression,
upsertMultipleRows,
} from './expression';
import { clampSerializedError, type SerializedError } from './error';
import {
clampSerializedError,
sanitizeForJsonb,
type SerializedError,
} from './error';
import type { DBAdapter } from './db';
import type { RealmMetaTable } from './index-structure';
import type { FileMetaResource } from './resource-types';
Expand Down Expand Up @@ -470,11 +474,12 @@ export class Batch {
// diagnostics via `formattedError`) keeps working unchanged —
// no schema rename needed. The column remains source of truth;
// the error-doc copy is derived.
let diagnostics: Diagnostics = {
// Sanitize so jsonb-illegal bytes can't abort the batch on write.
let diagnostics: Diagnostics = sanitizeForJsonb({
...(entry.diagnostics ?? {}),
invalidationId: this.#currentInvalidationId,
indexedAt: Date.now(),
};
});
let errorEntry = isErrorEntry(entry)
? {
...entry,
Expand Down Expand Up @@ -559,7 +564,7 @@ export class Batch {
baseTypeFromError(entry),
),
type: baseTypeFromError(entry),
error_doc: errorEntry?.error ?? entry.error,
error_doc: sanitizeForJsonb(errorEntry?.error ?? entry.error),
has_error: true,
diagnostics: diagnostics,
};
Expand Down
3 changes: 2 additions & 1 deletion packages/runtime-common/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ export {
isCardErrorJSONAPI,
clampSerializedError,
coerceErrorMessage,
sanitizeForJsonb,
ERROR_DOC_MAX_BYTES,
ERROR_DOC_MAX_ADDITIONAL_ERRORS,
} from './error';
Expand Down Expand Up @@ -763,7 +764,7 @@ export const isNode =
Object.prototype.toString.call((globalThis as any).process) ===
'[object process]';

export { SupportedMimeType } from './supported-mime-type';
export { SupportedMimeType, isJsonContentType } from './supported-mime-type';
export {
isUrlLike,
VirtualNetwork,
Expand Down
Loading
Loading