Skip to content
Open
3 changes: 2 additions & 1 deletion _packages/native-preview/src/api/async/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
readSourceFileHash,
RemoteSourceFile,
} from "../node/node.ts";
import { Wtf8Decoder } from "../node/wtf8.ts";
import { ObjectRegistry } from "../objectRegistry.ts";
import type {
APIOptions,
Expand Down Expand Up @@ -315,7 +316,7 @@ export class Program {
private client: Client;
private sourceFileCache: SourceFileCache;
private toPath: (fileName: string) => Path;
private decoder = new TextDecoder();
private decoder = new Wtf8Decoder();

constructor(
snapshotId: string,
Expand Down
4 changes: 3 additions & 1 deletion _packages/native-preview/src/api/node/msgpack.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Minimal msgpack encoder/decoder.
// Supports: arrays, unsigned integers, strings, booleans, binary data.

import { Wtf8Decoder } from "./wtf8.ts";

// ── MessagePack format constants ────────────────────────────────────
export const MSGPACK_FIXARRAY3 = 0x93; // 3-element fixarray
export const MSGPACK_BIN8 = 0xc4;
Expand Down Expand Up @@ -39,7 +41,7 @@ export function writeBinHeader(buf: Uint8Array, off: number, len: number): numbe
}

const encoder = new TextEncoder();
const decoder = new TextDecoder();
const decoder = new Wtf8Decoder();

export class MsgpackWriter {
private buf: Uint8Array;
Expand Down
3 changes: 2 additions & 1 deletion _packages/native-preview/src/api/node/node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
HEADER_OFFSET_STRUCTURED_DATA,
NODE_LEN,
} from "./protocol.ts";
import { Wtf8Decoder } from "./wtf8.ts";

// Re-export everything consumers need from the other two files.
export { RemoteNode, RemoteNodeList } from "./node.generated.ts";
Expand Down Expand Up @@ -242,7 +243,7 @@ export function parseNodeHandle(handle: string): ParsedNodeHandle {
* (e.g. from typeToTypeNode) that don't have a source file.
*/
export function decodeNode(data: Uint8Array): Node {
const sf = new RemoteSourceFile(data, new TextDecoder());
const sf = new RemoteSourceFile(data, new Wtf8Decoder());
return sf as unknown as Node;
}

Expand Down
2 changes: 1 addition & 1 deletion _packages/native-preview/src/api/node/protocol.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export const PROTOCOL_VERSION = 5;
export const PROTOCOL_VERSION = 6;

export const HEADER_OFFSET_METADATA = 0;
export const HEADER_OFFSET_HASH_LO0 = 4;
Expand Down
62 changes: 62 additions & 0 deletions _packages/native-preview/src/api/node/wtf8.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// WTF-8 decoding.
//
// A lone UTF-16 surrogate (U+D800..U+DFFF) is carried on the wire as the
// three-byte sequence ED A0..BF 80..BF. Plain UTF-8 decoders reject that
// sequence, so this module pulls such sequences out by hand and hands every
// other run of bytes to the built-in TextDecoder.

// Lead byte shared by every three-byte encoding of U+D000..U+DFFF.
const surrogateLeadByte = 0xED;
// With lead byte 0xED, a second byte in A0..BF selects the range U+D800..U+DFFF;
// second bytes 80..9F encode the ordinary code points U+D000..U+D7FF.
const surrogateSecondByteMin = 0xA0;
const surrogateSecondByteMax = 0xBF;
// Range of a UTF-8 continuation byte (0b10xxxxxx).
const continuationByteMin = 0x80;
const continuationByteMax = 0xBF;
type DecodeOptions = Parameters<TextDecoder["decode"]>[1];

/**
 * Reports whether the three bytes starting at `index` encode a surrogate code unit.
 *
 * @param bytes Buffer being decoded.
 * @param index Offset of the candidate lead byte.
 * @returns `true` only when all three bytes exist and match ED A0..BF 80..BF.
 */
function isWtf8Surrogate(bytes: Uint8Array, index: number): boolean {
    return index + 2 < bytes.length
        && bytes[index] === surrogateLeadByte
        && bytes[index + 1] >= surrogateSecondByteMin
        && bytes[index + 1] <= surrogateSecondByteMax
        && bytes[index + 2] >= continuationByteMin
        && bytes[index + 2] <= continuationByteMax;
}

/**
 * Rebuilds the UTF-16 code unit from a sequence accepted by `isWtf8Surrogate`.
 *
 * The lead byte 0xED always contributes the top four bits (0xD000); the two
 * following bytes contribute six payload bits each.
 */
function getSurrogateCodeUnit(bytes: Uint8Array, index: number): number {
    return 0xD000 | ((bytes[index + 1] & 0x3F) << 6) | (bytes[index + 2] & 0x3F);
}

/**
 * Returns a byte view over `input` without copying the underlying memory.
 */
function toUint8Array(input: NodeJS.AllowSharedBufferSource): Uint8Array {
    if (input instanceof Uint8Array) {
        return input;
    }
    if (ArrayBuffer.isView(input)) {
        // Respect the view's own window into its buffer.
        return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
    }
    return new Uint8Array(input);
}

/**
 * A `TextDecoder` that additionally accepts WTF-8: surrogate code units encoded
 * as ED A0..BF 80..BF are preserved in the resulting string instead of being
 * replaced (or rejected) by the UTF-8 decoder.
 *
 * Input without any such sequence is decoded by a single call to the base class.
 */
export class Wtf8Decoder extends TextDecoder {
    /** Lazily created decoder for text that follows a surrogate; it never strips U+FEFF. */
    private bomPreservingDecoder: TextDecoder | undefined;

    /**
     * Decodes `input`, preserving WTF-8 encoded surrogates.
     *
     * @param input Bytes to decode; when omitted the call is forwarded to the base class.
     * @param options Standard `TextDecoder.decode` options, forwarded for every text segment.
     * @returns The decoded string, possibly containing unpaired surrogates.
     */
    override decode(input?: NodeJS.AllowSharedBufferSource, options?: DecodeOptions): string {
        // The surrogate byte pattern only has this meaning in UTF-8; any other
        // encoding is left entirely to the base class.
        if (input === undefined || this.encoding !== "utf-8") {
            return super.decode(input, options);
        }

        const bytes = toUint8Array(input);
        const parts: string[] = [];
        let segmentStart = 0;

        // Only 0xED can start a surrogate sequence, so jump from one 0xED to the
        // next instead of testing every byte.
        let candidate = bytes.indexOf(surrogateLeadByte);
        while (candidate !== -1) {
            if (!isWtf8Surrogate(bytes, candidate)) {
                candidate = bytes.indexOf(surrogateLeadByte, candidate + 1);
                continue;
            }

            if (segmentStart < candidate) {
                parts.push(this.decodeSegment(bytes.subarray(segmentStart, candidate), segmentStart === 0, options));
            }
            parts.push(String.fromCharCode(getSurrogateCodeUnit(bytes, candidate)));
            segmentStart = candidate + 3;
            candidate = bytes.indexOf(surrogateLeadByte, segmentStart);
        }

        if (segmentStart === 0) {
            // No surrogate found: behave exactly like the base decoder.
            return super.decode(bytes, options);
        }
        if (segmentStart < bytes.length) {
            parts.push(this.decodeSegment(bytes.subarray(segmentStart), false, options));
        }
        return parts.join("");
    }

    /**
     * Decodes one run of ordinary UTF-8 that sits between surrogate sequences.
     *
     * A one-shot `TextDecoder.decode` call strips a leading byte order mark.
     * That is right for the very start of the input, but a U+FEFF that follows
     * a surrogate is real content and must survive, so later segments go
     * through a decoder created with `ignoreBOM: true`.
     *
     * Streaming calls and decoders already configured with `ignoreBOM` keep
     * using the base decoder, so any state it carries between calls stays in
     * one place.
     */
    private decodeSegment(segment: Uint8Array, atInputStart: boolean, options: DecodeOptions): string {
        if (atInputStart || this.ignoreBOM || options?.stream) {
            return super.decode(segment, options);
        }
        if (this.bomPreservingDecoder === undefined) {
            this.bomPreservingDecoder = new TextDecoder(this.encoding, { fatal: this.fatal, ignoreBOM: true });
        }
        return this.bomPreservingDecoder.decode(segment, options);
    }
}
3 changes: 2 additions & 1 deletion _packages/native-preview/src/api/sync/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import {
readSourceFileHash,
RemoteSourceFile,
} from "../node/node.ts";
import { Wtf8Decoder } from "../node/wtf8.ts";
import { ObjectRegistry } from "../objectRegistry.ts";
import type {
APIOptions,
Expand Down Expand Up @@ -323,7 +324,7 @@ export class Program {
private client: Client;
private sourceFileCache: SourceFileCache;
private toPath: (fileName: string) => Path;
private decoder = new TextDecoder();
private decoder = new Wtf8Decoder();

constructor(
snapshotId: string,
Expand Down
42 changes: 40 additions & 2 deletions _packages/native-preview/test/async/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,18 +206,24 @@ test("unicode escapes", async () => {
"/tsconfig.json": "{}",
"/src/1.ts": `"😃"`,
"/src/2.ts": `"\\ud83d\\ude03"`,
"/src/3.ts": `"\\ud800a\\udc00"`,
});
try {
const snapshot = await api.updateSnapshot({ openProject: "/tsconfig.json" });
const project = snapshot.getProject("/tsconfig.json")!;
const expectedTexts = new Map([
["/src/1.ts", "😃"],
["/src/2.ts", "😃"],
["/src/3.ts", "\ud800a\udc00"],
]);

for (const file of ["/src/1.ts", "/src/2.ts"]) {
for (const file of expectedTexts.keys()) {
const sourceFile = await project.program.getSourceFile(file);
assert.ok(sourceFile);

sourceFile.forEachChild(function visit(node) {
if (isStringLiteral(node)) {
assert.equal(node.text, "😃");
assert.equal(node.text, expectedTexts.get(file));
}
node.forEachChild(visit);
});
Expand All @@ -228,6 +234,38 @@ test("unicode escapes", async () => {
}
});

// A surrogate pair split across a template literal's head and tail: each piece
// must come back from the API holding its own lone surrogate.
test("template unicode escapes", async () => {
    const api = spawnAPI({
        "/tsconfig.json": "{}",
        "/src/index.ts": "`\\ud800${0}\\udc00`",
    });
    try {
        const snapshot = await api.updateSnapshot({ openProject: "/tsconfig.json" });
        const project = snapshot.getProject("/tsconfig.json")!;
        const sourceFile = await project.program.getSourceFile("/src/index.ts");
        assert.ok(sourceFile);

        // Records which template pieces the walk actually reached, so the test
        // cannot pass vacuously.
        const seen = new Set<"head" | "tail">();
        sourceFile.forEachChild(function walk(child) {
            if (isTemplateHead(child)) {
                assert.equal(child.text, "\ud800");
                seen.add("head");
            }
            else if (isTemplateTail(child)) {
                assert.equal(child.text, "\udc00");
                seen.add("tail");
            }
            child.forEachChild(walk);
        });
        assert.ok(seen.has("head"));
        assert.ok(seen.has("tail"));
    }
    finally {
        await api.close();
    }
});

test("Object equality", async () => {
const api = spawnAPI();
try {
Expand Down
6 changes: 3 additions & 3 deletions _packages/native-preview/test/encoder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ describe("Encoder", () => {
// Verify header
const view = new DataView(encoded.buffer, encoded.byteOffset, encoded.byteLength);
const metadata = view.getUint32(0, true);
assert.strictEqual(metadata >>> 24, 5, "protocol version should be 5");
assert.strictEqual(metadata >>> 24, 6, "protocol version should be 6");

// Verify we can decode it
const decoded = decode(encoded);
Expand Down Expand Up @@ -169,11 +169,11 @@ describe("Encoder", () => {
assert.strictEqual(rootKind, SyntaxKind.IfStatement);
});

test("protocol version is 5", () => {
test("protocol version is 6", () => {
const sf = makeSF("", "/test.ts", []);
const encoded = encodeSourceFile(sf);
const view = new DataView(encoded.buffer, encoded.byteOffset, encoded.byteLength);
assert.strictEqual(view.getUint32(0, true) >>> 24, 5);
assert.strictEqual(view.getUint32(0, true) >>> 24, 6);
});

test("boolean properties are encoded", () => {
Expand Down
42 changes: 40 additions & 2 deletions _packages/native-preview/test/sync/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,18 +214,24 @@ test("unicode escapes", () => {
"/tsconfig.json": "{}",
"/src/1.ts": `"😃"`,
"/src/2.ts": `"\\ud83d\\ude03"`,
"/src/3.ts": `"\\ud800a\\udc00"`,
});
try {
const snapshot = api.updateSnapshot({ openProject: "/tsconfig.json" });
const project = snapshot.getProject("/tsconfig.json")!;
const expectedTexts = new Map([
["/src/1.ts", "😃"],
["/src/2.ts", "😃"],
["/src/3.ts", "\ud800a\udc00"],
]);

for (const file of ["/src/1.ts", "/src/2.ts"]) {
for (const file of expectedTexts.keys()) {
const sourceFile = project.program.getSourceFile(file);
assert.ok(sourceFile);

sourceFile.forEachChild(function visit(node) {
if (isStringLiteral(node)) {
assert.equal(node.text, "😃");
assert.equal(node.text, expectedTexts.get(file));
}
node.forEachChild(visit);
});
Expand All @@ -236,6 +242,38 @@ test("unicode escapes", () => {
}
});

// A surrogate pair split across a template literal's head and tail: each piece
// must come back from the API holding its own lone surrogate.
test("template unicode escapes", () => {
    const api = spawnAPI({
        "/tsconfig.json": "{}",
        "/src/index.ts": "`\\ud800${0}\\udc00`",
    });
    try {
        const snapshot = api.updateSnapshot({ openProject: "/tsconfig.json" });
        const project = snapshot.getProject("/tsconfig.json")!;
        const sourceFile = project.program.getSourceFile("/src/index.ts");
        assert.ok(sourceFile);

        // Records which template pieces the walk actually reached, so the test
        // cannot pass vacuously.
        const seen = new Set<"head" | "tail">();
        sourceFile.forEachChild(function walk(child) {
            if (isTemplateHead(child)) {
                assert.equal(child.text, "\ud800");
                seen.add("head");
            }
            else if (isTemplateTail(child)) {
                assert.equal(child.text, "\udc00");
                seen.add("tail");
            }
            child.forEachChild(walk);
        });
        assert.ok(seen.has("head"));
        assert.ok(seen.has("tail"));
    }
    finally {
        api.close();
    }
});

test("Object equality", () => {
const api = spawnAPI();
try {
Expand Down
44 changes: 44 additions & 0 deletions _packages/native-preview/test/wtf8.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import assert from "node:assert";
import {
describe,
test,
} from "node:test";
import { Wtf8Decoder } from "../src/api/node/wtf8.ts";

describe("Wtf8Decoder", () => {
    // Well-formed UTF-8, including a supplementary-plane character, round-trips.
    test("decodes standard UTF-8", () => {
        const expected = "hello 🦀";
        const encoded = new TextEncoder().encode(expected);
        assert.strictEqual(new Wtf8Decoder().decode(encoded), expected);
    });

    // Surrogates encoded as three-byte sequences survive decoding, while the
    // neighbouring U+D7FF and the four-byte crab emoji decode as usual.
    test("preserves WTF-8 encoded lone surrogates", () => {
        const crab = [0xF0, 0x9F, 0xA6, 0x80]; // U+1F980
        const encoded = Uint8Array.of(
            ...crab,
            ...[0xED, 0x9F, 0xBF], // U+D7FF, the last code point before the surrogate range
            ...[0xED, 0xA0, 0x80], // U+D800
            ...[0xED, 0xA0, 0x81], // U+D801
            ...[0xED, 0xB0, 0x80], // U+DC00
            ...crab,
        );
        const decoded = new Wtf8Decoder().decode(encoded);

        const codeUnits: number[] = [];
        for (let i = 0; i < decoded.length; i++) {
            codeUnits.push(decoded.charCodeAt(i));
        }
        assert.deepStrictEqual(
            codeUnits,
            [0xD83E, 0xDD80, 0xD7FF, 0xD800, 0xD801, 0xDC00, 0xD83E, 0xDD80],
        );
    });
});
Loading