diff --git a/_packages/native-preview/src/api/async/api.ts b/_packages/native-preview/src/api/async/api.ts
index b6fc71a2c18..2d65c9da5d3 100644
--- a/_packages/native-preview/src/api/async/api.ts
+++ b/_packages/native-preview/src/api/async/api.ts
@@ -31,6 +31,7 @@ import {
     readSourceFileHash,
     RemoteSourceFile,
 } from "../node/node.ts";
+import { Wtf8Decoder } from "../node/wtf8.ts";
 import { ObjectRegistry } from "../objectRegistry.ts";
 import type {
     APIOptions,
@@ -315,7 +316,7 @@ export class Program {
     private client: Client;
     private sourceFileCache: SourceFileCache;
     private toPath: (fileName: string) => Path;
-    private decoder = new TextDecoder();
+    private decoder = new Wtf8Decoder();
 
     constructor(
         snapshotId: string,
diff --git a/_packages/native-preview/src/api/node/msgpack.ts b/_packages/native-preview/src/api/node/msgpack.ts
index 6e32df4283d..50248d27bda 100644
--- a/_packages/native-preview/src/api/node/msgpack.ts
+++ b/_packages/native-preview/src/api/node/msgpack.ts
@@ -1,6 +1,8 @@
 // Minimal msgpack encoder/decoder.
 // Supports: arrays, unsigned integers, strings, booleans, binary data.
 
+import { Wtf8Decoder } from "./wtf8.ts";
+
 // ── MessagePack format constants ────────────────────────────────────
 export const MSGPACK_FIXARRAY3 = 0x93; // 3-element fixarray
 export const MSGPACK_BIN8 = 0xc4;
@@ -39,7 +41,7 @@ export function writeBinHeader(buf: Uint8Array, off: number, len: number): numbe
 }
 
 const encoder = new TextEncoder();
-const decoder = new TextDecoder();
+const decoder = new Wtf8Decoder();
 
 export class MsgpackWriter {
     private buf: Uint8Array;
diff --git a/_packages/native-preview/src/api/node/node.ts b/_packages/native-preview/src/api/node/node.ts
index 8db099184a3..82919e55699 100644
--- a/_packages/native-preview/src/api/node/node.ts
+++ b/_packages/native-preview/src/api/node/node.ts
@@ -23,6 +23,7 @@ import {
     HEADER_OFFSET_STRUCTURED_DATA,
     NODE_LEN,
 } from "./protocol.ts";
+import { Wtf8Decoder } from "./wtf8.ts";
 
 // Re-export everything consumers need from the other two files.
 export { RemoteNode, RemoteNodeList } from "./node.generated.ts";
@@ -242,7 +243,7 @@ export function parseNodeHandle(handle: string): ParsedNodeHandle {
  * (e.g. from typeToTypeNode) that don't have a source file.
  */
 export function decodeNode(data: Uint8Array): Node {
-    const sf = new RemoteSourceFile(data, new TextDecoder());
+    const sf = new RemoteSourceFile(data, new Wtf8Decoder());
     return sf as unknown as Node;
 }
 
diff --git a/_packages/native-preview/src/api/node/protocol.ts b/_packages/native-preview/src/api/node/protocol.ts
index b21098cc2d4..dc28e0fa444 100644
--- a/_packages/native-preview/src/api/node/protocol.ts
+++ b/_packages/native-preview/src/api/node/protocol.ts
@@ -1,4 +1,4 @@
-export const PROTOCOL_VERSION = 5;
+export const PROTOCOL_VERSION = 6;
 
 export const HEADER_OFFSET_METADATA = 0;
 export const HEADER_OFFSET_HASH_LO0 = 4;
diff --git a/_packages/native-preview/src/api/node/wtf8.ts b/_packages/native-preview/src/api/node/wtf8.ts
new file mode 100644
index 00000000000..83b409664f9
--- /dev/null
+++ b/_packages/native-preview/src/api/node/wtf8.ts
@@ -0,0 +1,108 @@
+// WTF-8 is a superset of UTF-8 that can also represent lone UTF-16 surrogates
+// (U+D800..U+DFFF). Each surrogate is the three-byte sequence
+// 0xED, 0xA0..0xBF, 0x80..0xBF, which a plain UTF-8 TextDecoder would turn
+// into U+FFFD replacement characters.
+const surrogateLeadByte = 0xED;
+const surrogateSecondByteMin = 0xA0;
+const surrogateSecondByteMax = 0xBF;
+const continuationByteMin = 0x80;
+const continuationByteMax = 0xBF;
+type DecodeOptions = Parameters<TextDecoder["decode"]>[1];
+
+/** Returns true when the three bytes at `index` are a WTF-8 encoded surrogate. */
+function isWtf8Surrogate(bytes: Uint8Array, index: number): boolean {
+    return index + 2 < bytes.length
+        && bytes[index] === surrogateLeadByte
+        && bytes[index + 1] >= surrogateSecondByteMin
+        && bytes[index + 1] <= surrogateSecondByteMax
+        && bytes[index + 2] >= continuationByteMin
+        && bytes[index + 2] <= continuationByteMax;
+}
+
+/** Returns true when the three bytes at `index` are the UTF-8 encoding of U+FEFF. */
+function isEncodedBom(bytes: Uint8Array, index: number): boolean {
+    return index + 2 < bytes.length
+        && bytes[index] === 0xEF
+        && bytes[index + 1] === 0xBB
+        && bytes[index + 2] === 0xBF;
+}
+
+/**
+ * Rebuilds the UTF-16 code unit from the surrogate sequence at `index`.
+ * Only meaningful when `isWtf8Surrogate(bytes, index)` is true.
+ */
+function getSurrogateCodeUnit(bytes: Uint8Array, index: number): number {
+    // The 0xED lead byte always contributes the top nibble 0xD; the low 12
+    // bits come from the six payload bits of each of the two trailing bytes.
+    return 0xD000 | ((bytes[index + 1] & 0x3F) << 6) | (bytes[index + 2] & 0x3F);
+}
+
+/** Views any accepted buffer source as bytes without copying the data. */
+function toUint8Array(input: NodeJS.AllowSharedBufferSource): Uint8Array {
+    if (input instanceof Uint8Array) {
+        return input;
+    }
+    if (ArrayBuffer.isView(input)) {
+        return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
+    }
+    return new Uint8Array(input);
+}
+
+/**
+ * A `TextDecoder` that also accepts WTF-8: three-byte surrogate sequences are
+ * turned into the corresponding UTF-16 code unit instead of U+FFFD, and all
+ * other bytes are handed to the underlying decoder unchanged.
+ *
+ * Surrogate sequences are only recognized within a single `decode` call; one
+ * that is split across streamed chunks is not reassembled.
+ */
+export class Wtf8Decoder extends TextDecoder {
+    override decode(input?: NodeJS.AllowSharedBufferSource, options?: DecodeOptions): string {
+        if (input === undefined) {
+            return super.decode(input, options);
+        }
+
+        const bytes = toUint8Array(input);
+        const parts: string[] = [];
+        let segmentStart = 0;
+
+        // Decodes bytes[from, to) with the underlying decoder. A non-streaming
+        // decode call strips one leading U+FEFF as a byte order mark, which is
+        // wrong for a segment that merely follows a surrogate in the middle of
+        // the input, so such leading U+FEFF characters are emitted by hand.
+        const pushSegment = (from: number, to: number): void => {
+            let start = from;
+            if (from > 0 && !this.ignoreBOM) {
+                while (start + 2 < to && isEncodedBom(bytes, start)) {
+                    parts.push("\uFEFF");
+                    start += 3;
+                }
+            }
+            if (start < to) {
+                parts.push(super.decode(bytes.subarray(start, to), options));
+            }
+        };
+
+        // Only 0xED can begin a surrogate sequence, so hop between candidate
+        // positions with the native search instead of testing every byte.
+        let i = bytes.indexOf(surrogateLeadByte);
+        while (i !== -1) {
+            if (isWtf8Surrogate(bytes, i)) {
+                pushSegment(segmentStart, i);
+                parts.push(String.fromCharCode(getSurrogateCodeUnit(bytes, i)));
+                segmentStart = i + 3;
+                i = bytes.indexOf(surrogateLeadByte, segmentStart);
+            }
+            else {
+                i = bytes.indexOf(surrogateLeadByte, i + 1);
+            }
+        }
+
+        if (segmentStart === 0) {
+            // No surrogate found: a single plain decode of the whole input.
+            return super.decode(bytes, options);
+        }
+        pushSegment(segmentStart, bytes.length);
+        return parts.join("");
+    }
+}
diff --git a/_packages/native-preview/src/api/sync/api.ts b/_packages/native-preview/src/api/sync/api.ts
index 38af3e847d5..cbcb2decc2d 100644
--- a/_packages/native-preview/src/api/sync/api.ts
+++ b/_packages/native-preview/src/api/sync/api.ts
@@ -39,6 +39,7 @@ import {
     readSourceFileHash,
     RemoteSourceFile,
 } from "../node/node.ts";
+import { Wtf8Decoder } from "../node/wtf8.ts";
 import { ObjectRegistry } from "../objectRegistry.ts";
 import type {
     APIOptions,
@@ -323,7 +324,7 @@ export class Program {
     private client: Client;
     private sourceFileCache: SourceFileCache;
     private toPath: (fileName: string) => Path;
-    private decoder = new TextDecoder();
+    private decoder = new Wtf8Decoder();
 
     constructor(
         snapshotId: string,
diff --git a/_packages/native-preview/test/async/api.test.ts b/_packages/native-preview/test/async/api.test.ts
index fceba3ca6df..74f5347bb31 100644
--- a/_packages/native-preview/test/async/api.test.ts
+++ b/_packages/native-preview/test/async/api.test.ts
@@ -206,18 +206,24 @@ test("unicode escapes", async () => {
         "/tsconfig.json": "{}",
         "/src/1.ts": `"😃"`,
         "/src/2.ts": `"\\ud83d\\ude03"`,
+        "/src/3.ts": `"\\ud800a\\udc00"`,
     });
     try {
         const snapshot = await api.updateSnapshot({ openProject: "/tsconfig.json" });
         const project = snapshot.getProject("/tsconfig.json")!;
+        const expectedTexts = new Map([
+            ["/src/1.ts", "😃"],
+            ["/src/2.ts", "😃"],
+            ["/src/3.ts", "\ud800a\udc00"],
+        ]);
 
-        for (const file of ["/src/1.ts", "/src/2.ts"]) {
+        for (const file of expectedTexts.keys()) {
             const sourceFile = await project.program.getSourceFile(file);
             assert.ok(sourceFile);
 
             sourceFile.forEachChild(function visit(node) {
                 if (isStringLiteral(node)) {
-                    assert.equal(node.text, "😃");
+                    assert.equal(node.text, expectedTexts.get(file));
                 }
                 node.forEachChild(visit);
             });
@@ -228,6 +234,46 @@ test("unicode escapes", async () => {
     }
 });
 
+// Checks that lone surrogate escapes in the head and tail of a template
+// expression arrive in `node.text` as real surrogate code units.
+// NOTE(review): isTemplateHead and isTemplateTail must already be imported by
+// this file; no import change is visible in this patch — confirm.
+test("template unicode escapes", async () => {
+    const api = spawnAPI({
+        "/tsconfig.json": "{}",
+        // The file text is a template literal with \ud800 before the
+        // substitution and \udc00 after it, so the two escapes never pair up.
+        "/src/index.ts": "`\\ud800${0}\\udc00`",
+    });
+    try {
+        const snapshot = await api.updateSnapshot({ openProject: "/tsconfig.json" });
+        const project = snapshot.getProject("/tsconfig.json")!;
+        const sourceFile = await project.program.getSourceFile("/src/index.ts");
+        assert.ok(sourceFile);
+
+        // Track which template parts were visited so the test fails if the
+        // walk never reaches them instead of passing without any assertion.
+        let sawHead = false;
+        let sawTail = false;
+        sourceFile.forEachChild(function visit(node) {
+            if (isTemplateHead(node)) {
+                assert.equal(node.text, "\ud800");
+                sawHead = true;
+            }
+            else if (isTemplateTail(node)) {
+                assert.equal(node.text, "\udc00");
+                sawTail = true;
+            }
+            node.forEachChild(visit);
+        });
+        assert.ok(sawHead);
+        assert.ok(sawTail);
+    }
+    finally {
+        await api.close();
+    }
+});
+
 test("Object equality", async () => {
     const api = spawnAPI();
     try {
diff --git a/_packages/native-preview/test/encoder.test.ts b/_packages/native-preview/test/encoder.test.ts
index 0dd09db4c25..4ccc34d018a 100644
--- a/_packages/native-preview/test/encoder.test.ts
+++ b/_packages/native-preview/test/encoder.test.ts
@@ -62,7 +62,7 @@ describe("Encoder", () => {
         // Verify header
         const view = new DataView(encoded.buffer, encoded.byteOffset, encoded.byteLength);
         const metadata = view.getUint32(0, true);
-        assert.strictEqual(metadata >>> 24, 5, "protocol version should be 5");
+        assert.strictEqual(metadata >>> 24, 6, "protocol version should be 6");
 
         // Verify we can decode it
         const decoded = decode(encoded);
@@ -169,11 +169,11 @@ describe("Encoder", () => {
         assert.strictEqual(rootKind, SyntaxKind.IfStatement);
     });
 
-    test("protocol version is 5", () => {
+    test("protocol version is 6", () => {
         const sf = makeSF("", "/test.ts", []);
         const encoded = encodeSourceFile(sf);
         const view = new DataView(encoded.buffer, encoded.byteOffset, encoded.byteLength);
-        assert.strictEqual(view.getUint32(0, true) >>> 24, 5);
+        assert.strictEqual(view.getUint32(0, true) >>> 24, 6);
     });
 
     test("boolean properties are encoded", () => {
diff --git a/_packages/native-preview/test/sync/api.test.ts b/_packages/native-preview/test/sync/api.test.ts
index 53b53be92d2..359387865b0 100644
--- a/_packages/native-preview/test/sync/api.test.ts
+++ b/_packages/native-preview/test/sync/api.test.ts
@@ -214,18 +214,24 @@ test("unicode escapes", () => {
         "/tsconfig.json": "{}",
         "/src/1.ts": `"😃"`,
         "/src/2.ts": `"\\ud83d\\ude03"`,
+        "/src/3.ts": `"\\ud800a\\udc00"`,
     });
     try {
         const snapshot = api.updateSnapshot({ openProject: "/tsconfig.json" });
         const project = snapshot.getProject("/tsconfig.json")!;
+        const expectedTexts = new Map([
+            ["/src/1.ts", "😃"],
+            ["/src/2.ts", "😃"],
+            ["/src/3.ts", "\ud800a\udc00"],
+        ]);
 
-        for (const file of ["/src/1.ts", "/src/2.ts"]) {
+        for (const file of expectedTexts.keys()) {
             const sourceFile = project.program.getSourceFile(file);
             assert.ok(sourceFile);
 
             sourceFile.forEachChild(function visit(node) {
                 if (isStringLiteral(node)) {
-                    assert.equal(node.text, "😃");
+                    assert.equal(node.text, expectedTexts.get(file));
                 }
                 node.forEachChild(visit);
             });
@@ -236,6 +242,46 @@ test("unicode escapes", () => {
     }
 });
 
+// Checks that lone surrogate escapes in the head and tail of a template
+// expression arrive in `node.text` as real surrogate code units.
+// NOTE(review): isTemplateHead and isTemplateTail must already be imported by
+// this file; no import change is visible in this patch — confirm.
+test("template unicode escapes", () => {
+    const api = spawnAPI({
+        "/tsconfig.json": "{}",
+        // The file text is a template literal with \ud800 before the
+        // substitution and \udc00 after it, so the two escapes never pair up.
+        "/src/index.ts": "`\\ud800${0}\\udc00`",
+    });
+    try {
+        const snapshot = api.updateSnapshot({ openProject: "/tsconfig.json" });
+        const project = snapshot.getProject("/tsconfig.json")!;
+        const sourceFile = project.program.getSourceFile("/src/index.ts");
+        assert.ok(sourceFile);
+
+        // Track which template parts were visited so the test fails if the
+        // walk never reaches them instead of passing without any assertion.
+        let sawHead = false;
+        let sawTail = false;
+        sourceFile.forEachChild(function visit(node) {
+            if (isTemplateHead(node)) {
+                assert.equal(node.text, "\ud800");
+                sawHead = true;
+            }
+            else if (isTemplateTail(node)) {
+                assert.equal(node.text, "\udc00");
+                sawTail = true;
+            }
+            node.forEachChild(visit);
+        });
+        assert.ok(sawHead);
+        assert.ok(sawTail);
+    }
+    finally {
+        api.close();
+    }
+});
+
 test("Object equality", () => {
     const api = spawnAPI();
     try {
diff --git a/_packages/native-preview/test/wtf8.test.ts b/_packages/native-preview/test/wtf8.test.ts
new file mode 100644
index 00000000000..8fe8fcd3652
--- /dev/null
+++ b/_packages/native-preview/test/wtf8.test.ts
@@ -0,0 +1,39 @@
+import assert from "node:assert";
+import {
+    describe,
+    test,
+} from "node:test";
+import { Wtf8Decoder } from "../src/api/node/wtf8.ts";
+
+// Unit tests for Wtf8Decoder: plain UTF-8 must round-trip, and three-byte
+// surrogate sequences must come back as individual UTF-16 code units.
+describe("Wtf8Decoder", () => {
+    test("decodes standard UTF-8", () => {
+        const sample = "hello 🦀";
+        const encoded = new TextEncoder().encode(sample);
+        assert.strictEqual(new Wtf8Decoder().decode(encoded), sample);
+    });
+
+    test("preserves WTF-8 encoded lone surrogates", () => {
+        // dprint-ignore
+        const encoded = Uint8Array.of(
+            0xF0, 0x9F, 0xA6, 0x80, // U+1F980, an ordinary four-byte sequence
+            0xED, 0x9F, 0xBF,       // U+D7FF, the last code point below the surrogates
+            0xED, 0xA0, 0x80,       // U+D800, a high surrogate
+            0xED, 0xA0, 0x81,       // U+D801, a high surrogate
+            0xED, 0xB0, 0x80,       // U+DC00, a low surrogate
+            0xF0, 0x9F, 0xA6, 0x80, // U+1F980 again
+        );
+        const decoded = new Wtf8Decoder().decode(encoded);
+
+        // Compare code units rather than strings so lone surrogates are visible.
+        const codeUnits: number[] = [];
+        for (let i = 0; i < decoded.length; i++) {
+            codeUnits.push(decoded.charCodeAt(i));
+        }
+        assert.deepStrictEqual(
+            codeUnits,
+            [0xD83E, 0xDD80, 0xD7FF, 0xD800, 0xD801, 0xDC00, 0xD83E, 0xDD80],
+        );
+    });
+});
diff --git a/internal/api/encoder/encoder.go b/internal/api/encoder/encoder.go
index 22b4c7fff5d..a50129f65ec 100644
--- a/internal/api/encoder/encoder.go
+++ b/internal/api/encoder/encoder.go
@@ -59,7 +59,7 @@ const (
 )
 
 const (
-	ProtocolVersion uint8 = 5
+	ProtocolVersion uint8 = 6
 )
 
 // Source File Binary Format
@@ -108,10 +108,11 @@ const (
 // String data (variable)
 // ----------------------
 //
-// The string data section contains UTF-8 encoded string data. In typical cases, the entirety of the string data is the
-// source file text, and individual nodes with string properties reference their positional slice of the file text. In
-// cases where a node's string property is not equal to the slice of file text at its position, the unique string is
-// appended to the string data section after the file text.
+// The string data section contains UTF-8 encoded string data, with WTF-8 used for JS strings containing lone UTF-16
+// surrogates. In typical cases, the entirety of the string data is the source file text, and individual nodes with
+// string properties reference their positional slice of the file text. In cases where a node's string property is not
+// equal to the slice of file text at its position, the unique string is appended to the string data section after the
+// file text.
 //
 // Extended node data (variable)
 // -----------------------------
@@ -546,21 +547,21 @@ func recordExtendedData_SourceFile(node *ast.Node, strs *stringTable, positionMa
 
 func recordExtendedData_TemplateHead(node *ast.Node, strs *stringTable, positionMap *ast.PositionMap, extendedData *[]byte, structuredData *[]byte) {
 	n := node.AsTemplateHead()
-	textIndex := strs.add(n.Text, node.Kind, node.Pos(), node.End())
+	textIndex := strs.add(encodeTemplateTextForJS(n.Text, n.RawText), node.Kind, node.Pos(), node.End())
 	rawTextIndex := strs.add(n.RawText, node.Kind, node.Pos(), node.End())
 	*extendedData = appendUint32s(*extendedData, textIndex, rawTextIndex, uint32(n.TemplateFlags))
 }
 
 func recordExtendedData_TemplateMiddle(node *ast.Node, strs *stringTable, positionMap *ast.PositionMap, extendedData *[]byte, structuredData *[]byte) {
 	n := node.AsTemplateMiddle()
-	textIndex := strs.add(n.Text, node.Kind, node.Pos(), node.End())
+	textIndex := strs.add(encodeTemplateTextForJS(n.Text, n.RawText), node.Kind, node.Pos(), node.End())
 	rawTextIndex := strs.add(n.RawText, node.Kind, node.Pos(), node.End())
 	*extendedData = appendUint32s(*extendedData, textIndex, rawTextIndex, uint32(n.TemplateFlags))
 }
 
 func recordExtendedData_TemplateTail(node *ast.Node, strs *stringTable, positionMap *ast.PositionMap, extendedData *[]byte, structuredData *[]byte) {
 	n := node.AsTemplateTail()
-	textIndex := strs.add(n.Text, node.Kind, node.Pos(), node.End())
+	textIndex := strs.add(encodeTemplateTextForJS(n.Text, n.RawText), node.Kind, node.Pos(), node.End())
 	rawTextIndex := strs.add(n.RawText, node.Kind, node.Pos(), node.End())
 	*extendedData = appendUint32s(*extendedData, textIndex, rawTextIndex, uint32(n.TemplateFlags))
 }
@@ -704,7 +705,7 @@ func getNodeCommonData_SyntheticExpression(_ *ast.Node) uint32 {
 
 func recordExtendedData_StringLiteral(node *ast.Node, strs *stringTable, _ *ast.PositionMap, extendedData *[]byte, _ *[]byte) {
 	n := node.AsStringLiteral()
-	textIndex := strs.add(n.Text, node.Kind, node.Pos(), node.End())
+	textIndex := strs.add(encodeLiteralTextForJS(n.Text, node, strs), node.Kind, node.Pos(), node.End())
 	*extendedData = appendUint32s(*extendedData, textIndex, uint32(n.TokenFlags))
 }
 
@@ -728,6 +729,6 @@ func recordExtendedData_RegularExpressionLiteral(node *ast.Node, strs *stringTab
 
 func recordExtendedData_NoSubstitutionTemplateLiteral(node *ast.Node, strs *stringTable, _ *ast.PositionMap, extendedData *[]byte, _ *[]byte) {
 	n := node.AsNoSubstitutionTemplateLiteral()
-	textIndex := strs.add(n.Text, node.Kind, node.Pos(), node.End())
+	textIndex := strs.add(encodeLiteralTextForJS(n.Text, node, strs), node.Kind, node.Pos(), node.End())
 	*extendedData = appendUint32s(*extendedData, textIndex, uint32(n.TemplateFlags))
 }
diff --git a/internal/api/encoder/encoder_test.go b/internal/api/encoder/encoder_test.go
index f676bf9eebd..7c6931122e3 100644
--- a/internal/api/encoder/encoder_test.go
+++ b/internal/api/encoder/encoder_test.go
@@ -53,6 +53,73 @@ func TestEncodeSourceFileWithUnicodeEscapes(t *testing.T) {
 	})
 }
 
+// TestEncodeSourceFilePreservesSurrogateEscapes checks the bytes stored for a
+// string literal that mixes paired and lone surrogate escapes: escaped pairs
+// become one four-byte sequence, lone surrogates become three-byte WTF-8
+// sequences, and \uD7FF (just below the surrogate range) is ordinary UTF-8.
+func TestEncodeSourceFilePreservesSurrogateEscapes(t *testing.T) {
+	t.Parallel()
+	sourceFile := parser.ParseSourceFile(ast.SourceFileParseOptions{
+		FileName: "/test.ts",
+		Path:     "/test.ts",
+	}, `let s = "\uD83E\uDD80\uD800a\uDC00\uD7FF\uD801\uDBFF\uDFFF";`, core.ScriptKindTS)
+
+	buf, err := encoder.EncodeSourceFile(sourceFile)
+	assert.NilError(t, err)
+
+	text, ok := findExtendedNodeText(buf, ast.KindStringLiteral)
+	assert.Assert(t, ok)
+	assert.DeepEqual(t, text, []byte{
+		0xf0, 0x9f, 0xa6, 0x80, // \uD83E\uDD80
+		0xed, 0xa0, 0x80, // \uD800
+		'a',
+		0xed, 0xb0, 0x80, // \uDC00
+		0xed, 0x9f, 0xbf, // \uD7FF
+		0xed, 0xa0, 0x81, // \uD801
+		0xf4, 0x8f, 0xbf, 0xbf, // \uDBFF\uDFFF
+	})
+}
+
+// TestEncodeSourceFilePreservesTemplateSurrogateEscapes checks that lone
+// surrogate escapes on either side of a template substitution are stored as
+// three-byte WTF-8 sequences in the head and tail text.
+func TestEncodeSourceFilePreservesTemplateSurrogateEscapes(t *testing.T) {
+	t.Parallel()
+	sourceFile := parser.ParseSourceFile(ast.SourceFileParseOptions{
+		FileName: "/test.ts",
+		Path:     "/test.ts",
+	}, "let s = `\\uD800${1}\\uDC00`;", core.ScriptKindTS)
+
+	buf, err := encoder.EncodeSourceFile(sourceFile)
+	assert.NilError(t, err)
+
+	headText, ok := findExtendedNodeText(buf, ast.KindTemplateHead)
+	assert.Assert(t, ok)
+	assert.DeepEqual(t, headText, []byte{0xed, 0xa0, 0x80})
+
+	tailText, ok := findExtendedNodeText(buf, ast.KindTemplateTail)
+	assert.Assert(t, ok)
+	assert.DeepEqual(t, tailText, []byte{0xed, 0xb0, 0x80})
+}
+
+// TestEncodeSourceFileFallsBackForUnterminatedSurrogateEscape checks that an
+// unterminated string literal keeps the parser-provided text, in which the lone
+// surrogate appears as U+FFFD, rather than a re-decoded WTF-8 form.
+func TestEncodeSourceFileFallsBackForUnterminatedSurrogateEscape(t *testing.T) {
+	t.Parallel()
+	sourceFile := parser.ParseSourceFile(ast.SourceFileParseOptions{
+		FileName: "/test.ts",
+		Path:     "/test.ts",
+	}, `let s = "\uD800a`, core.ScriptKindTS)
+
+	buf, err := encoder.EncodeSourceFile(sourceFile)
+	assert.NilError(t, err)
+
+	text, ok := findExtendedNodeText(buf, ast.KindStringLiteral)
+	assert.Assert(t, ok)
+	assert.DeepEqual(t, text, []byte("\ufffda"))
+}
+
 func BenchmarkEncodeSourceFile(b *testing.B) {
 	repo.SkipIfNoTypeScriptSubmodule(b)
 	filePath := filepath.Join(repo.TypeScriptSubmodulePath(), "src/compiler/checker.ts")
@@ -73,6 +130,37 @@ func readUint32(buf []byte, offset int) uint32 {
 	return binary.LittleEndian.Uint32(buf[offset : offset+4])
 }
 
+// findExtendedNodeText looks for the first node record of the given kind,
+// scanning from one record past the start of the node section to the end of the
+// buffer. For that record it reads the uint32 at the extended-data offset held
+// in the masked data field and returns the string with that index. The second
+// result is false when no record matches.
+// NOTE(review): assumes the node section runs to the end of the buffer — confirm.
+func findExtendedNodeText(encoded []byte, kind ast.Kind) ([]byte, bool) {
+	offsetExtended := readUint32(encoded, encoder.HeaderOffsetExtendedData)
+	offsetNodes := readUint32(encoded, encoder.HeaderOffsetNodes)
+	for i := int(offsetNodes) + encoder.NodeSize; i < len(encoded); i += encoder.NodeSize {
+		if ast.Kind(readUint32(encoded, i+encoder.NodeOffsetKind)) != kind {
+			continue
+		}
+		data := readUint32(encoded, i+encoder.NodeOffsetData)
+		textIndex := readUint32(encoded, int(offsetExtended+(data&encoder.NodeDataStringIndexMask)))
+		return encodedString(encoded, textIndex), true
+	}
+	return nil, false
+}
+
+// encodedString returns the slice of the string data section bounded by the two
+// consecutive uint32 offsets stored at byte position stringIndex*4 of the
+// string offsets table.
+func encodedString(encoded []byte, stringIndex uint32) []byte {
+	offsetStringOffsets := readUint32(encoded, encoder.HeaderOffsetStringOffsets)
+	offsetStrings := readUint32(encoded, encoder.HeaderOffsetStringData)
+	strStart := readUint32(encoded, int(offsetStringOffsets+stringIndex*4))
+	strEnd := readUint32(encoded, int(offsetStringOffsets+stringIndex*4)+4)
+	return encoded[offsetStrings+strStart : offsetStrings+strEnd]
+}
+
 func formatEncodedSourceFile(encoded []byte) string {
 	var result strings.Builder
 	var getIndent func(parentIndex uint32) string
diff --git a/internal/api/encoder/literal_text.go b/internal/api/encoder/literal_text.go
new file mode 100644
index 00000000000..4ef27c3fdba
--- /dev/null
+++ b/internal/api/encoder/literal_text.go
@@ -0,0 +1,323 @@
+package encoder
+
+import (
+	"strings"
+	"unicode/utf8"
+
+	"github.com/microsoft/typescript-go/internal/ast"
+	"github.com/microsoft/typescript-go/internal/scanner"
+)
+
+// Boundaries of the UTF-16 surrogate ranges.
+const (
+	surr1    = 0xd800  // first high surrogate
+	surr2    = 0xdc00  // first low surrogate
+	surr3    = 0xe000  // first value past the surrogate range
+	surrSelf = 0x10000 // first code point that takes a surrogate pair in UTF-16
+)
+
+// maxCodePoint is the largest code point a \u{...} escape may denote.
+const maxCodePoint = 0x10ffff
+
+// encodeLiteralTextForJS returns the text to record for a string literal or a
+// no-substitution template literal. If the literal's source contains escapes
+// that denote UTF-16 surrogates, the text is decoded again from the source so
+// that lone surrogates survive as WTF-8 sequences; otherwise, or when the
+// source cannot be decoded, the caller-supplied text is returned unchanged.
+func encodeLiteralTextForJS(text string, node *ast.Node, strs *stringTable) string {
+	// Backslashes are not escape characters inside JSX attribute strings, so a
+	// sequence such as \uD800 there is literal text and must not be decoded.
+	if node.Parent != nil && node.Parent.Kind == ast.KindJsxAttribute {
+		return text
+	}
+	raw, ok := rawQuotedLiteralText(node, strs)
+	if !ok {
+		return text
+	}
+	decoded, hasSurrogate, ok := decodeQuotedLiteralText(raw)
+	if !ok || !hasSurrogate {
+		return text
+	}
+	return decoded
+}
+
+// rawQuotedLiteralText returns the source text of node, quotes included, with
+// leading trivia skipped. It reports false when the node's range is not inside
+// the file text or the text is not a quoted literal closed by the same quote
+// character that opens it.
+func rawQuotedLiteralText(node *ast.Node, strs *stringTable) (string, bool) {
+	if node.Pos() < 0 || node.End() <= 0 || node.End() > len(strs.fileText) {
+		return "", false
+	}
+	start := scanner.SkipTrivia(strs.fileText, node.Pos())
+	if start >= node.End() {
+		return "", false
+	}
+	switch strs.fileText[start] {
+	case '\'', '"', '`':
+		if node.End()-start < 2 || strs.fileText[node.End()-1] != strs.fileText[start] {
+			return "", false
+		}
+		return strs.fileText[start:node.End()], true
+	default:
+		return "", false
+	}
+}
+
+// decodeQuotedLiteralText decodes the escapes of a literal given with its
+// surrounding quotes. Backtick literals get template line-ending
+// normalization, because the cooked value of a no-substitution template has
+// CR and CRLF replaced by LF like any other template text.
+func decodeQuotedLiteralText(raw string) (text string, hasSurrogate bool, ok bool) {
+	if len(raw) < 2 {
+		return "", false, false
+	}
+	return decodeEscapedLiteralText(raw[1:len(raw)-1], raw[0] == '`')
+}
+
+// encodeTemplateTextForJS is the counterpart of encodeLiteralTextForJS for
+// template heads, middles and tails: it returns the decoding of rawText when
+// that succeeds and contains surrogate escapes, and text otherwise.
+func encodeTemplateTextForJS(text string, rawText string) string {
+	decoded, hasSurrogate, ok := decodeEscapedLiteralText(rawText, true)
+	if !ok || !hasSurrogate {
+		return text
+	}
+	return decoded
+}
+
+// decodeEscapedLiteralText decodes the escape sequences in raw, the source
+// text of a literal without its delimiters. Escaped surrogate pairs are
+// combined into one code point and lone surrogates are written as three-byte
+// WTF-8 sequences. hasSurrogate reports whether any escape denoted a
+// surrogate; ok is false when an escape is malformed. When
+// normalizeTemplateLineEndings is set, unescaped CR and CRLF become LF.
+func decodeEscapedLiteralText(raw string, normalizeTemplateLineEndings bool) (text string, hasSurrogate bool, ok bool) {
+	var out strings.Builder
+	for i := 0; i < len(raw); {
+		if raw[i] != '\\' {
+			if normalizeTemplateLineEndings && raw[i] == '\r' {
+				out.WriteByte('\n')
+				i++
+				if i < len(raw) && raw[i] == '\n' {
+					i++
+				}
+				continue
+			}
+			out.WriteByte(raw[i])
+			i++
+			continue
+		}
+		ch, next, ok := decodeEscape(raw, i, len(raw))
+		if !ok {
+			return "", false, false
+		}
+		if codePointIsHighSurrogate(ch) {
+			hasSurrogate = true
+			// A high surrogate forms a pair only with a low surrogate escape
+			// that follows it immediately.
+			if nextCh, nextNext, ok := decodeUnicodeEscape(raw, next, len(raw)); ok && codePointIsLowSurrogate(nextCh) {
+				out.WriteRune(surrogatePairToCodepoint(ch, nextCh))
+				i = nextNext
+				continue
+			}
+		} else if codePointIsLowSurrogate(ch) {
+			hasSurrogate = true
+		}
+		out.WriteString(encodeCodePointForJS(ch))
+		i = next
+	}
+	return out.String(), hasSurrogate, true
+}
+
+// decodeEscape decodes the escape sequence whose backslash is at raw[start],
+// reading no further than end. It returns the decoded value, the index just
+// past the sequence, and whether the sequence is well formed. A line
+// continuation yields -1, for which encodeCodePointForJS emits nothing.
+func decodeEscape(raw string, start int, end int) (rune, int, bool) {
+	if start+1 >= end {
+		return 0, 0, false
+	}
+	switch raw[start+1] {
+	case '0':
+		// \0 is NUL unless a digit follows; then it is read as an octal escape.
+		if start+2 >= end || !isDigit(raw[start+2]) {
+			return 0, start + 2, true
+		}
+		return decodeOctalEscape(raw, start, end, 3)
+	case '1', '2', '3':
+		return decodeOctalEscape(raw, start, end, 3)
+	case '4', '5', '6', '7':
+		// With a first digit above 3 at most two digits are consumed.
+		return decodeOctalEscape(raw, start, end, 2)
+	case 'u':
+		return decodeUnicodeEscape(raw, start, end)
+	case 'x':
+		if start+4 > end {
+			return 0, 0, false
+		}
+		hi, ok := hexValue(raw[start+2])
+		if !ok {
+			return 0, 0, false
+		}
+		lo, ok := hexValue(raw[start+3])
+		if !ok {
+			return 0, 0, false
+		}
+		return rune(hi<<4 | lo), start + 4, true
+	case 'b':
+		return '\b', start + 2, true
+	case 't':
+		return '\t', start + 2, true
+	case 'n':
+		return '\n', start + 2, true
+	case 'v':
+		return '\v', start + 2, true
+	case 'f':
+		return '\f', start + 2, true
+	case 'r':
+		return '\r', start + 2, true
+	case '\r':
+		// Backslash before CR or CRLF is a line continuation.
+		next := start + 2
+		if next < end && raw[next] == '\n' {
+			next++
+		}
+		return -1, next, true
+	case '\n':
+		return -1, start + 2, true
+	default:
+		ch, size := utf8.DecodeRuneInString(raw[start+1 : end])
+		if ch == utf8.RuneError && size == 0 {
+			return 0, 0, false
+		}
+		// U+2028 and U+2029 are line terminators as well, so a backslash
+		// before either is a line continuation, not an escaped character.
+		if ch == '\u2028' || ch == '\u2029' {
+			return -1, start + 1 + size, true
+		}
+		return ch, start + 1 + size, true
+	}
+}
+
+// decodeOctalEscape decodes an octal escape of at most maxDigits digits whose
+// backslash is at raw[start]. The first digit is taken as given; further
+// digits are consumed while they are octal and within end.
+func decodeOctalEscape(raw string, start int, end int, maxDigits int) (rune, int, bool) {
+	next := start + 2
+	for digits := 1; digits < maxDigits && next < end && isOctalDigit(raw[next]); digits++ {
+		next++
+	}
+	return parseOctalEscape(raw[start+1 : next]), next, true
+}
+
+// parseOctalEscape returns the numeric value of a string of octal digits.
+func parseOctalEscape(text string) rune {
+	value := rune(0)
+	for i := range len(text) {
+		value = value*8 + rune(text[i]-'0')
+	}
+	return value
+}
+
+// isDigit reports whether b is an ASCII decimal digit.
+func isDigit(b byte) bool {
+	return '0' <= b && b <= '9'
+}
+
+// isOctalDigit reports whether b is an ASCII octal digit.
+func isOctalDigit(b byte) bool {
+	return '0' <= b && b <= '7'
+}
+
+// decodeUnicodeEscape decodes a \uXXXX or \u{X...} escape that begins at
+// raw[start], reading no further than end. It returns the value, which may be
+// a surrogate, the index just past the escape, and whether the text at start
+// is a well-formed unicode escape.
+func decodeUnicodeEscape(raw string, start int, end int) (rune, int, bool) {
+	if start+1 >= end || raw[start] != '\\' || raw[start+1] != 'u' {
+		return 0, 0, false
+	}
+	if start+2 < end && raw[start+2] == '{' {
+		value := 0
+		i := start + 3
+		for ; i < end && raw[i] != '}'; i++ {
+			digit, ok := hexValue(raw[i])
+			if !ok {
+				return 0, 0, false
+			}
+			value = value*16 + digit
+			// Stop as soon as the range is exceeded so that a long run of
+			// digits cannot overflow value and wrap back into range.
+			if value > maxCodePoint {
+				return 0, 0, false
+			}
+		}
+		// The escape must be closed and must hold at least one digit.
+		if i >= end || i == start+3 {
+			return 0, 0, false
+		}
+		return rune(value), i + 1, true
+	}
+	if start+6 > end {
+		return 0, 0, false
+	}
+	value := 0
+	for i := start + 2; i < start+6; i++ {
+		digit, ok := hexValue(raw[i])
+		if !ok {
+			return 0, 0, false
+		}
+		value = value*16 + digit
+	}
+	return rune(value), start + 6, true
+}
+
+// hexValue returns the value of the hexadecimal digit b and whether b is one.
+func hexValue(b byte) (int, bool) {
+	switch {
+	case '0' <= b && b <= '9':
+		return int(b - '0'), true
+	case 'a' <= b && b <= 'f':
+		return int(b-'a') + 10, true
+	case 'A' <= b && b <= 'F':
+		return int(b-'A') + 10, true
+	default:
+		return 0, false
+	}
+}
+
+// codePointIsHighSurrogate reports whether r lies in U+D800..U+DBFF.
+func codePointIsHighSurrogate(r rune) bool {
+	return surr1 <= r && r < surr2
+}
+
+// codePointIsLowSurrogate reports whether r lies in U+DC00..U+DFFF.
+func codePointIsLowSurrogate(r rune) bool {
+	return surr2 <= r && r < surr3
+}
+
+// surrogatePairToCodepoint combines a high surrogate r1 and a low surrogate r2
+// into the supplementary code point they encode.
+func surrogatePairToCodepoint(r1, r2 rune) rune {
+	return ((r1 - surr1) << 10) + (r2 - surr2) + surrSelf
+}
+
+// encodeCodePointForJS returns the bytes to emit for r: nothing for a negative
+// value (a line continuation), a three-byte WTF-8 sequence for a surrogate,
+// and regular UTF-8 otherwise.
+func encodeCodePointForJS(r rune) string {
+	if r < 0 {
+		return ""
+	}
+	if codePointIsHighSurrogate(r) || codePointIsLowSurrogate(r) {
+		// string(r) would replace a surrogate with U+FFFD, so build the
+		// generalized three-byte form by hand; the lead byte is always 0xED.
+		return string([]byte{
+			0xed,
+			byte(0x80 | ((r >> 6) & 0x3f)),
+			byte(0x80 | (r & 0x3f)),
+		})
+	}
+	return string(r)
+}