Skip to content

Commit 6754f84

Browse files
committed
refactor: move basic utf16 API impl to ./fallback dir
1 parent da5b153 commit 6754f84

File tree

3 files changed

+40
-39
lines changed

3 files changed

+40
-39
lines changed

fallback/utf16.js

Lines changed: 35 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,47 @@
11
import { decodeUCS2, encodeCharcodes } from './latin1.js'
2-
import { isLE, E_STRICT_UNICODE } from './_utils.js'
2+
import { isLE, E_STRING, E_STRICT_UNICODE } from './_utils.js'
33

44
export const E_STRICT = 'Input is not well-formed utf16'
5+
const { TextDecoder } = globalThis
6+
const isWellFormedStr = String.prototype.isWellFormed
57

68
const replacementCodepoint = 0xff_fd
79
const replacementCodepointSwapped = 0xfd_ff
810

911
const to16 = (a) => new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2) // Requires checked length and alignment!
1012

13+
export function encodeApi(str, loose, format) {
14+
if (typeof str !== 'string') throw new TypeError(E_STRING)
15+
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
16+
throw new TypeError('Unknown format')
17+
}
18+
19+
// On v8 and SpiderMonkey, check via isWellFormed is faster than js
20+
// On JSC, check during loop is faster than isWellFormed
21+
// If isWellFormed is available, we skip check during decoding and recheck after
22+
// If isWellFormed is unavailable, we check in js during decoding
23+
if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT_UNICODE)
24+
const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
25+
const u16 = encode(str, loose, !loose && isWellFormedStr, shouldSwap)
26+
27+
// Bytes are already swapped and format is already checked, we need to just cast the view
28+
return format === 'uint16' ? u16 : new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength)
29+
}
30+
31+
export function decodeApiDecoders(input, loose, format) {
32+
if (format === 'uint16') {
33+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
34+
} else if (format === 'uint8-le' || format === 'uint8-be') {
35+
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
36+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
37+
} else {
38+
throw new TypeError('Unknown format')
39+
}
40+
41+
const encoding = format === 'uint8-le' || (format === 'uint16' && isLE) ? 'utf-16le' : 'utf-16be'
42+
return new TextDecoder(encoding, { ignoreBOM: true, fatal: !loose }).decode(input)
43+
}
44+
1145
export function to16input(u8, le) {
1246
// Assume even number of bytes
1347
if (le === isLE) return to16(u8.byteOffset % 2 === 0 ? u8 : Uint8Array.from(u8))

utf16.native.js

Lines changed: 3 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import * as js from './fallback/utf16.js'
2-
import { nativeDecoder, isLE, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
2+
import { nativeDecoder, isLE } from './fallback/_utils.js'
33

44
function checkDecoders() {
55
// Not all barebone engines with TextDecoder support something except utf-8
@@ -28,24 +28,6 @@ const { E_STRICT } = js
2828

2929
// Unlike utf8, operates on Uint16Arrays by default
3030

31-
function encode(str, loose = false, format = 'uint16') {
32-
if (typeof str !== 'string') throw new TypeError(E_STRING)
33-
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
34-
throw new TypeError('Unknown format')
35-
}
36-
37-
// On v8 and SpiderMonkey, check via isWellFormed is faster than js
38-
// On JSC, check during loop is faster than isWellFormed
39-
// If isWellFormed is available, we skip check during decoding and recheck after
40-
// If isWellFormed is unavailable, we check in js during decoding
41-
if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
42-
const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
43-
const u16 = js.encode(str, loose, !loose && isWellFormed, shouldSwap)
44-
45-
// Bytes are already swapped and format is already checked, we need to just cast the view
46-
return format === 'uint16' ? u16 : new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength)
47-
}
48-
4931
function decode(input, loose = false, format = 'uint16') {
5032
let u16
5133
switch (format) {
@@ -77,7 +59,7 @@ function decode(input, loose = false, format = 'uint16') {
7759
return str
7860
}
7961

80-
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
81-
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
62+
export const utf16fromString = (str, format = 'uint16') => js.encodeApi(str, false, format)
63+
export const utf16fromStringLoose = (str, format = 'uint16') => js.encodeApi(str, true, format)
8264
export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
8365
export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)

utf16.node.js

Lines changed: 2 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
import { isDeno, isLE, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
2-
import { E_STRICT } from './fallback/utf16.js'
2+
import { E_STRICT, decodeApiDecoders } from './fallback/utf16.js'
33

44
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
55

6-
const { TextDecoder } = globalThis
76
const { isWellFormed, toWellFormed } = String.prototype
87
const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
98

@@ -62,21 +61,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
6261
throw new TypeError(E_STRICT)
6362
}
6463

65-
function decodeDecoder(input, loose = false, format = 'uint16') {
66-
if (format === 'uint16') {
67-
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
68-
} else if (format === 'uint8-le' || format === 'uint8-be') {
69-
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
70-
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
71-
} else {
72-
throw new TypeError('Unknown format')
73-
}
74-
75-
const encoding = format === 'uint8-le' || (format === 'uint16' && isLE) ? 'utf-16le' : 'utf-16be'
76-
return new TextDecoder(encoding, { ignoreBOM: true, fatal: !loose }).decode(input) // TODO: cache decoder?
77-
}
78-
79-
const decode = isDeno ? decodeDecoder : decodeNode
64+
const decode = isDeno ? decodeApiDecoders : decodeNode
8065

8166
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
8267
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)

0 commit comments

Comments
 (0)