Skip to content

Commit dd32d97

Browse files
committed
feat: support fatal utf-8 on Node.js without ICU
1 parent 01699f9 commit dd32d97

2 files changed

Lines changed: 29 additions & 6 deletions

File tree

tests/utf8.test.js

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,16 @@ describe('random data', () => {
145145
const restored = []
146146
const ignoreBOM = true
147147

148+
let nativeFatal = nativeDecoder
149+
if (nativeFatal) {
150+
try {
151+
// Node.js built without ICU (in versions lacking the fix) does not support the 'fatal' option
152+
new TextDecoder('utf8', { fatal: true }) // eslint-disable-line no-new
153+
} catch {
154+
nativeFatal = false
155+
}
156+
}
157+
148158
test('utf8toStringLoose', (t) => {
149159
const textDecoder = nativeDecoder ? new TextDecoder('utf8', { ignoreBOM }) : null // polyfilled might be wrong
150160
const NativeBuffer = globalThis.Buffer && !globalThis.Buffer.TYPED_ARRAY_SUPPORT ? Buffer : null
@@ -158,7 +168,7 @@ describe('random data', () => {
158168
})
159169

160170
test('utf8toString (ascii)', (t) => {
161-
const textDecoder = nativeDecoder ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
171+
const textDecoder = nativeFatal ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
162172
for (const u8 of poolAscii) {
163173
const str = utf8toString(u8)
164174
t.assert.strictEqual(str, utf8toStringLoose(u8))
@@ -171,7 +181,7 @@ describe('random data', () => {
171181
})
172182

173183
test('utf8toString', (t) => {
174-
const textDecoder = nativeDecoder ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
184+
const textDecoder = nativeFatal ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
175185
t.assert.strictEqual(strings.length, pool.length)
176186
for (let i = 0; i < pool.length; i++) {
177187
const u8 = pool[i]
@@ -227,7 +237,7 @@ describe('random data', () => {
227237
})
228238

229239
test('utf8toString / utf8toStringLoose', (t) => {
230-
const textDecoder = nativeDecoder ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
240+
const textDecoder = nativeFatal ? new TextDecoder('utf8', { fatal: true, ignoreBOM }) : null
231241
t.assert.strictEqual(strings.length, pool.length)
232242
for (let i = 0; i < pool.length; i++) {
233243
const str = strings[i]

utf8.node.js

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11
import { assertUint8 } from './assert.js'
22
import { typedView } from './array.js'
3-
import { E_STRICT_UNICODE } from './fallback/utf8.js'
3+
import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
44
import { isAscii } from 'node:buffer'
55

66
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
77

8-
const decoderFatal = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true })
8+
let decoderFatal
99
const decoderLoose = new TextDecoder('utf-8', { ignoreBOM: true })
1010
const { isWellFormed } = String.prototype
1111
const isDeno = Boolean(globalThis.Deno)
1212

13+
try {
14+
decoderFatal = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true })
15+
} catch {
16+
// Without ICU, Node.js doesn't support fatal option for utf-8
17+
}
18+
1319
function encode(str, loose = false) {
1420
if (typeof str !== 'string') throw new TypeError('Input is not a string')
1521
const strLength = str.length
@@ -45,7 +51,14 @@ function decode(arr, loose = false) {
4551
return buf.latin1Slice(0, arr.byteLength) // .latin1Slice is faster than .asciiSlice
4652
}
4753

48-
return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr)
54+
if (loose) return decoderLoose.decode(arr)
55+
if (decoderFatal) return decoderFatal.decode(arr)
56+
57+
// We are in an env without native fatal decoder support (non-fixed Node.js without ICU)
58+
// If the loose result contains U+FFFD, re-encode it and compare with the input; this is still faster than the JS implementation
59+
const str = decoderLoose.decode(arr)
60+
if (str.includes('\uFFFD') && !Buffer.from(str).equals(arr)) throw new TypeError(E_STRICT)
61+
return str
4962
}
5063

5164
export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)

0 commit comments

Comments
 (0)