Skip to content

Commit 0e3427a

Browse files
committed
chore: getting closer at utf-32 impl
1 parent 6b01b50 commit 0e3427a

File tree

2 files changed

+12
-27
lines changed

2 files changed

+12
-27
lines changed

fallback/utf32.js

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import { isLE } from './_utils.js'
22

33
export const E_STRICT = 'Input is not well-formed utf32'
4-
const replacementCodepoint = 0xff_fd
54

65
/** Returns a Uint8Array over the same buffer region as the given view; the bytes are shared, not copied. */
export const to8 = ({ buffer, byteOffset, byteLength }) => new Uint8Array(buffer, byteOffset, byteLength)
76
// Views the same buffer region as 32-bit words without copying.
// Requires checked length and alignment!
const to32 = ({ buffer, byteOffset, byteLength }) => new Uint32Array(buffer, byteOffset, byteLength / 4)
87

98
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
109

10+
// Surrogates are a UTF-16 construct and cannot be represented in UTF-32; iconv-lite got this wrong
11+
// See https://unicode.org/faq/utf_bom#utf32-7
12+
1113
// Assumes checked length % 4 === 0, otherwise does not swap tail
1214
export function swap32(u8) {
1315
let i = 0
@@ -31,12 +33,8 @@ export function to32input(u8, le) {
3133
return to32(swap32(Uint8Array.from(u8)))
3234
}
3335

34-
export function decode(u32) {
35-
return String.fromCodePoint.apply(String, u32) // TODO: max len
36-
}
37-
3836
// No surrogates (paired or unpaired), no out-of-range code points
39-
export function isStrict(u32) {
37+
export function isWellFormed(u32) {
4038
const length = u32.length
4139
for (let i = 0; i < length; i++) {
4240
const x = u32[i]
@@ -50,27 +48,13 @@ export function toWellFormed(u32) {
5048
const length = u32.length
5149
for (let i = 0; i < length; i++) {
5250
const x = u32[i]
53-
if (x >= 0xd8_00) {
54-
if (x < 0xe0_00) {
55-
// An unexpected trail or a lead at the very end of input
56-
if (x > 0xdb_ff || i + 1 >= length) {
57-
u32[i] = replacementCodepoint
58-
} else {
59-
const next = u32[i + 1] // Process valid pairs immediately
60-
if (next < 0xdc_00 || next >= 0xe0_00) {
61-
u32[i] = replacementCodepoint
62-
} else {
63-
i++ // consume next
64-
}
65-
}
66-
} else if (x >= 0x11_00_00) {
67-
// also fix out-of-range in the same pass, both are unlikely
68-
u32[i] = replacementCodepoint
69-
}
70-
}
51+
if (x >= 0xd8_00 && (x < 0xe0_00 || x >= 0x11_00_00)) u32[i] = 0xff_fd
7152
}
53+
}
7254

73-
return u32
55+
// Only defined on valid input
56+
export function decode(u32) {
57+
return String.fromCodePoint.apply(String, u32) // TODO: max len
7458
}
7559

7660
// Only defined on valid input

utf32.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,10 @@ function decode(input, loose = false, format = 'uint32') {
4646

4747
// TODO: recheck spidermonkey/Firefox/jsc perf
4848

49-
if (!js.isStrict(u32)) {
49+
if (!js.isWellFormed(u32)) {
5050
if (!loose) throw new RangeError(js.E_STRICT)
51-
u32 = js.toWellFormed(new Uint32Array(u32))
51+
if (u32.buffer === input.buffer) u32 = new Uint32Array(u32)
52+
js.toWellFormed(u32)
5253
}
5354

5455
// Significantly faster on Hermes

0 commit comments

Comments
 (0)