@@ -8,6 +8,9 @@ const to32 = (a) => new Uint32Array(a.buffer, a.byteOffset, a.byteLength / 4) //
88
99/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
1010
11+ // Surrogates are a UTF-16 thing and cannot be represented in UTF-32; iconv-lite got it wrong
12+ // See https://unicode.org/faq/utf_bom#utf32-7
13+
1114// Assumes checked length % 4 === 0, otherwise does not swap tail
1215export function swap32 ( u8 ) {
1316 let i = 0
@@ -31,12 +34,8 @@ export function to32input(u8, le) {
3134 return to32 ( swap32 ( Uint8Array . from ( u8 ) ) )
3235}
3336
34- export function decode ( u32 ) {
35- return String . fromCodePoint . apply ( String , u32 ) // TODO: max len
36- }
37-
3837// No surrogates (paired or unpaired), no out of range codepoints
39- export function isStrict ( u32 ) {
38+ export function isWellFormed ( u32 ) {
4039 const length = u32 . length
4140 for ( let i = 0 ; i < length ; i ++ ) {
4241 const x = u32 [ i ]
@@ -50,27 +49,13 @@ export function toWellFormed(u32) {
5049 const length = u32 . length
5150 for ( let i = 0 ; i < length ; i ++ ) {
5251 const x = u32 [ i ]
53- if ( x >= 0xd8_00 ) {
54- if ( x < 0xe0_00 ) {
55- // An unexpected trail or a lead at the very end of input
56- if ( x > 0xdb_ff || i + 1 >= length ) {
57- u32 [ i ] = replacementCodepoint
58- } else {
59- const next = u32 [ i + 1 ] // Process valid pairs immediately
60- if ( next < 0xdc_00 || next >= 0xe0_00 ) {
61- u32 [ i ] = replacementCodepoint
62- } else {
63- i ++ // consume next
64- }
65- }
66- } else if ( x >= 0x11_00_00 ) {
67- // also fix out-of-range in the same pass, both are unlikely
68- u32 [ i ] = replacementCodepoint
69- }
70- }
52+ if ( x >= 0xd8_00 && ( x < 0xe0_00 || x >= 0x11_00_00 ) ) u32 [ i ] = replacementCodepoint
7153 }
54+ }
7255
73- return u32
56+ // Only defined on valid input
57+ export function decode ( u32 ) {
58+ return String . fromCodePoint . apply ( String , u32 ) // TODO: max len
7459}
7560
7661// Only defined on valid input
0 commit comments