11import { isLE } from './_utils.js'
22
33export const E_STRICT = 'Input is not well-formed utf32'
4- const replacementCodepoint = 0xff_fd
54
// Reinterprets the memory behind `a` as bytes: constructs a Uint8Array over a.buffer that starts at
// a.byteOffset and spans a.byteLength bytes.
// NOTE(review): presumably `a` is always a typed array, in which case the result is a view that
// shares memory with `a` (no copy) — confirm against callers.
65export const to8 = ( a ) => new Uint8Array ( a . buffer , a . byteOffset , a . byteLength )
// Constructs a Uint32Array over a.buffer that starts at a.byteOffset and holds a.byteLength / 4 elements.
// Performs no validation of its own; the caller is responsible for the precondition noted on the line below
// (presumably a.byteLength % 4 === 0 and a 4-byte-aligned a.byteOffset — confirm against callers).
76const to32 = ( a ) => new Uint32Array ( a . buffer , a . byteOffset , a . byteLength / 4 ) // Requires checked length and alignment!
87
98/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
109
10+ // Surrogates are a UTF-16 thing and cannot be represented in UTF-32; iconv-lite got it wrong
11+ // See https://unicode.org/faq/utf_bom#utf32-7
12+
1113// Assumes checked length % 4 === 0, otherwise does not swap tail
1214export function swap32 ( u8 ) {
1315 let i = 0
@@ -31,12 +33,8 @@ export function to32input(u8, le) {
3133 return to32 ( swap32 ( Uint8Array . from ( u8 ) ) )
3234}
3335
34- export function decode ( u32 ) {
35- return String . fromCodePoint . apply ( String , u32 ) // TODO: max len
36- }
37-
3836// No surrogates (paired or unpaired), no out of range codepoints
39- export function isStrict ( u32 ) {
37+ export function isWellFormed ( u32 ) {
4038 const length = u32 . length
4139 for ( let i = 0 ; i < length ; i ++ ) {
4240 const x = u32 [ i ]
@@ -50,27 +48,13 @@ export function toWellFormed(u32) {
5048 const length = u32 . length
5149 for ( let i = 0 ; i < length ; i ++ ) {
5250 const x = u32 [ i ]
53- if ( x >= 0xd8_00 ) {
54- if ( x < 0xe0_00 ) {
55- // An unexpected trail or a lead at the very end of input
56- if ( x > 0xdb_ff || i + 1 >= length ) {
57- u32 [ i ] = replacementCodepoint
58- } else {
59- const next = u32 [ i + 1 ] // Process valid pairs immediately
60- if ( next < 0xdc_00 || next >= 0xe0_00 ) {
61- u32 [ i ] = replacementCodepoint
62- } else {
63- i ++ // consume next
64- }
65- }
66- } else if ( x >= 0x11_00_00 ) {
67- // also fix out-of-range in the same pass, both are unlikely
68- u32 [ i ] = replacementCodepoint
69- }
70- }
51+ if ( x >= 0xd8_00 && ( x < 0xe0_00 || x >= 0x11_00_00 ) ) u32 [ i ] = 0xff_fd
7152 }
53+ }
7254
73- return u32
55+ // Only defined on valid input
/**
 * Converts a sequence of Unicode code points into a JavaScript string.
 *
 * Only defined on valid input: String.fromCodePoint throws a RangeError for values that are
 * not valid code points (e.g. above 0x10ffff), and no validation or replacement happens here.
 *
 * The input is decoded in fixed-size chunks, because passing every element as a separate
 * call argument in one go exceeds the engine's argument limit on large inputs.
 *
 * @param {Uint32Array|ArrayLike<number>} u32 - code points to decode
 * @returns {string} the decoded string
 */
export function decode(u32) {
  // Number of code points handed to a single String.fromCodePoint call; kept well below engine limits
  const chunkSize = 8192
  const length = u32.length

  // Small inputs: one call, no intermediate views or concatenation
  if (length <= chunkSize) return String.fromCodePoint.apply(String, u32)

  // Typed arrays can be windowed without copying; other array-likes are sliced generically
  const canView = typeof u32.subarray === 'function'
  let out = ''
  for (let start = 0; start < length; start += chunkSize) {
    const end = Math.min(start + chunkSize, length)
    const chunk = canView ? u32.subarray(start, end) : Array.prototype.slice.call(u32, start, end)
    out += String.fromCodePoint.apply(String, chunk)
  }

  return out
}
7559
7660// Only defined on valid input
0 commit comments