|
| 1 | +import type { TextDecodeOptions } from "node:util"; |
| 2 | + |
/** First byte of a three-byte sequence that encodes a code point in U+D000–U+DFFF. */
const surrogateLeadByte = 0xed;
/** Smallest second byte that, after the lead byte above, lands in the surrogate range (U+D800). */
const surrogateSecondByteMin = 0xa0;
/** Largest second byte that, after the lead byte above, lands in the surrogate range (up to U+DFFF). */
const surrogateSecondByteMax = 0xbf;
/** Lower bound accepted for the third (continuation) byte. */
const continuationByteMin = 0x80;
/** Upper bound accepted for the third (continuation) byte. */
const continuationByteMax = 0xbf;
| 8 | + |
/**
 * Reports whether the three bytes starting at `index` form a WTF-8 encoded
 * surrogate: the surrogate lead byte, a second byte in the surrogate range,
 * and a continuation byte.
 *
 * @param bytes - Buffer being scanned.
 * @param index - Offset of the candidate lead byte.
 * @returns `true` only when all three bytes exist and match the pattern.
 */
function isWtf8Surrogate(bytes: Uint8Array, index: number): boolean {
  // The whole three-byte sequence has to fit inside the buffer.
  if (!(index + 2 < bytes.length)) {
    return false;
  }
  if (bytes[index] !== surrogateLeadByte) {
    return false;
  }

  const second = bytes[index + 1];
  if (!(second >= surrogateSecondByteMin && second <= surrogateSecondByteMax)) {
    return false;
  }

  const third = bytes[index + 2];
  return third >= continuationByteMin && third <= continuationByteMax;
}
| 17 | + |
/**
 * Reassembles the UTF-16 code unit from a three-byte sequence at `index`.
 * The lead byte is not read: its contribution is the fixed `0xD000` prefix,
 * so the caller must already have verified the sequence.
 *
 * @param bytes - Buffer containing the sequence.
 * @param index - Offset of the sequence's lead byte.
 * @returns The code unit built from the payload bits of the second and third bytes.
 */
function getSurrogateCodeUnit(bytes: Uint8Array, index: number): number {
  // Each trailing byte carries six payload bits in its low end.
  const middleBits = (bytes[index + 1] & 0x3F) << 6;
  const lowBits = bytes[index + 2] & 0x3F;
  return 0xD000 | middleBits | lowBits;
}
| 21 | + |
/**
 * Produces a byte-level view of `input` without copying the underlying data.
 *
 * @param input - A `Uint8Array`, any other buffer view, or a raw buffer.
 * @returns `input` itself when it is already a `Uint8Array`; otherwise a new
 *   `Uint8Array` over the same memory (respecting a view's offset and length).
 */
function toUint8Array(input: NodeJS.AllowSharedBufferSource): Uint8Array {
  if (input instanceof Uint8Array) {
    return input;
  }
  if (!ArrayBuffer.isView(input)) {
    // A bare buffer: view it from start to end.
    return new Uint8Array(input);
  }
  // Another kind of view: cover exactly the window it exposes.
  const { buffer, byteOffset, byteLength } = input;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
| 31 | + |
/**
 * A `TextDecoder` that additionally accepts WTF-8: three-byte sequences that
 * encode a lone UTF-16 surrogate are turned into that surrogate code unit
 * instead of being handed to the base decoder. Everything between such
 * sequences is decoded by the base `TextDecoder`.
 *
 * Surrogate sequences are only recognised inside a single `decode` call; one
 * that is split across two streamed chunks is not reassembled.
 */
export class Wtf8Decoder extends TextDecoder {
  /**
   * Decodes `input`, preserving WTF-8 encoded surrogates.
   *
   * @param input - Bytes to decode. When omitted, the call is forwarded to the
   *   base decoder unchanged.
   * @param options - Standard decode options. They are applied to the final
   *   segment of the input (or to the whole input when it contains no
   *   surrogate sequence); earlier segments are decoded without options.
   * @returns The decoded string.
   */
  override decode(input?: NodeJS.AllowSharedBufferSource, options?: TextDecodeOptions): string {
    // The surrogate byte pattern only has this meaning in UTF-8; for any other
    // encoding, scanning for it would corrupt ordinary data.
    if (input === undefined || this.encoding !== "utf-8") {
      return super.decode(input, options);
    }

    const bytes = toUint8Array(input);
    let result = "";
    let segmentStart = 0;

    for (let i = 0; i < bytes.length; i++) {
      if (!isWtf8Surrogate(bytes, i)) {
        continue;
      }

      if (segmentStart < i) {
        const segment = bytes.subarray(segmentStart, i);
        // Only the segment at offset 0 can legitimately open with a BOM.
        result += segmentStart === 0
          ? super.decode(segment)
          : this.decodeAfterSurrogate(segment);
      }
      result += String.fromCharCode(getSurrogateCodeUnit(bytes, i));
      i += 2; // skip the two trailing bytes of the sequence just consumed
      segmentStart = i + 1;
    }

    if (segmentStart === 0) {
      // No surrogate found: behave exactly like the base decoder.
      return super.decode(bytes, options);
    }
    if (segmentStart < bytes.length) {
      result += this.decodeAfterSurrogate(bytes.subarray(segmentStart), options);
    }
    return result;
  }

  /**
   * Decodes a segment that follows a surrogate within the same input.
   *
   * Each non-streaming `decode` call on the base class starts afresh and would
   * drop a leading U+FEFF as a byte order mark. Such a character is ordinary
   * content here, so leading BOM byte sequences are emitted literally and only
   * the remainder is handed to the base decoder.
   */
  private decodeAfterSurrogate(segment: Uint8Array, options?: TextDecodeOptions): string {
    let literalBoms = "";
    let rest = segment;
    if (!this.ignoreBOM) {
      while (rest[0] === 0xEF && rest[1] === 0xBB && rest[2] === 0xBF) {
        literalBoms += "\uFEFF";
        rest = rest.subarray(3);
      }
    }
    return literalBoms + super.decode(rest, options);
  }
}
0 commit comments