Skip to content

Commit 90f6410

Browse files
committed
lib: unify ICU and no-ICU TextDecoder
1 parent 4967625 commit 90f6410

File tree

1 file changed

+86
-129
lines changed

1 file changed

+86
-129
lines changed

lib/internal/encoding.js

Lines changed: 86 additions & 129 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ const {
2525
ERR_INVALID_THIS,
2626
ERR_NO_ICU,
2727
} = require('internal/errors').codes;
28-
const kMethod = Symbol('method');
28+
const kSingleByte = Symbol('method');
2929
const kHandle = Symbol('handle');
3030
const kFlags = Symbol('flags');
3131
const kEncoding = Symbol('encoding');
@@ -52,6 +52,8 @@ const {
5252
validateObject,
5353
kValidateObjectAllowObjectsAndNull,
5454
} = require('internal/validators');
55+
56+
const { hasIntl } = internalBinding('config');
5557
const binding = internalBinding('encoding_binding');
5658
const {
5759
encodeInto,
@@ -447,168 +449,123 @@ function parseInput(input) {
447449
}
448450
}
449451

450-
const TextDecoder =
451-
internalBinding('config').hasIntl ?
452-
makeTextDecoderICU() :
453-
makeTextDecoderJS();
454-
455-
function makeTextDecoderICU() {
456-
const {
452+
let _decode, getConverter;
453+
if (hasIntl) {
454+
;({
457455
decode: _decode,
458456
getConverter,
459-
} = internalBinding('icu');
457+
} = internalBinding('icu'));
458+
}
460459

461-
class TextDecoder {
462-
constructor(encoding = 'utf-8', options = kEmptyObject) {
463-
encoding = `${encoding}`;
464-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
460+
const kBOMSeen = Symbol('BOM seen');
465461

466-
const enc = getEncodingFromLabel(encoding);
467-
if (enc === undefined)
468-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
462+
let StringDecoder;
463+
function lazyStringDecoder() {
464+
if (StringDecoder === undefined)
465+
({ StringDecoder } = require('string_decoder'));
466+
return StringDecoder;
467+
}
469468

470-
let flags = 0;
471-
if (options !== null) {
472-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
473-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
474-
}
469+
class TextDecoder {
470+
constructor(encoding = 'utf-8', options = kEmptyObject) {
471+
encoding = `${encoding}`;
472+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
475473

476-
this[kDecoder] = true;
477-
this[kFlags] = flags;
478-
this[kEncoding] = enc;
479-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
480-
this[kFatal] = Boolean(options?.fatal);
481-
// Only support fast path for UTF-8.
482-
this[kUTF8FastPath] = enc === 'utf-8';
483-
this[kHandle] = undefined;
484-
this[kMethod] = undefined;
485-
486-
if (isSinglebyteEncoding(this.encoding)) {
487-
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
488-
} else if (!this[kUTF8FastPath]) {
489-
this.#prepareConverter();
490-
}
474+
const enc = getEncodingFromLabel(encoding);
475+
if (enc === undefined)
476+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
477+
478+
let flags = 0;
479+
if (options !== null) {
480+
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
481+
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
482+
}
483+
484+
this[kDecoder] = true;
485+
this[kFlags] = flags;
486+
this[kEncoding] = enc;
487+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
488+
this[kFatal] = Boolean(options?.fatal);
489+
// Only support fast path for UTF-8.
490+
this[kUTF8FastPath] = enc === 'utf-8';
491+
this[kHandle] = undefined;
492+
this[kSingleByte] = undefined; // Does not care about streaming or BOM
493+
494+
if (isSinglebyteEncoding(enc)) {
495+
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
496+
} else if (!this[kUTF8FastPath]) {
497+
this.#prepareConverter();
491498
}
499+
}
492500

493-
#prepareConverter() {
494-
if (this[kHandle] !== undefined) return;
501+
#prepareConverter() {
502+
if (this[kHandle] !== undefined) return;
503+
if (hasIntl) {
495504
let icuEncoding = this[kEncoding];
496505
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
497506
const handle = getConverter(icuEncoding, this[kFlags]);
498507
if (handle === undefined)
499508
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
500509
this[kHandle] = handle;
501-
}
502-
503-
decode(input = empty, options = kEmptyObject) {
504-
validateDecoder(this);
505-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
506-
507-
if (this[kMethod]) return this[kMethod](input);
508-
509-
this[kUTF8FastPath] &&= !(options?.stream);
510-
511-
if (this[kUTF8FastPath]) {
512-
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
510+
} else {
511+
if (this.encoding !== 'utf-8' && this.encoding !== 'utf-16le') {
512+
throw new ERR_ENCODING_NOT_SUPPORTED(`${this.encoding}`);
513513
}
514514

515-
this.#prepareConverter();
516-
517-
let flags = 0;
518-
if (options !== null)
519-
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
520-
521-
return _decode(this[kHandle], input, flags, this.encoding);
515+
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
516+
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
517+
this[kHandle] = new (lazyStringDecoder())(this.encoding);
518+
this[kBOMSeen] = false;
522519
}
523520
}
524521

525-
return TextDecoder;
526-
}
527-
528-
function makeTextDecoderJS() {
529-
let StringDecoder;
530-
function lazyStringDecoder() {
531-
if (StringDecoder === undefined)
532-
({ StringDecoder } = require('string_decoder'));
533-
return StringDecoder;
534-
}
522+
decode(input = empty, options = kEmptyObject) {
523+
validateDecoder(this);
524+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
535525

536-
const kBOMSeen = Symbol('BOM seen');
526+
if (this[kSingleByte]) return this[kSingleByte](input);
537527

538-
function hasConverter(encoding) {
539-
return encoding === 'utf-8' || encoding === 'utf-16le';
540-
}
528+
const stream = options?.stream;
529+
if (this[kUTF8FastPath]) {
530+
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
531+
this[kUTF8FastPath] = false;
532+
}
541533

542-
class TextDecoder {
543-
constructor(encoding = 'utf-8', options = kEmptyObject) {
544-
encoding = `${encoding}`;
545-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
534+
input = parseInput(input);
546535

547-
const enc = getEncodingFromLabel(encoding);
548-
if (enc === undefined)
549-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
536+
this.#prepareConverter();
550537

551-
let flags = 0;
552-
if (options !== null) {
553-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
554-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
555-
}
538+
if (hasIntl) {
539+
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
540+
return _decode(this[kHandle], input, flags, this.encoding);
541+
}
556542

557-
this[kDecoder] = true;
558-
this[kFlags] = flags;
559-
this[kEncoding] = enc;
560-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
561-
this[kFatal] = Boolean(options?.fatal);
543+
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
562544
this[kBOMSeen] = false;
563-
this[kMethod] = undefined;
564-
565-
if (isSinglebyteEncoding(enc)) {
566-
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
567-
} else {
568-
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
569-
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
570-
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
571-
this[kHandle] = new (lazyStringDecoder())(enc);
572-
}
573545
}
574546

575-
decode(input = empty, options = kEmptyObject) {
576-
validateDecoder(this);
577-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
578-
579-
if (this[kMethod]) return this[kMethod](input);
580-
581-
input = parseInput(input);
547+
if (stream) {
548+
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
549+
} else {
550+
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
551+
}
582552

583-
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
584-
this[kBOMSeen] = false;
585-
}
553+
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
554+
this[kHandle].end(input) :
555+
this[kHandle].write(input);
586556

587-
if (options !== null && options.stream) {
588-
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
589-
} else {
590-
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
557+
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
558+
// If the very first result in the stream is a BOM, and we are not
559+
// explicitly told to ignore it, then we discard it.
560+
if (result[0] === '\ufeff') {
561+
result = StringPrototypeSlice(result, 1);
591562
}
563+
this[kBOMSeen] = true;
564+
}
592565

593-
input = parseInput(input);
594-
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
595-
this[kHandle].end(input) :
596-
this[kHandle].write(input);
597-
598-
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
599-
// If the very first result in the stream is a BOM, and we are not
600-
// explicitly told to ignore it, then we discard it.
601-
if (result[0] === '\ufeff') {
602-
result = StringPrototypeSlice(result, 1);
603-
}
604-
this[kBOMSeen] = true;
605-
}
566+
return result;
606567

607-
return result;
608-
}
609568
}
610-
611-
return TextDecoder;
612569
}
613570

614571
// Mix in some shared properties.

0 commit comments

Comments
 (0)