Skip to content

Commit de91b9d

Browse files
ChALkeR and mertcanaltin committed
src: move all 1-byte encodings to native
Co-authored-by: Mert Can Altin <mertgold60@gmail.com>
1 parent e155415 commit de91b9d

File tree

10 files changed

+540
-159
lines changed

10 files changed

+540
-159
lines changed

lib/internal/encoding.js

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
// https://encoding.spec.whatwg.org
55

66
const {
7+
ArrayPrototypeMap,
78
Boolean,
89
ObjectDefineProperties,
910
ObjectGetOwnPropertyDescriptors,
1011
ObjectSetPrototypeOf,
1112
ObjectValues,
13+
SafeArrayIterator,
1214
SafeMap,
1315
StringPrototypeSlice,
1416
Symbol,
@@ -32,8 +34,6 @@ const kFatal = Symbol('kFatal');
3234
const kUTF8FastPath = Symbol('kUTF8FastPath');
3335
const kIgnoreBOM = Symbol('kIgnoreBOM');
3436

35-
const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
36-
3737
const {
3838
getConstructorOf,
3939
customInspectSymbol: inspect,
@@ -60,6 +60,7 @@ const {
6060
encodeIntoResults,
6161
encodeUtf8String,
6262
decodeUTF8,
63+
decodeSingleByte,
6364
} = binding;
6465

6566
function validateDecoder(obj) {
@@ -73,6 +74,47 @@ const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
7374

7475
const empty = new FastBuffer();
7576

77+
// The position of each label in this list is the numeric encoding id that is
// handed to the native decodeSingleByte() binding, so the order has to stay in
// sync with the tables under src/ (presumably src/encoding_singlebyte.h;
// confirm when editing).
const singlebyteEncodingLabels = [
  'ibm866',
  'koi8-r',
  'koi8-u',
  'macintosh',
  'x-mac-cyrillic',
  'iso-8859-2',
  'iso-8859-3',
  'iso-8859-4',
  'iso-8859-5',
  'iso-8859-6',
  'iso-8859-7',
  'iso-8859-8',
  'iso-8859-8-i',
  'iso-8859-10',
  'iso-8859-13',
  'iso-8859-14',
  'iso-8859-15',
  'iso-8859-16',
  'windows-874',
  'windows-1250',
  'windows-1251',
  'windows-1252',
  'windows-1253',
  'windows-1254',
  'windows-1255',
  'windows-1256',
  'windows-1257',
  'windows-1258',
  'x-user-defined', // Has to be last, special case
];

// Maps a single-byte encoding label to its index in the list above.
const encodingsSinglebyte = new SafeMap(new SafeArrayIterator(
  ArrayPrototypeMap(singlebyteEncodingLabels, (label, id) => [label, id]),
));
109+
110+
/**
 * Reports whether `enc` is one of the labels registered in
 * `encodingsSinglebyte`.
 * @param {string} enc Encoding label to look up.
 * @returns {boolean}
 */
function isSinglebyteEncoding(enc) {
  return encodingsSinglebyte.has(enc);
}
111+
112+
/**
 * Builds a decode function bound to one single-byte encoding.
 *
 * The label is resolved to its numeric id once, here; the returned closure
 * forwards each input straight to the native `decodeSingleByte` binding
 * together with that id and the `fatal` flag.
 * @param {string} encoding Label registered in `encodingsSinglebyte`.
 * @param {boolean} fatal Forwarded unchanged to the native decoder.
 * @returns {Function} Takes the input buffer, returns the native result.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} If `encoding` is not registered.
 */
function createSinglebyteDecoder(encoding, fatal) {
  const id = encodingsSinglebyte.get(encoding);
  if (id !== undefined) {
    return (buf) => decodeSingleByte(buf, id, fatal);
  }
  throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
}
117+
76118
const encodings = new SafeMap([
77119
['unicode-1-1-utf-8', 'utf-8'],
78120
['unicode11utf8', 'utf-8'],
@@ -479,7 +521,7 @@ class TextDecoder {
479521
validateDecoder(this);
480522
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
481523

482-
if (this[kSingleByte]) return this[kSingleByte](parseInput(input));
524+
if (this[kSingleByte]) return this[kSingleByte](input);
483525

484526
const stream = options?.stream;
485527
if (this[kUTF8FastPath]) {

lib/internal/encoding/single-byte.js

Lines changed: 0 additions & 155 deletions
This file was deleted.

node.gyp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
'src/debug_utils.cc',
9090
'src/embedded_data.cc',
9191
'src/encoding_binding.cc',
92+
'src/encoding_singlebyte.cc',
9293
'src/env.cc',
9394
'src/fs_event_wrap.cc',
9495
'src/handle_wrap.cc',
@@ -221,6 +222,7 @@
221222
'src/debug_utils-inl.h',
222223
'src/embedded_data.h',
223224
'src/encoding_binding.h',
225+
'src/encoding_singlebyte.h',
224226
'src/env_properties.h',
225227
'src/env.h',
226228
'src/env-inl.h',

src/encoding_binding.cc

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
3+
#include "encoding_singlebyte.h"
34
#include "env-inl.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
@@ -398,6 +399,73 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
398399
}
399400
}
400401

402+
void BindingData::DecodeSingleByte(const FunctionCallbackInfo<Value>& args) {
403+
Environment* env = Environment::GetCurrent(args);
404+
405+
CHECK_GE(args.Length(), 2);
406+
Isolate* isolate = env->isolate();
407+
408+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
409+
args[0]->IsArrayBufferView())) {
410+
return node::THROW_ERR_INVALID_ARG_TYPE(
411+
isolate,
412+
"The \"input\" argument must be an instance of SharedArrayBuffer, "
413+
"ArrayBuffer or ArrayBufferView.");
414+
}
415+
416+
CHECK(args[1]->IsInt32());
417+
const int encoding = args[1].As<v8::Int32>()->Value();
418+
CHECK(encoding >= 0 && encoding <= kXUserDefined);
419+
420+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
421+
const uint8_t* data = buffer.data();
422+
size_t length = buffer.length();
423+
424+
if (length == 0) return args.GetReturnValue().SetEmptyString();
425+
426+
const char* dataChar = reinterpret_cast<const char*>(data);
427+
if (!simdutf::validate_ascii_with_errors(dataChar, length).error) {
428+
Local<Value> ret;
429+
if (StringBytes::Encode(isolate, dataChar, length, LATIN1).ToLocal(&ret)) {
430+
args.GetReturnValue().Set(ret);
431+
}
432+
return;
433+
}
434+
435+
if (length > static_cast<size_t>(v8::String::kMaxLength)) {
436+
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
437+
return;
438+
}
439+
440+
uint16_t* dst = node::UncheckedMalloc<uint16_t>(length);
441+
if (dst == nullptr) return node::THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);
442+
443+
if (encoding == kXUserDefined) {
444+
// x-user-defined
445+
for (size_t i = 0; i < length; i++) {
446+
dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i];
447+
}
448+
} else {
449+
bool has_fatal = args[2]->IsTrue();
450+
451+
const uint16_t* table = tSingleByteEncodings[encoding];
452+
for (size_t i = 0; i < length; i++) dst[i] = table[data[i]];
453+
454+
const char16_t* dst16 = reinterpret_cast<char16_t*>(dst);
455+
if (has_fatal && fSingleByteEncodings[encoding] &&
456+
simdutf::find(dst16, dst16 + length, 0xfffd) != dst16 + length) {
457+
free(dst);
458+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
459+
isolate, "The encoded data was not valid for this encoding");
460+
}
461+
}
462+
463+
Local<Value> ret;
464+
if (StringBytes::Raw(isolate, dst, length).ToLocal(&ret)) {
465+
args.GetReturnValue().Set(ret);
466+
}
467+
}
468+
401469
void BindingData::ToASCII(const FunctionCallbackInfo<Value>& args) {
402470
Environment* env = Environment::GetCurrent(args);
403471
CHECK_GE(args.Length(), 1);
@@ -430,6 +498,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
430498
SetMethod(isolate, target, "encodeInto", EncodeInto);
431499
SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String);
432500
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
501+
SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte);
433502
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
434503
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
435504
}
@@ -447,6 +516,7 @@ void BindingData::RegisterTimerExternalReferences(
447516
registry->Register(EncodeInto);
448517
registry->Register(EncodeUtf8String);
449518
registry->Register(DecodeUTF8);
519+
registry->Register(DecodeSingleByte);
450520
registry->Register(ToASCII);
451521
registry->Register(ToUnicode);
452522
}

0 commit comments

Comments
 (0)