Skip to content

Commit d6bbc57

Browse files
committed
src: use stack allocation for small string encoding
Use stack-allocated buffers in StringBytes::Encode() for small inputs instead of heap-allocating via UncheckedMalloc for every call. Refs: nodejs/performance#194
1 parent 511a57a commit d6bbc57

File tree

1 file changed

+91
-89
lines changed

1 file changed

+91
-89
lines changed

src/string_bytes.cc

Lines changed: 91 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,49 @@ class ExternString: public ResourceType {
145145
size_t length_;
146146
};
147147

148+
typedef ExternString<String::ExternalOneByteStringResource, char>
149+
ExternOneByteString;
150+
typedef ExternString<String::ExternalStringResource, uint16_t>
151+
ExternTwoByteString;
152+
153+
// Builds a one-byte string of `length` characters from data produced by
// `encode`.
//
// `encode` is invoked exactly once with a `char*` destination backed by a
// MaybeStackBuffer sized for `length` chars; the visible call sites fill
// exactly `length` chars. If the buffer reports IsAllocated(), its storage is
// detached with Release() and passed to ExternOneByteString::New(); otherwise
// the contents are given to String::NewFromOneByte() as a kNormal string.
// Returns whatever the chosen V8/ExternString constructor returns.
template <typename EncodeFn>
154+
static MaybeLocal<Value> EncodeOneByteString(Isolate* isolate,
155+
size_t length,
156+
EncodeFn encode) {
157+
// 512B stack threshold: covers common small outputs (hex SHA-256/512, UUIDs).
158+
// Larger thresholds were benchmarked.
// NOTE(review): the sentence above appears truncated in this view; the
// benchmark outcome is not stated here -- confirm and record it.
159+
MaybeStackBuffer<char, 512> buf(length);
160+
encode(buf.out());
161+
// Copy stack-backed data, but release heap-backed storage to V8.
162+
if (buf.IsAllocated()) {
163+
// Grab the pointer before Release() detaches it from `buf`.
char* data = buf.out();
164+
buf.Release();
165+
return ExternOneByteString::New(isolate, data, length);
166+
}
167+
// `length` is narrowed to int for the V8 API; presumably callers have already
// clamped it (the visible call sites use keep_buflen_in_range) -- verify.
return String::NewFromOneByte(isolate,
168+
reinterpret_cast<const uint8_t*>(buf.out()),
169+
v8::NewStringType::kNormal,
170+
static_cast<int>(length));
171+
}
148172

149-
typedef ExternString<String::ExternalOneByteStringResource,
150-
char> ExternOneByteString;
151-
typedef ExternString<String::ExternalStringResource,
152-
uint16_t> ExternTwoByteString;
153-
173+
// Builds a two-byte string of `char_length` UTF-16 code units from data
// produced by `encode`.
//
// `encode` is invoked exactly once with a `uint16_t*` destination backed by a
// MaybeStackBuffer sized for `char_length` elements, so it must not write more
// than `char_length * sizeof(uint16_t)` bytes. If the buffer reports
// IsAllocated(), its storage is detached with Release() and passed to
// ExternTwoByteString::New(); otherwise the contents are given to
// String::NewFromTwoByte() as a kNormal string.
template <typename EncodeFn>
174+
static MaybeLocal<Value> EncodeTwoByteString(Isolate* isolate,
175+
size_t char_length,
176+
EncodeFn encode) {
177+
// 256 uint16_t = 512 bytes on the stack, matching the 512-byte threshold
// used by EncodeOneByteString.
178+
MaybeStackBuffer<uint16_t, 256> buf(char_length);
179+
encode(buf.out());
180+
// Copy stack-backed data, but release heap-backed storage to V8.
181+
if (buf.IsAllocated()) {
182+
// Grab the pointer before Release() detaches it from `buf`.
uint16_t* data = buf.out();
183+
buf.Release();
184+
return ExternTwoByteString::New(isolate, data, char_length);
185+
}
186+
// `char_length` is narrowed to int for the V8 API; presumably callers have
// already range-checked it -- verify against the call sites.
return String::NewFromTwoByte(isolate,
187+
buf.out(),
188+
v8::NewStringType::kNormal,
189+
static_cast<int>(char_length));
190+
}
154191

155192
template <>
156193
MaybeLocal<Value> ExternOneByteString::NewExternal(
@@ -513,27 +550,23 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
513550
MaybeLocal<String> val;
514551

515552
switch (encoding) {
516-
case BUFFER:
517-
{
518-
auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
519-
Local<v8::Object> buf;
520-
if (!maybe_buf.ToLocal(&buf)) {
521-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
522-
}
523-
return buf;
553+
case BUFFER: {
554+
auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
555+
Local<v8::Object> buf;
556+
if (!maybe_buf.ToLocal(&buf)) {
557+
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
524558
}
559+
return buf;
560+
}
525561

526562
case ASCII:
527563
buflen = keep_buflen_in_range(buflen);
528564
if (simdutf::validate_ascii_with_errors(buf, buflen).error) {
529565
// The input contains non-ASCII bytes.
530-
char* out = node::UncheckedMalloc(buflen);
531-
if (out == nullptr) {
532-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
533-
return MaybeLocal<Value>();
534-
}
535-
nbytes::ForceAscii(buf, out, buflen);
536-
return ExternOneByteString::New(isolate, out, buflen);
566+
567+
return EncodeOneByteString(isolate, buflen, [buf, buflen](char* dst) {
568+
nbytes::ForceAscii(buf, dst, buflen);
569+
});
537570
} else {
538571
return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
539572
}
@@ -557,14 +590,12 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
557590
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
558591
return MaybeLocal<Value>();
559592
}
560-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(u16size);
561-
if (u16size != 0 && dst == nullptr) {
562-
THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);
563-
return MaybeLocal<Value>();
564-
}
565-
size_t utf16len = simdutf::convert_valid_utf8_to_utf16(
566-
buf, buflen, reinterpret_cast<char16_t*>(dst));
567-
return ExternTwoByteString::New(isolate, dst, utf16len);
593+
return EncodeTwoByteString(
594+
isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
595+
size_t written = simdutf::convert_valid_utf8_to_utf16(
596+
buf, buflen, reinterpret_cast<char16_t*>(dst));
597+
CHECK_EQ(written, u16size);
598+
});
568599
}
569600

570601
val =
@@ -583,77 +614,52 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
583614
case BASE64: {
584615
buflen = keep_buflen_in_range(buflen);
585616
size_t dlen = simdutf::base64_length_from_binary(buflen);
586-
char* dst = node::UncheckedMalloc(dlen);
587-
if (dst == nullptr) {
588-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
589-
return MaybeLocal<Value>();
590-
}
591-
592-
size_t written = simdutf::binary_to_base64(buf, buflen, dst);
593-
CHECK_EQ(written, dlen);
594-
595-
return ExternOneByteString::New(isolate, dst, dlen);
617+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
618+
size_t written = simdutf::binary_to_base64(buf, buflen, dst);
619+
CHECK_EQ(written, dlen);
620+
});
596621
}
597622

598623
case BASE64URL: {
599624
buflen = keep_buflen_in_range(buflen);
600625
size_t dlen =
601626
simdutf::base64_length_from_binary(buflen, simdutf::base64_url);
602-
char* dst = node::UncheckedMalloc(dlen);
603-
if (dst == nullptr) {
604-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
605-
return MaybeLocal<Value>();
606-
}
607-
608-
size_t written =
609-
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
610-
CHECK_EQ(written, dlen);
611-
612-
return ExternOneByteString::New(isolate, dst, dlen);
627+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
628+
size_t written =
629+
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
630+
CHECK_EQ(written, dlen);
631+
});
613632
}
614633

615634
case HEX: {
616635
buflen = keep_buflen_in_range(buflen);
617636
size_t dlen = buflen * 2;
618-
char* dst = node::UncheckedMalloc(dlen);
619-
if (dst == nullptr) {
620-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
621-
return MaybeLocal<Value>();
622-
}
623-
size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);
624-
CHECK_EQ(written, dlen);
625-
626-
return ExternOneByteString::New(isolate, dst, dlen);
637+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
638+
size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);
639+
CHECK_EQ(written, dlen);
640+
});
627641
}
628642

629643
case UCS2: {
630644
buflen = keep_buflen_in_range(buflen);
631645
size_t str_len = buflen / 2;
632646
if constexpr (IsBigEndian()) {
633-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(str_len);
634-
if (str_len != 0 && dst == nullptr) {
635-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
636-
return MaybeLocal<Value>();
637-
}
638-
for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {
639-
// The input is in *little endian*, because that's what Node.js
640-
// expects, so the high byte comes after the low byte.
641-
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
642-
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
643-
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
644-
}
645-
return ExternTwoByteString::New(isolate, dst, str_len);
647+
return EncodeTwoByteString(
648+
isolate, str_len, [buf, str_len](uint16_t* dst) {
649+
for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {
650+
// The input is in *little endian*, because that's what Node.js
651+
// expects, so the high byte comes after the low byte.
652+
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
653+
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
654+
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
655+
}
656+
});
646657
}
647658
if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
648-
// Unaligned data still means we can't directly pass it to V8.
649-
char* dst = node::UncheckedMalloc(buflen);
650-
if (dst == nullptr) {
651-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
652-
return MaybeLocal<Value>();
653-
}
654-
memcpy(dst, buf, buflen);
655-
return ExternTwoByteString::New(
656-
isolate, reinterpret_cast<uint16_t*>(dst), str_len);
659+
return EncodeTwoByteString(
660+
isolate, str_len, [buf, str_len](uint16_t* dst) {
661+
// Unaligned data still means we can't directly pass it to V8, so copy
// it. Copy only the str_len whole code units: dst is sized for str_len
// uint16_t, and buflen may be odd, in which case copying buflen bytes
// would write one byte past the end of the destination.
memcpy(dst, buf, str_len * sizeof(uint16_t));
662+
});
657663
}
658664
return ExternTwoByteString::NewFromCopy(
659665
isolate, reinterpret_cast<const uint16_t*>(buf), str_len);
@@ -675,15 +681,11 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
675681
// https://nodejs.org/api/buffer.html regarding Node's "ucs2"
676682
// encoding specification
677683
if constexpr (IsBigEndian()) {
678-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen);
679-
if (dst == nullptr) {
680-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
681-
return MaybeLocal<Value>();
682-
}
683-
size_t nbytes = buflen * sizeof(uint16_t);
684-
memcpy(dst, buf, nbytes);
685-
CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));
686-
return ExternTwoByteString::New(isolate, dst, buflen);
684+
return EncodeTwoByteString(isolate, buflen, [buf, buflen](uint16_t* dst) {
685+
size_t nbytes = buflen * sizeof(uint16_t);
686+
memcpy(dst, buf, nbytes);
687+
CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));
688+
});
687689
} else {
688690
return ExternTwoByteString::NewFromCopy(isolate, buf, buflen);
689691
}

0 commit comments

Comments
 (0)