Skip to content

Commit 43fd2f8

Browse files
committed
src: use stack allocation for small string encoding
Use stack-allocated buffers in StringBytes::Encode() for small inputs instead of heap-allocating via UncheckedMalloc for every call. Refs: nodejs/performance#194
1 parent 0d7e4b1 commit 43fd2f8

File tree

1 file changed

+99
-89
lines changed

1 file changed

+99
-89
lines changed

src/string_bytes.cc

Lines changed: 99 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -145,12 +145,57 @@ class ExternString: public ResourceType {
145145
size_t length_;
146146
};
147147

148+
// Shorthand for ExternString instantiated with V8's external one-byte string
// resource type and `char` as the character type.
typedef ExternString<String::ExternalOneByteStringResource, char>
149+
ExternOneByteString;
150+
// Shorthand for ExternString instantiated with V8's external two-byte string
// resource type and `uint16_t` as the character type.
typedef ExternString<String::ExternalStringResource, uint16_t>
151+
ExternTwoByteString;
152+
153+
// Creates a one-byte string of `length` characters whose bytes are produced
// by `encode`.
//
// `encode` is invoked once with a writable `char*`. The buffer is not
// initialized before the call and `length` bytes are read back from it, so
// the callback has to fill the first `length` bytes.
//
// Returns whatever String::NewFromOneByte (small inputs) or
// ExternOneByteString::New (large inputs) returns. If the heap allocation
// fails, ERR_MEMORY_ALLOCATION_FAILED is thrown on `isolate` and an empty
// MaybeLocal is returned.
template <typename EncodeFn>
154+
static MaybeLocal<Value> EncodeOneByteString(Isolate* isolate,
155+
size_t length,
156+
EncodeFn encode) {
157+
// 512B: covers common small outputs (hex SHA-256/512, UUIDs).
158+
// Larger thresholds were benchmarked and regressed other paths.
159+
static constexpr size_t kStackThreshold = 512;
160+
// Small output: encode into a fixed-size stack array, no heap allocation.
if (length <= kStackThreshold) {
161+
char stack_buf[kStackThreshold];
162+
encode(stack_buf);
163+
// stack_buf goes out of scope on return, so this path relies on
// NewFromOneByte copying the bytes into the new string.
return String::NewFromOneByte(isolate,
164+
reinterpret_cast<const uint8_t*>(stack_buf),
165+
v8::NewStringType::kNormal,
166+
static_cast<int>(length));
167+
}
168+
// Large output: allocate exactly `length` bytes; allocation failure is
// reported as a JS exception rather than aborting.
char* heap_buf = node::UncheckedMalloc<char>(length);
169+
if (heap_buf == nullptr) {
170+
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
171+
return MaybeLocal<Value>();
172+
}
173+
encode(heap_buf);
174+
// heap_buf is not freed here; presumably ExternOneByteString::New takes
// ownership of it -- confirm against ExternString::New.
return ExternOneByteString::New(isolate, heap_buf, length);
175+
}
148176

149-
typedef ExternString<String::ExternalOneByteStringResource,
150-
char> ExternOneByteString;
151-
typedef ExternString<String::ExternalStringResource,
152-
uint16_t> ExternTwoByteString;
153-
177+
// Creates a two-byte string of `char_length` code units whose contents are
// produced by `encode`.
//
// `char_length` counts uint16_t code units, not bytes. `encode` is invoked
// once with a writable `uint16_t*` that has room for `char_length` units
// (the stack array on the small path, an allocation of exactly `char_length`
// units otherwise); it must fill them and must not write more than that.
//
// Returns whatever String::NewFromTwoByte (small inputs) or
// ExternTwoByteString::New (large inputs) returns. If the heap allocation
// fails, ERR_MEMORY_ALLOCATION_FAILED is thrown on `isolate` and an empty
// MaybeLocal is returned.
template <typename EncodeFn>
178+
static MaybeLocal<Value> EncodeTwoByteString(Isolate* isolate,
179+
size_t char_length,
180+
EncodeFn encode) {
181+
// 256 uint16_t code units = 512 bytes on the stack, matching the size of the
// stack buffer used by the one-byte helper (EncodeOneByteString).
182+
static constexpr size_t kStackThreshold = 256;
183+
// Small output: encode into a fixed-size stack array, no heap allocation.
if (char_length <= kStackThreshold) {
184+
uint16_t stack_buf[kStackThreshold];
185+
encode(stack_buf);
186+
// stack_buf goes out of scope on return, so this path relies on
// NewFromTwoByte copying the code units into the new string.
return String::NewFromTwoByte(isolate,
187+
stack_buf,
188+
v8::NewStringType::kNormal,
189+
static_cast<int>(char_length));
190+
}
191+
// Large output: allocate exactly `char_length` code units; allocation
// failure is reported as a JS exception rather than aborting.
uint16_t* heap_buf = node::UncheckedMalloc<uint16_t>(char_length);
192+
if (heap_buf == nullptr) {
193+
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
194+
return MaybeLocal<Value>();
195+
}
196+
encode(heap_buf);
197+
// heap_buf is not freed here; presumably ExternTwoByteString::New takes
// ownership of it -- confirm against ExternString::New.
return ExternTwoByteString::New(isolate, heap_buf, char_length);
198+
}
154199

155200
template <>
156201
MaybeLocal<Value> ExternOneByteString::NewExternal(
@@ -513,27 +558,23 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
513558
MaybeLocal<String> val;
514559

515560
switch (encoding) {
516-
case BUFFER:
517-
{
518-
auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
519-
Local<v8::Object> buf;
520-
if (!maybe_buf.ToLocal(&buf)) {
521-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
522-
}
523-
return buf;
561+
case BUFFER: {
562+
auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
563+
Local<v8::Object> buf;
564+
if (!maybe_buf.ToLocal(&buf)) {
565+
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
524566
}
567+
return buf;
568+
}
525569

526570
case ASCII:
527571
buflen = keep_buflen_in_range(buflen);
528572
if (simdutf::validate_ascii_with_errors(buf, buflen).error) {
529573
// The input contains non-ASCII bytes.
530-
char* out = node::UncheckedMalloc(buflen);
531-
if (out == nullptr) {
532-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
533-
return MaybeLocal<Value>();
534-
}
535-
nbytes::ForceAscii(buf, out, buflen);
536-
return ExternOneByteString::New(isolate, out, buflen);
574+
575+
return EncodeOneByteString(isolate, buflen, [buf, buflen](char* dst) {
576+
nbytes::ForceAscii(buf, dst, buflen);
577+
});
537578
} else {
538579
return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
539580
}
@@ -557,14 +598,12 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
557598
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
558599
return MaybeLocal<Value>();
559600
}
560-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(u16size);
561-
if (u16size != 0 && dst == nullptr) {
562-
THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);
563-
return MaybeLocal<Value>();
564-
}
565-
size_t utf16len = simdutf::convert_valid_utf8_to_utf16(
566-
buf, buflen, reinterpret_cast<char16_t*>(dst));
567-
return ExternTwoByteString::New(isolate, dst, utf16len);
601+
return EncodeTwoByteString(
602+
isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
603+
size_t written = simdutf::convert_valid_utf8_to_utf16(
604+
buf, buflen, reinterpret_cast<char16_t*>(dst));
605+
CHECK_EQ(written, u16size);
606+
});
568607
}
569608

570609
val =
@@ -583,77 +622,52 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
583622
case BASE64: {
584623
buflen = keep_buflen_in_range(buflen);
585624
size_t dlen = simdutf::base64_length_from_binary(buflen);
586-
char* dst = node::UncheckedMalloc(dlen);
587-
if (dst == nullptr) {
588-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
589-
return MaybeLocal<Value>();
590-
}
591-
592-
size_t written = simdutf::binary_to_base64(buf, buflen, dst);
593-
CHECK_EQ(written, dlen);
594-
595-
return ExternOneByteString::New(isolate, dst, dlen);
625+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
626+
size_t written = simdutf::binary_to_base64(buf, buflen, dst);
627+
CHECK_EQ(written, dlen);
628+
});
596629
}
597630

598631
case BASE64URL: {
599632
buflen = keep_buflen_in_range(buflen);
600633
size_t dlen =
601634
simdutf::base64_length_from_binary(buflen, simdutf::base64_url);
602-
char* dst = node::UncheckedMalloc(dlen);
603-
if (dst == nullptr) {
604-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
605-
return MaybeLocal<Value>();
606-
}
607-
608-
size_t written =
609-
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
610-
CHECK_EQ(written, dlen);
611-
612-
return ExternOneByteString::New(isolate, dst, dlen);
635+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
636+
size_t written =
637+
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
638+
CHECK_EQ(written, dlen);
639+
});
613640
}
614641

615642
case HEX: {
616643
buflen = keep_buflen_in_range(buflen);
617644
size_t dlen = buflen * 2;
618-
char* dst = node::UncheckedMalloc(dlen);
619-
if (dst == nullptr) {
620-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
621-
return MaybeLocal<Value>();
622-
}
623-
size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);
624-
CHECK_EQ(written, dlen);
625-
626-
return ExternOneByteString::New(isolate, dst, dlen);
645+
return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {
646+
size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);
647+
CHECK_EQ(written, dlen);
648+
});
627649
}
628650

629651
case UCS2: {
630652
buflen = keep_buflen_in_range(buflen);
631653
size_t str_len = buflen / 2;
632654
if constexpr (IsBigEndian()) {
633-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(str_len);
634-
if (str_len != 0 && dst == nullptr) {
635-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
636-
return MaybeLocal<Value>();
637-
}
638-
for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {
639-
// The input is in *little endian*, because that's what Node.js
640-
// expects, so the high byte comes after the low byte.
641-
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
642-
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
643-
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
644-
}
645-
return ExternTwoByteString::New(isolate, dst, str_len);
655+
return EncodeTwoByteString(
656+
isolate, str_len, [buf, str_len](uint16_t* dst) {
657+
for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {
658+
// The input is in *little endian*, because that's what Node.js
659+
// expects, so the high byte comes after the low byte.
660+
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
661+
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
662+
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
663+
}
664+
});
646665
}
647666
if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
648-
// Unaligned data still means we can't directly pass it to V8.
649-
char* dst = node::UncheckedMalloc(buflen);
650-
if (dst == nullptr) {
651-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
652-
return MaybeLocal<Value>();
653-
}
654-
memcpy(dst, buf, buflen);
655-
return ExternTwoByteString::New(
656-
isolate, reinterpret_cast<uint16_t*>(dst), str_len);
667+
return EncodeTwoByteString(
668+
isolate, str_len, [buf, buflen](uint16_t* dst) {
669+
// dst has room for exactly str_len (= buflen / 2) uint16_t units. Copy only
// whole units: with an odd buflen, copying buflen bytes would write one byte
// past the end of the stack/heap buffer handed out by EncodeTwoByteString.
memcpy(dst, buf, (buflen / 2) * sizeof(uint16_t));
670+
});
657671
}
658672
return ExternTwoByteString::NewFromCopy(
659673
isolate, reinterpret_cast<const uint16_t*>(buf), str_len);
@@ -675,15 +689,11 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
675689
// https://nodejs.org/api/buffer.html regarding Node's "ucs2"
676690
// encoding specification
677691
if constexpr (IsBigEndian()) {
678-
uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen);
679-
if (dst == nullptr) {
680-
isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));
681-
return MaybeLocal<Value>();
682-
}
683-
size_t nbytes = buflen * sizeof(uint16_t);
684-
memcpy(dst, buf, nbytes);
685-
CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));
686-
return ExternTwoByteString::New(isolate, dst, buflen);
692+
return EncodeTwoByteString(isolate, buflen, [buf, buflen](uint16_t* dst) {
693+
size_t nbytes = buflen * sizeof(uint16_t);
694+
memcpy(dst, buf, nbytes);
695+
CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));
696+
});
687697
} else {
688698
return ExternTwoByteString::NewFromCopy(isolate, buf, buflen);
689699
}

0 commit comments

Comments
 (0)