@@ -761,9 +761,35 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
761761void SlowByteLengthUtf8 (const FunctionCallbackInfo<Value>& args) {
762762 CHECK (args[0 ]->IsString ());
763763
764- // Fast case: avoid StringBytes on UTF8 string. Jump to v8.
765- size_t result = args[0 ].As <String>()->Utf8LengthV2 (args.GetIsolate ());
766- args.GetReturnValue ().Set (static_cast <uint64_t >(result));
764+ Isolate* isolate = args.GetIsolate ();
765+ Local<String> source = args[0 ].As <String>();
766+
767+ // For small strings, use V8's path for better performance
768+ static constexpr int kSmallStringThreshold = 128 ;
769+ if (source->Length () <= kSmallStringThreshold ) {
770+ size_t result = source->Utf8LengthV2 (isolate);
771+ args.GetReturnValue ().Set (static_cast <uint64_t >(result));
772+ return ;
773+ }
774+
775+ String::ValueView view (isolate, source);
776+ size_t length = view.length ();
777+ size_t utf8_length;
778+
779+ if (view.is_one_byte ()) {
780+ auto data = reinterpret_cast <const char *>(view.data8 ());
781+ simdutf::result result = simdutf::validate_ascii_with_errors (data, length);
782+ if (result.error == simdutf::SUCCESS) {
783+ utf8_length = length; // Pure ASCII, length stays the same
784+ } else {
785+ utf8_length = simdutf::utf8_length_from_latin1 (data, length);
786+ }
787+ } else {
788+ auto data = reinterpret_cast <const char16_t *>(view.data16 ());
789+ utf8_length = simdutf::utf8_length_from_utf16 (data, length);
790+ }
791+
792+ args.GetReturnValue ().Set (static_cast <uint64_t >(utf8_length));
767793}
768794
769795uint32_t FastByteLengthUtf8 (
@@ -776,49 +802,28 @@ uint32_t FastByteLengthUtf8(
776802 CHECK (sourceValue->IsString ());
777803 Local<String> sourceStr = sourceValue.As <String>();
778804
779- if (!sourceStr->IsExternalOneByte ()) {
805+ // For short inputs, use V8's path - function call overhead not worth it
806+ static constexpr int kSmallStringThreshold = 128 ;
807+ if (sourceStr->Length () <= kSmallStringThreshold ) {
780808 return sourceStr->Utf8LengthV2 (isolate);
781809 }
782- auto source = sourceStr->GetExternalOneByteStringResource ();
783- // For short inputs, the function call overhead to simdutf is maybe
784- // not worth it, reserve simdutf for long strings.
785- if (source->length () > 128 ) {
786- return simdutf::utf8_length_from_latin1 (source->data (), source->length ());
787- }
788-
789- uint32_t length = source->length ();
790- const auto input = reinterpret_cast <const uint8_t *>(source->data ());
791-
792- uint32_t answer = length;
793- uint32_t i = 0 ;
794810
795- auto pop = [](uint64_t v) {
796- return static_cast <size_t >(((v >> 7 ) & UINT64_C (0x0101010101010101 )) *
797- UINT64_C (0x0101010101010101 ) >>
798- 56 );
799- };
811+ // For large strings, use simdutf with String::ValueView for direct access
812+ // This is ~6x faster for large strings
813+ String::ValueView view (isolate, sourceStr);
814+ size_t length = view.length ();
800815
801- for (; i + 32 <= length; i += 32 ) {
802- uint64_t v;
803- memcpy (&v, input + i, 8 );
804- answer += pop (v);
805- memcpy (&v, input + i + 8 , 8 );
806- answer += pop (v);
807- memcpy (&v, input + i + 16 , 8 );
808- answer += pop (v);
809- memcpy (&v, input + i + 24 , 8 );
810- answer += pop (v);
811- }
812- for (; i + 8 <= length; i += 8 ) {
813- uint64_t v;
814- memcpy (&v, input + i, 8 );
815- answer += pop (v);
816- }
817- for (; i + 1 <= length; i += 1 ) {
818- answer += input[i] >> 7 ;
816+ if (view.is_one_byte ()) {
817+ auto data = reinterpret_cast <const char *>(view.data8 ());
818+ simdutf::result result = simdutf::validate_ascii_with_errors (data, length);
819+ if (result.error == simdutf::SUCCESS) {
820+ return length; // Pure ASCII, length stays the same
821+ }
822+ return simdutf::utf8_length_from_latin1 (data, length);
819823 }
820824
821- return answer;
825+ auto data = reinterpret_cast <const char16_t *>(view.data16 ());
826+ return simdutf::utf8_length_from_utf16 (data, length);
822827}
823828
824829static CFunction fast_byte_length_utf8 (CFunction::Make(FastByteLengthUtf8));
0 commit comments