Skip to content

Commit 2e2020b

Browse files
tsaichien authored and
meta-codesync[bot] committed
Add String::length API
Summary: Add a JSI API to get the length of a JavaScript string in UTF-16 code units. This is equivalent to the "length" property of a JS string. New API: - `IRuntime::length(const String&)`: Returns the number of UTF-16 code units in the string. Default implementation: The default implementation in `jsi::Runtime` calls `utf16()` then returns the size. Note that the default `utf16()` implementation first converts the JS string to UTF-8, then converts that to UTF-16. This intermediate UTF-8 step does not handle lone surrogates correctly, causing code units to be "lost" in the conversion. Runtimes should provide optimized implementations when possible. The following optimized implementations are provided: JSC implementation: Uses the `JSStringGetLength` API for efficient retrieval. V8 implementation: Uses the `v8::String::Length()` API for efficient retrieval. Differential Revision: D96030262
1 parent 4307394 commit 2e2020b

4 files changed

Lines changed: 60 additions & 0 deletions

File tree

packages/react-native/ReactCommon/jsc/JSCRuntime.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,7 @@ class JSCRuntime : public jsi::Runtime {
179179
jsi::String createStringFromAscii(const char* str, size_t length) override;
180180
jsi::String createStringFromUtf8(const uint8_t* utf8, size_t length) override;
181181
std::string utf8(const jsi::String& /*str*/) override;
182+
size_t length(const jsi::String& /*str*/) override;
182183

183184
jsi::Object createObject() override;
184185
jsi::Object createObject(std::shared_ptr<jsi::HostObject> ho) override;
@@ -745,6 +746,10 @@ std::string JSCRuntime::utf8(const jsi::String& str) {
745746
return JSStringToSTLString(stringRef(str));
746747
}
747748

749+
// Reports the length of \p str by handing its underlying JSC string
// reference to JSStringGetLength.
size_t JSCRuntime::length(const jsi::String& str) {
  const auto jsString = stringRef(str);
  return JSStringGetLength(jsString);
}
752+
748753
// Creates an object by forwarding a null JSObjectRef to another
// createObject overload.
jsi::Object JSCRuntime::createObject() {
  JSObjectRef const noObject = nullptr;
  return createObject(noObject);
}

packages/react-native/ReactCommon/jsi/jsi/jsi.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -602,6 +602,10 @@ Value Runtime::createURIError(const String& msg) {
602602
return callGlobalFunction(*this, "URIError", Value(*this, msg));
603603
}
604604

605+
size_t Runtime::length(const String& str) {
606+
return utf16(str).size();
607+
}
608+
605609
bool Runtime::detached(const ArrayBuffer& buffer) {
606610
Value prop = buffer.getProperty(*this, "detached");
607611
if (!prop.isBool()) {

packages/react-native/ReactCommon/jsi/jsi/jsi.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -675,6 +675,10 @@ class JSI_EXPORT IRuntime : public ICast {
675675
/// Create a new URIError object with the message property set to \p message.
676676
virtual Value createURIError(const String& msg) = 0;
677677

678+
/// Returns the number of UTF-16 code units in the string, equivalent to the
679+
/// 'length' property of a JS string.
680+
virtual size_t length(const String& str) = 0;
681+
678682
protected:
679683
virtual ~IRuntime() = default;
680684
};
@@ -786,6 +790,8 @@ class JSI_EXPORT Runtime : public IRuntime {
786790
Value createTypeError(const String& msg) override;
787791
Value createURIError(const String& msg) override;
788792

793+
size_t length(const String& str) override;
794+
789795
protected:
790796
friend class Pointer;
791797
friend class PropNameID;
@@ -1105,6 +1111,11 @@ class JSI_EXPORT String : public Pointer {
11051111
return runtime.strictEquals(a, b);
11061112
}
11071113

1114+
/// \return the 'length' property of this JS string.
1115+
size_t length(IRuntime& runtime) const {
1116+
return runtime.length(*this);
1117+
}
1118+
11081119
/// Copies the data in a JS string as utf8 into a C++ string.
11091120
std::string utf8(IRuntime& runtime) const {
11101121
return runtime.utf8(*this);

packages/react-native/ReactCommon/jsi/jsi/test/testlib.cpp

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,46 @@ TEST_P(JSITest, StringTest) {
9898
EXPECT_EQ(movedQuux.utf8(rt), "quux2");
9999
}
100100

101+
// Verifies that String::length reports the number of UTF-16 code units for
// ASCII, BMP, supplementary-plane, and lone-surrogate strings.
TEST_P(JSITest, StringLengthTest) {
  // Test ASCII string length
  String ascii = String::createFromAscii(rt, "hello");
  EXPECT_EQ(ascii.length(rt), 5);

  // Test empty string
  String empty = String::createFromAscii(rt, "");
  EXPECT_EQ(empty.length(rt), 0);

  // Test euro sign '€' (U+20AC) - BMP character, 1 code unit
  String euro = eval("'\\u20AC'").getString(rt);
  EXPECT_EQ(euro.length(rt), 1);

  // Test codepoint requiring 2 code units (surrogate pair)
  // U+1F408 (🐈) is encoded as \uD83D\uDC08 in UTF-16
  String emoji = eval("'\\uD83D\\uDC08'").getString(rt);
  EXPECT_EQ(emoji.length(rt), 2);

  // Test another surrogate pair: U+10000 (first supplementary character)
  String supplementary = eval("'\\uD800\\uDC00'").getString(rt);
  EXPECT_EQ(supplementary.length(rt), 2);

  // Test lone high surrogate (U+D800)
  String loneHighSurrogate = eval("'\\uD800'").getString(rt);
  EXPECT_EQ(loneHighSurrogate.length(rt), 1);

  // Test lone low surrogate (U+DC00)
  String loneLowSurrogate = eval("'\\uDC00'").getString(rt);
  EXPECT_EQ(loneLowSurrogate.length(rt), 1);

  // Test lone surrogate in the middle of a string
  String mixedWithLoneSurrogate = eval("'a\\uD800b'").getString(rt);
  EXPECT_EQ(mixedWithLoneSurrogate.length(rt), 3);

  // Test the largest Unicode code point, U+10FFFF. It is the last value a
  // surrogate pair can express and is encoded as \uDBFF\uDFFF in UTF-16,
  // so it occupies 2 code units.
  String maxCodePoint = eval("'\\uDBFF\\uDFFF'").getString(rt);
  EXPECT_EQ(maxCodePoint.length(rt), 2);
}
140+
101141
TEST_P(JSITest, ObjectTest) {
102142
eval("x = {1:2, '3':4, 5:'six', 'seven':['eight', 'nine']}");
103143
Object x = rt.global().getPropertyAsObject(rt, "x");

0 commit comments

Comments
 (0)