Skip to content

Commit d2415b6

Browse files
authored
[NFC] Use pascal-style string storage for IString/Name (#8662)
Previously our interned strings were std::string_view, which means a pointer and a size. Instead, store the size as a header alongside the string data. As we have many views on the same data, this reduces memory usage, basically anywhere we use a Name, which is every Call, GlobalSet, Load, etc. I see a 5% RAM reduction. Changes to instructions, branches, and wall time are very very small. This removes the "reuse" optimization where we could reuse a string from the input. That was error-prone and a bad idea anyhow. In practice, it might have helped a little, but this new model is simpler and saves a lot more. (We can't reuse now since we need to convert to pascal-style storage anyhow.)
1 parent e7f8ce2 commit d2415b6

22 files changed

Lines changed: 266 additions & 121 deletions

src/ir/names.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ inline Name getValidName(Name root,
5656
if (check(root)) {
5757
return root;
5858
}
59-
auto prefixed = std::string(root.str) + separator;
59+
auto prefixed = std::string(root.view()) + separator;
6060
Index num = hint;
6161
while (1) {
6262
auto name = prefixed + std::to_string(num);

src/ir/possible-contents.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,8 +1211,7 @@ struct InfoCollector
12111211
addRoot(curr, PossibleContents::exactType(curr->type));
12121212
}
12131213
void visitStringConst(StringConst* curr) {
1214-
addRoot(curr,
1215-
PossibleContents::literal(Literal(std::string(curr->string.str))));
1214+
addRoot(curr, PossibleContents::literal(Literal(curr->string.view())));
12161215
}
12171216
void visitStringMeasure(StringMeasure* curr) {
12181217
// TODO: optimize when possible

src/parser/contexts.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1996,7 +1996,7 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx>, AnnotationParserCtx {
19961996
void setSrcLoc(const std::vector<Annotation>& annotations) {
19971997
const Annotation* annotation = nullptr;
19981998
for (auto& a : annotations) {
1999-
if (a.kind.str == std::string_view("src")) {
1999+
if (a.kind.view() == std::string_view("src")) {
20002000
annotation = &a;
20012001
}
20022002
}

src/passes/Asyncify.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1730,7 +1730,7 @@ struct AsyncifyLocals : public WalkerPass<PostWalker<AsyncifyLocals>> {
17301730
} // anonymous namespace
17311731

17321732
static std::string getFullImportName(Name module, Name base) {
1733-
return std::string(module.str) + '.' + base.toString();
1733+
return module.toString() + '.' + base.toString();
17341734
}
17351735

17361736
struct Asyncify : public Pass {

src/passes/J2CLOpts.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ class ConstantHoister : public WalkerPass<PostWalker<ConstantHoister>> {
197197
}
198198

199199
Name getEnclosingClass(Name name) {
200-
return Name(name.str.substr(name.str.find_last_of('@')));
200+
return Name(name.view().substr(name.view().find_last_of('@')));
201201
}
202202

203203
AssignmentCountMap& assignmentCounts;

src/passes/MinifyImportsAndExports.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ struct MinifyImportsAndExports : public Pass {
112112
std::cout << ',';
113113
}
114114
std::cout << "\n [";
115-
String::printEscaped(std::cout, key.first.str) << ", ";
116-
String::printEscaped(std::cout, key.second.str) << ", ";
117-
String::printEscaped(std::cout, new_.str) << "]";
115+
String::printEscaped(std::cout, key.first.view()) << ", ";
116+
String::printEscaped(std::cout, key.second.view()) << ", ";
117+
String::printEscaped(std::cout, new_.view()) << "]";
118118
}
119119
}
120120
std::cout << "\n ],\n\"exports\": [";
@@ -127,8 +127,8 @@ struct MinifyImportsAndExports : public Pass {
127127
std::cout << ',';
128128
}
129129
std::cout << "\n [";
130-
String::printEscaped(std::cout, key.second.str) << ", ";
131-
String::printEscaped(std::cout, new_.str) << "]";
130+
String::printEscaped(std::cout, key.second.view()) << ", ";
131+
String::printEscaped(std::cout, new_.view()) << "]";
132132
}
133133
}
134134
std::cout << "\n ]\n";

src/passes/Print.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2566,7 +2566,7 @@ struct PrintExpressionContents
25662566
// Re-encode from WTF-16 to WTF-8.
25672567
std::stringstream wtf8;
25682568
[[maybe_unused]] bool valid =
2569-
String::convertWTF16ToWTF8(wtf8, curr->string.str);
2569+
String::convertWTF16ToWTF8(wtf8, curr->string.view());
25702570
assert(valid);
25712571
// TODO: Use wtf8.view() once we have C++20.
25722572
String::printEscaped(o, wtf8.str());
@@ -3190,7 +3190,7 @@ void PrintSExpression::visitExport(Export* curr) {
31903190
o << '(';
31913191
printMedium(o, "export ");
31923192
std::stringstream escaped;
3193-
String::printEscaped(escaped, curr->name.str);
3193+
String::printEscaped(escaped, curr->name.view());
31943194
printText(o, escaped.str(), false) << " (";
31953195
switch (curr->kind) {
31963196
case ExternalKind::Function:
@@ -3219,8 +3219,8 @@ void PrintSExpression::visitExport(Export* curr) {
32193219
void PrintSExpression::emitImportHeader(Importable* curr) {
32203220
printMedium(o, "import ");
32213221
std::stringstream escapedModule, escapedBase;
3222-
String::printEscaped(escapedModule, curr->module.str);
3223-
String::printEscaped(escapedBase, curr->base.str);
3222+
String::printEscaped(escapedModule, curr->module.view());
3223+
String::printEscaped(escapedBase, curr->base.view());
32243224
printText(o, escapedModule.str(), false) << ' ';
32253225
printText(o, escapedBase.str(), false) << ' ';
32263226
}

src/passes/StringLifting.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ struct StringLifting : public Pass {
7070
// Encode from WTF-8 to WTF-16.
7171
auto wtf8 = global->base;
7272
std::stringstream wtf16;
73-
bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.str);
73+
bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.view());
7474
if (!valid) {
7575
Fatal() << "Bad string to lift: " << wtf8;
7676
}
77-
importedStrings[global->name] = wtf16.str();
77+
importedStrings[global->name] = wtf16.view();
7878
found = true;
7979
}
8080
}
@@ -101,7 +101,7 @@ struct StringLifting : public Pass {
101101
continue;
102102
}
103103
// The index in the array is the basename.
104-
Index index = std::stoi(std::string(global->base.str));
104+
Index index = std::stoi(std::string(global->base.view()));
105105
if (index >= array.size()) {
106106
Fatal() << "StringLifting: bad index in string.const section";
107107
}
@@ -222,7 +222,7 @@ struct StringLifting : public Pass {
222222
auto iter = parent.importedStrings.find(curr->name);
223223
if (iter != parent.importedStrings.end()) {
224224
auto wtf16 = iter->second;
225-
replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.str));
225+
replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.view()));
226226
modified = true;
227227
}
228228
}

src/passes/StringLowering.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ struct StringGathering : public Pass {
153153
// Re-encode from WTF-16 to WTF-8 to make the name easier to read.
154154
std::stringstream wtf8;
155155
[[maybe_unused]] bool valid =
156-
String::convertWTF16ToWTF8(wtf8, string.str);
156+
String::convertWTF16ToWTF8(wtf8, string.view());
157157
assert(valid);
158158
// Then escape it because identifiers must be valid UTF-8.
159159
// TODO: Use wtf8.view() and escaped.view() once we have C++20.
@@ -246,7 +246,7 @@ struct StringLowering : public StringGathering {
246246
if (auto* c = global->init->dynCast<StringConst>()) {
247247
std::stringstream utf8;
248248
if (useMagicImports &&
249-
String::convertUTF16ToUTF8(utf8, c->string.str)) {
249+
String::convertUTF16ToUTF8(utf8, c->string.view())) {
250250
global->module = stringConstsModule;
251251
global->base = Name(utf8.str());
252252
} else {
@@ -263,7 +263,7 @@ struct StringLowering : public StringGathering {
263263
} else {
264264
json << ',';
265265
}
266-
String::printEscapedJSON(json, c->string.str);
266+
String::printEscapedJSON(json, c->string.view());
267267
jsonImportIndex++;
268268
}
269269
global->init = nullptr;

src/support/istring.cpp

Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -14,32 +14,39 @@
1414
* limitations under the License.
1515
*/
1616

17+
#include <limits>
18+
#include <mutex>
19+
1720
#include "istring.h"
1821
#include "mixed_arena.h"
1922

2023
namespace wasm {
2124

22-
std::string_view IString::interned(std::string_view s, bool reuse) {
23-
// We need a set of string_views that can be modified in-place to minimize
24-
// the number of lookups we do. Since set elements cannot normally be
25-
// modified, wrap the string_views in a container that provides mutability
26-
// even through a const reference.
27-
struct MutStringView {
28-
mutable std::string_view str;
29-
MutStringView(std::string_view str) : str(str) {}
30-
};
31-
struct MutStringViewHash {
32-
size_t operator()(const MutStringView& mut) const {
33-
return std::hash<std::string_view>{}(mut.str);
25+
const char* IString::interned(std::string_view s) {
26+
// A set of interned Views, i.e., that contains our pascal-style strings. We
27+
// need to query this using a std::string_view, as that is what we receive as
28+
// input (turning it into pascal-style storage would add overhead). To do so,
29+
// use overloading in the hash and equality functions (which works thanks to
30+
// `is_transparent`).
31+
struct InternedHash {
32+
using is_transparent = void;
33+
size_t operator()(View v) const {
34+
return std::hash<std::string_view>{}(v.view());
35+
}
36+
size_t operator()(std::string_view sv) const {
37+
return std::hash<std::string_view>{}(sv);
3438
}
3539
};
36-
struct MutStringViewEqual {
37-
bool operator()(const MutStringView& a, const MutStringView& b) const {
38-
return a.str == b.str;
40+
struct InternedEqual {
41+
using is_transparent = void;
42+
bool operator()(View a, View b) const { return a.view() == b.view(); }
43+
bool operator()(std::string_view a, View b) const { return a == b.view(); }
44+
bool operator()(View a, std::string_view b) const { return a.view() == b; }
45+
bool operator()(std::string_view a, std::string_view b) const {
46+
return a == b;
3947
}
4048
};
41-
using StringSet =
42-
std::unordered_set<MutStringView, MutStringViewHash, MutStringViewEqual>;
49+
using StringSet = std::unordered_set<View, InternedHash, InternedEqual>;
4350

4451
// The authoritative global set of interned string views.
4552
static StringSet globalStrings;
@@ -54,34 +61,37 @@ std::string_view IString::interned(std::string_view s, bool reuse) {
5461
// A thread-local cache of strings to reduce contention.
5562
thread_local static StringSet localStrings;
5663

57-
auto [localIt, localInserted] = localStrings.insert(s);
58-
if (!localInserted) {
64+
if (auto it = localStrings.find(s); it != localStrings.end()) {
5965
// We already had a local copy of this string.
60-
return localIt->str;
66+
return it->internal;
6167
}
6268

6369
// No copy yet in the local cache. Check the global cache.
6470
std::unique_lock<std::mutex> lock(mutex);
65-
auto [globalIt, globalInserted] = globalStrings.insert(s);
66-
if (!globalInserted) {
71+
if (auto it = globalStrings.find(s); it != globalStrings.end()) {
6772
// We already had a global copy of this string. Cache it locally.
68-
localIt->str = globalIt->str;
69-
return localIt->str;
73+
localStrings.insert(*it);
74+
return it->internal;
7075
}
7176

72-
if (!reuse) {
73-
// We have a new string, but it doesn't have a stable address. Create a copy
74-
// of the data at a stable address we can use. Make sure it is null
75-
// terminated so legacy uses that get a C string still work.
76-
char* data = (char*)arena.allocSpace(s.size() + 1, 1);
77-
std::copy(s.begin(), s.end(), data);
78-
data[s.size()] = '\0';
79-
s = std::string_view(data, s.size());
80-
}
77+
// We have a new string. Create a copy of the data at a stable address with a
78+
// header we can use. Make sure it is null terminated so legacy uses that get
79+
// a C string still work.
80+
size_t size = s.size();
81+
// The string's size must fit in 32 bits.
82+
assert(size <= std::numeric_limits<uint32_t>::max());
83+
char* buffer =
84+
(char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t));
85+
*(uint32_t*)(buffer) = size;
86+
char* data = buffer + sizeof(uint32_t);
87+
std::copy(s.begin(), s.end(), data);
88+
data[size] = '\0';
8189

8290
// Intern our new string.
83-
localIt->str = globalIt->str = s;
84-
return s;
91+
View v{data};
92+
globalStrings.insert(v);
93+
localStrings.insert(v);
94+
return data;
8595
}
8696

8797
} // namespace wasm

0 commit comments

Comments
 (0)