-
Notifications
You must be signed in to change notification settings - Fork 674
Expand file tree
/
Copy pathustring.cpp
More file actions
754 lines (643 loc) · 24.4 KB
/
ustring.cpp
File metadata and controls
754 lines (643 loc) · 24.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
// Copyright Contributors to the OpenImageIO project.
// SPDX-License-Identifier: Apache-2.0
// https://github.com/AcademySoftwareFoundation/OpenImageIO
#include <cstdlib>
#include <string>
#include <unordered_map>
#include <vector>
#include <OpenImageIO/dassert.h>
#include <OpenImageIO/export.h>
#include <OpenImageIO/strutil.h>
#include <OpenImageIO/sysutil.h>
#include <OpenImageIO/thread.h>
#include <OpenImageIO/unordered_map_concurrent.h>
#include <OpenImageIO/ustring.h>
OIIO_NAMESPACE_BEGIN
namespace pvt {
// If nonzero, the ustring table will be freed at process exit. This is off by
// default because cleanup is unnecessary (the OS reclaims the memory) and can
// add measurable time at exit for large tables. Enable it when using valgrind
// or other leak detectors to suppress false positives. Settable via
// OIIO::attribute("ustring:cleanup",1) or the OIIO_USTRING_CLEANUP
// environment variable.
//
// The initial value is whatever Strutil::stoi() produces for the environment
// string. NOTE(review): presumably that is 0 when the variable is unset or
// not numeric -- confirm against Strutil::stoi. The flag is consulted by
// ~TableRepMap() to choose between destroying entries and leaking the pools.
OIIO_UTIL_API int oiio_ustring_cleanup = Strutil::stoi(
Sysutil::getenv("OIIO_USTRING_CLEANUP"));
} // namespace pvt
OIIO_NAMESPACE_END
OIIO_NAMESPACE_3_1_BEGIN
// Locking vocabulary for the ustring table: a reader/writer spin mutex plus
// the read-lock and write-lock types that are constructed on it.
using ustring_mutex_t      = spin_rw_mutex;
using ustring_read_lock_t  = spin_rw_read_lock;
using ustring_write_lock_t = spin_rw_write_lock;
#define PREVENT_HASH_COLLISIONS 1
// Function object whose call operator hands back its argument unchanged.
// ReverseMap uses it as the hash functor, since its keys are already hashes.
template<typename T> struct identity {
    constexpr T operator()(T val) const noexcept
    {
        return val;
    }
};
// #define USTRING_TRACK_NUM_LOOKUPS
// TableRepMap: a hash table of ustring::TableRep pointers protected by a
// single reader/writer spin mutex.
//
//  * `entries` is a power-of-2 sized array of TableRep pointers; an empty
//    slot is a null pointer. A hash is mapped to a slot with `hash & mask`,
//    and on a miss the probe step grows by one each time (the "quadratic
//    probing" noted in the loops below).
//  * TableRep objects (header plus characters) are placement-constructed in
//    memory carved out of POOL_SIZE-byte blocks; requests of POOL_SIZE bytes
//    or more get their own allocation.
//  * insert() doubles the table whenever it becomes more than half full.
//
// BASE_CAPACITY is the initial number of slots (must be a power of 2) and
// POOL_SIZE is the size in bytes of each pool block.
template<unsigned BASE_CAPACITY, unsigned POOL_SIZE> struct TableRepMap {
static_assert((BASE_CAPACITY & (BASE_CAPACITY - 1)) == 0,
"BASE_CAPACITY must be a power of 2");
// Start with BASE_CAPACITY empty slots and one pool block. memory_usage
// is seeded with the size of this object; the helpers add the rest.
TableRepMap()
: memory_usage(sizeof(*this))
{
resize_entries(BASE_CAPACITY);
allocate_pool_block();
}
~TableRepMap()
{
if (OIIO::pvt::oiio_ustring_cleanup) {
// If requested, take the time to properly destroy all the
// entries, and also the unique_ptr arrays all_pools and
// large_allocs will naturally free their contents after this
// destructor body ends.
destroy_entries();
} else {
// If no ustring cleanup was requested, take the fastest possible
// route to exit, just release the pointers and let them leak!
for (auto& p : all_pools)
(void)p.release();
for (auto& p : large_allocs)
(void)p.release();
}
}
// Total bytes accounted for by this map (object, slot array, pool blocks
// and large allocations). Takes the read lock.
size_t get_memory_usage()
{
ustring_read_lock_t lock(mutex);
return memory_usage;
}
// Number of strings currently stored. Takes the read lock.
size_t get_num_entries()
{
ustring_read_lock_t lock(mutex);
return num_entries;
}
#ifdef USTRING_TRACK_NUM_LOOKUPS
// Number of lookup() calls made so far (only when tracking is compiled in).
size_t get_num_lookups()
{
ustring_read_lock_t lock(mutex);
return num_lookups;
}
#endif
// Look up `str`, whose hash is `hash`. Returns the stored characters if an
// entry with the same hash, length and characters exists; returns 0 (null)
// as soon as the probe sequence reaches an empty slot. Takes the read lock.
const char* lookup(string_view str, uint64_t hash)
{
ustring_read_lock_t lock(mutex);
#ifdef USTRING_TRACK_NUM_LOOKUPS
// NOTE: this simple increment adds a substantial amount of overhead
// so keep it off by default, unless the user really wants it
// NOTE2: note that in debug, asserts like the one in ustring::from_unique
// can skew the number of lookups compared to release builds
++num_lookups;
#endif
size_t pos = hash & mask, dist = 0;
for (;;) {
if (entries[pos] == 0)
return 0;
if (entries[pos]->hashed == hash
&& entries[pos]->length == str.length()
&& strncmp(entries[pos]->c_str(), str.data(), str.length())
== 0)
return entries[pos]->c_str();
++dist;
pos = (pos + dist) & mask; // quadratic probing
}
}
// Look up based on hash only. Return nullptr if not found. Note that if
// the hash is not unique, this will return the first entry that matches
// the hash.
const char* lookup(uint64_t hash)
{
ustring_read_lock_t lock(mutex);
#ifdef USTRING_TRACK_NUM_LOOKUPS
// NOTE: this simple increment adds a substantial amount of overhead
// so keep it off by default, unless the user really wants it
// NOTE2: note that in debug, asserts like the one in ustring::from_unique
// can skew the number of lookups compared to release builds
++num_lookups;
#endif
size_t pos = hash & mask, dist = 0;
for (;;) {
if (entries[pos] == 0)
return 0;
if (entries[pos]->hashed == hash)
return entries[pos]->c_str();
++dist;
pos = (pos + dist) & mask; // quadratic probing
}
}
// Insert `str` with hash `hash` and return its stored characters. If an
// entry with the same hash, length and characters is already present, that
// entry's characters are returned and nothing is added. Takes the write
// lock for the whole operation, including any grow().
const char* insert(string_view str, uint64_t hash)
{
ustring_write_lock_t lock(mutex);
size_t pos = hash & mask, dist = 0;
for (;;) {
if (entries[pos] == 0)
break; // found insert pos
if (entries[pos]->hashed == hash
&& entries[pos]->length == str.length()
&& !strncmp(entries[pos]->c_str(), str.data(), str.length())) {
// same string is already inserted, return the one that is
// already in the table
return entries[pos]->c_str();
}
++dist;
pos = (pos + dist) & mask; // quadratic probing
}
ustring::TableRep* rep = make_rep(str, hash);
entries[pos] = rep;
++num_entries;
if (2 * num_entries > mask)
grow(); // maintain 0.5 load factor
return rep->c_str(); // rep is now in the table
}
private:
// Double the number of slots and re-place every existing entry. Called
// from insert(), i.e. with the write lock already held. The TableRep
// objects themselves do not move; only the slot array is rebuilt.
void grow()
{
// Temporarily hold the old entries while we are copying them
std::vector<ustring::TableRep*> old_entries;
old_entries.swap(entries);
size_t old_num_entries = num_entries;
size_t to_copy = old_num_entries;
// Make bigger space for new entries table and new mask
resize_entries(2 * old_entries.size());
// resize_entries() reset num_entries to 0; restore the real count.
num_entries = old_num_entries;
// Copy each entry from old into the new, recomputing the slot position
// because the mask has changed. The loop ends once every live entry has
// been moved, so trailing empty slots are never visited.
for (size_t i = 0; to_copy != 0; i++) {
if (old_entries[i] == nullptr)
continue;
// i is old position, pos will be new position
size_t pos = old_entries[i]->hashed & mask, dist = 0;
for (;;) {
if (entries[pos] == 0)
break;
++dist;
pos = (pos + dist) & mask; // quadratic probing
}
entries[pos] = old_entries[i];
old_entries[i] = nullptr;
to_copy--;
}
// old_entries will free when we exit this function
memory_usage -= sizeof(ustring::TableRep*) * old_entries.size();
}
// Construct a TableRep for `str` in pool memory. The allocation is sized
// for the TableRep header plus the characters and a terminating nul.
ustring::TableRep* make_rep(string_view str, uint64_t hash)
{
char* repmem = pool_alloc(sizeof(ustring::TableRep) + str.length() + 1);
return new (repmem) ustring::TableRep(str, hash);
}
// Allocate `len` bytes from the pool. Allocate a new pool block if len
// doesn't fit in the current block. In the unlikely event that len >= the
// pool block size, do a separate allocation just for it.
char* pool_alloc(size_t len)
{
// Round up to a multiple of alignof(ustring::TableRep) so that the next
// object carved out of the same block starts at an aligned offset.
len = (len + alignof(ustring::TableRep) - 1)
& ~(alignof(ustring::TableRep) - 1);
if (len >= POOL_SIZE) {
memory_usage += len;
char* p = new char[len];
large_allocs.emplace_back(p);
return p;
}
// Not enough room left in the current block: start a fresh one. The
// unused tail of the old block is simply abandoned.
if (pool_offset + len > POOL_SIZE) {
allocate_pool_block();
}
char* result = pool + pool_offset;
pool_offset += len;
return result;
}
// Allocate one more standard POOL_SIZE block for `pool`, make it the
// current block, and record ownership of it in all_pools.
void allocate_pool_block()
{
memory_usage += POOL_SIZE;
pool = new char[POOL_SIZE];
pool_offset = 0;
all_pools.emplace_back(pool);
}
void destroy_entries()
{
// Destroy all TableRep objects. The destructor safely handles the
// case where the internal std::string aliases the pool chars.
// Only destructors are run here; the pool memory itself is owned by
// all_pools / large_allocs.
for (auto& e : entries) {
if (e) {
e->~TableRep();
e = nullptr;
}
}
}
// Size the (currently empty) slot array to `newsize` null slots, which
// must be a power of 2, and update mask and memory accounting. Note that
// this resets num_entries to 0; grow() restores it afterwards.
void resize_entries(size_t newsize)
{
OIIO_CONTRACT_ASSERT(entries.empty());
OIIO_CONTRACT_ASSERT_MSG((newsize & (newsize - 1)) == 0,
"New entries size must be power of 2");
entries.resize(newsize, nullptr);
memory_usage += sizeof(ustring::TableRep*) * entries.size();
num_entries = 0;
mask = newsize - 1;
}
std::vector<ustring::TableRep*> entries; // Slot array; null means empty
size_t mask = BASE_CAPACITY - 1; // entries.size() - 1, for hash -> slot
size_t num_entries = 0; // Number of occupied slots
char* pool = nullptr; // Current pool block we're using
size_t pool_offset = 0; // Next offset within current block
size_t memory_usage = 0; // Total memory usage
std::vector<std::unique_ptr<char[]>> all_pools; // Owns every pool block
std::vector<std::unique_ptr<char[]>> large_allocs; // Owns oversize allocations
#ifdef USTRING_TRACK_NUM_LOOKUPS
size_t num_lookups = 0;
#endif
OIIO_CACHE_ALIGN mutable ustring_mutex_t mutex;
};
#if 0
// Naive map with a single lock for the whole table
typedef TableRepMap<1 << 20, 16 << 20> UstringTable;
#else
// Optimized map broken up into chunks by the top bits of the hash.
// This helps reduce the amount of contention for locks.
struct UstringTable {
using hash_t = ustring::hash_t;
const char* lookup(string_view str, hash_t hash)
{
return whichbin(hash).lookup(str, hash);
}
const char* lookup(hash_t hash) { return whichbin(hash).lookup(hash); }
const char* insert(string_view str, uint64_t hash)
{
return whichbin(hash).insert(str, hash);
}
size_t get_memory_usage()
{
size_t mem = 0;
for (auto& bin : bins)
mem += bin.get_memory_usage();
return mem;
}
size_t get_num_entries()
{
size_t num = 0;
for (auto& bin : bins)
num += bin.get_num_entries();
return num;
}
# ifdef USTRING_TRACK_NUM_LOOKUPS
size_t get_num_lookups()
{
size_t num = 0;
for (auto& bin : bins)
num += bin.get_num_lookups();
return num;
}
# endif
private:
enum {
// NOTE: this guarantees NUM_BINS is a power of 2
BIN_SHIFT = 12,
NUM_BINS = 1 << BIN_SHIFT,
TOP_SHIFT = 8 * sizeof(size_t) - BIN_SHIFT
};
typedef TableRepMap<(1 << 20) / NUM_BINS, (16 << 20) / NUM_BINS> Bin;
Bin bins[NUM_BINS];
Bin& whichbin(uint64_t hash)
{
// use the top bits of the hash to pick a bin
// (lower bits choose position within the table)
return bins[(hash >> TOP_SHIFT) % NUM_BINS];
}
};
#endif
// This string is here so that we can return sensible values of str when the ustring's pointer is NULL
std::string ustring::empty_std_string;
// The reverse map that lets you look up a string by its initial hash.
using ReverseMap
= unordered_map_concurrent<uint64_t, const char*, identity<uint64_t>,
std::equal_to<uint64_t>, 256 /*bins*/>;
namespace { // anonymous
// Accessor for the single ustring table. The table is a function-local
// static, so it is constructed the first time this function is called.
static UstringTable&
ustring_table()
{
    static OIIO_CACHE_ALIGN UstringTable the_table;
    return the_table;
}
// Accessor for the single hash -> characters reverse map. Like the ustring
// table, it is a function-local static constructed on first use.
static ReverseMap&
reverse_map()
{
    static OIIO_CACHE_ALIGN ReverseMap the_reverse_map;
    return the_reverse_map;
}
// Keep track of any collisions
static std::vector<std::pair<const char*, uint64_t>> all_hash_collisions;
OIIO_CACHE_ALIGN static std::mutex collision_mutex;
} // end anonymous namespace
// Put a ustring in the global scope to force at least one call to
// make_unique to happen before main(), i.e. before threads are launched,
// in order to eliminate any possible thread collision on construction of
// the function-local static table inside ustring_table(), which
// make_unique calls.
namespace pvt {
static ustring ustring_force_make_unique_call("");
}
namespace {
// Definitions to let us access libc++ string internals.
// See libc++ <string> file for details.
//
// These are only compiled when building against libc++ (_LIBCPP_VERSION).
// libcpp_string__long is a local mirror of the three fields that the
// TableRep constructor overwrites when it points a std::string at our own
// characters; the field order depends on _LIBCPP_ALTERNATE_STRING_LAYOUT.
// libcpp_string__long_mask is the bit OR'ed into __cap_ by that constructor;
// which bit it is depends on the layout and on _LIBCPP_BIG_ENDIAN.
// NOTE(review): these must match the installed libc++'s real layout --
// verify against the libc++ <string> header when upgrading toolchains.
// clang-format off
#ifdef _LIBCPP_VERSION
#ifdef _LIBCPP_ALTERNATE_STRING_LAYOUT
// Alternate layout: data pointer first, then size, then capacity.
struct libcpp_string__long {
std::string::pointer __data_;
std::string::size_type __size_;
std::string::size_type __cap_;
};
# ifdef _LIBCPP_BIG_ENDIAN
enum { libcpp_string__long_mask = 0x1ul };
# else // _LIBCPP_BIG_ENDIAN
enum { libcpp_string__long_mask = ~(std::string::size_type(~0) >> 1) };
# endif // _LIBCPP_BIG_ENDIAN
#else
// Default layout: capacity first, then size, then data pointer.
struct libcpp_string__long {
std::string::size_type __cap_;
std::string::size_type __size_;
std::string::pointer __data_;
};
# ifdef _LIBCPP_BIG_ENDIAN
enum { libcpp_string__long_mask = ~(std::string::size_type(~0) >> 1) };
# else // _LIBCPP_BIG_ENDIAN
enum { libcpp_string__long_mask = 0x1ul };
# endif // _LIBCPP_BIG_ENDIAN
#endif
// Length threshold used by the TableRep constructor: strings at least this
// long are treated as libc++ "long" strings. Computed as the number of
// characters that fit in the long struct minus one, but never less than 2.
enum {
libcpp_string__min_cap
= (sizeof(libcpp_string__long) - 1) / sizeof(std::string::value_type) > 2
? (sizeof(libcpp_string__long) - 1) / sizeof(std::string::value_type)
: 2
};
#endif
// clang-format on
} // namespace
// Construct a TableRep for the characters in `strref` with the given hash.
//
// The characters are copied to the address returned by c_str() and
// nul-terminated, so the caller must have allocated room for
// strref.length() + 1 characters there (TableRepMap::make_rep does).
// The `str` member is then made to describe the same characters: on some
// standard library implementations by patching its internals to point at our
// copy (no second allocation), otherwise by ordinary assignment.
ustring::TableRep::TableRep(string_view strref, ustring::hash_t hash)
: hashed(hash)
{
length = strref.length();
// c_str() is const; cast it away to fill in our own character storage.
memcpy((char*)c_str(), strref.data(), length);
((char*)c_str())[length] = 0;
// We don't want the internal 'std::string str' to redundantly store the
// chars, along with our own allocation. So we use our knowledge of the
// internal structure of std::string (for certain compilers) to force
// the std::string to make it point to our chars! In such a case, the
// destructor will be careful not to allow a deallocation.
#if defined(__GNUC__) && !defined(_LIBCPP_VERSION) \
&& defined(_GLIBCXX_USE_CXX11_ABI) && _GLIBCXX_USE_CXX11_ABI
// NEW gcc ABI
// FIXME -- do something smart with this.
// (Nothing is done here, so this case falls through to the plain
// assignment at the end of the function.)
#elif defined(__GNUC__) && !defined(_LIBCPP_VERSION)
// OLD gcc ABI
// It turns out that the first field of a gcc std::string is a pointer
// to the characters within the basic_string::_Rep. We merely redirect
// that pointer, though for std::string to function properly, the chars
// must be preceded immediately in memory by the rest of
// basic_string::_Rep, consisting of length, capacity and refcount
// fields. And we have designed our TableRep to do just that! So now
// we redirect the std::string's pointer to our own characters and its
// mocked-up _Rep.
//
// See /usr/include/c++/VERSION/bits/basic_string.h for the details of
// gcc's std::string implementation.
dummy_capacity = length;
dummy_refcount = 1; // so it never frees
*(const char**)&str = c_str();
OIIO_DASSERT(str.c_str() == c_str() && str.size() == length);
return;
#elif defined(_LIBCPP_VERSION) && !defined(__aarch64__)
// FIXME -- we seem to do the wrong thing with libcpp on Mac M1. Disable
// when on aarch64 for now. Come back and fix then when I have easier
// access to an M1 Mac.
//
// libc++ uses a different std::string representation than gcc. For
// long char sequences, it's two size_t's (capacity & length) followed
// by the pointer to allocated characters. (Gory detail: see the
// definitions above for how it varies slightly with endianness and
// _LIBCPP_ALTERNATE_STRING_LAYOUT.) For short enough sequences, it's a
// single byte length followed immediately by the chars (the total being
// the same size as the long string). There's no savings of space or
// allocations to be had for short strings, so we just let those behave
// as normal. But if it's going to make a long string (we can tell from
// the length), we construct it ourselves, forcing the pointer to point
// to the characters in the TableRep we allocated.
if (length >= libcpp_string__min_cap /* it'll be a "long string" */) {
((libcpp_string__long*)&str)->__cap_ = libcpp_string__long_mask
| (length + 1);
((libcpp_string__long*)&str)->__size_ = length;
((libcpp_string__long*)&str)->__data_ = (char*)c_str();
OIIO_DASSERT(str.c_str() == c_str() && str.size() == length);
return;
}
#endif
// Remaining cases - just assign the internal string. This may result
// in double allocation for the chars. If you care about that, do
// something special for your platform, much like we did for gcc and
// libc++ above. (Windows users, I'm talking to you.)
str = strref;
}
// Destroy a TableRep. If the constructor pointed `str` at our own character
// storage, `str` must not be allowed to free that memory when it is
// destroyed after this body runs.
ustring::TableRep::~TableRep()
{
    const bool str_aliases_our_chars = (str.c_str() == c_str());
    if (!str_aliases_our_chars)
        return;  // str owns its own storage; normal destruction is fine

    // Overwrite the doctored string in place with a freshly constructed
    // empty one ("placement new"), so its destruction touches nothing of
    // ours.
    new (&str) std::string();
}
// Return the canonical, table-owned character pointer for `strref`,
// inserting the string into the ustring table if it is not there yet.
//
// A string_view whose data pointer is null is treated as the empty string,
// and everything from the first embedded nul character onward is dropped.
//
// When PREVENT_HASH_COLLISIONS is nonzero, a string whose hash is already
// owned by a *different* string is re-hashed (keeping the reverse map's bin
// bits fixed) until it finds either its own earlier entry or an unused hash.
// The reverse map records which string owns each hash, and every collision
// encountered is appended to all_hash_collisions.
const char*
ustring::make_unique(string_view strref)
{
    UstringTable& table(ustring_table());

    // Eliminate nullptr-referred string views
    if (!strref.data())
        strref = string_view("", 0);

    hash_t hash = Strutil::strhash64(strref);
    // This line, if uncommented, lets you force lots of hash collisions:
    // hash &= ~hash_t(0xffffff);

#if !PREVENT_HASH_COLLISIONS
    // Check the ustring table to see if this string already exists. If so,
    // construct from its canonical representation.
    // NOTE: all locking is performed internally to the table implementation
    const char* result = table.lookup(strref, hash);
    if (result)
        return result;
    auto nul = strref.find('\0');
    if (nul != string_view::npos) {
        // Strutil::print("ustring::make_unique: string contains nulls @{}/{}: \"{}\"\n",
        // strref.find('\0'), strref.size(), strref);
        // OIIO_ASSERT(strref.find('\0') == string_view::npos &&
        // "ustring::make_unique() does not support embedded nulls");
        strref = strref.substr(0, nul);
        hash   = Strutil::strhash64(strref);
        result = table.lookup(strref, hash);
        if (result)
            return result;
    }
    // Strutil::print("ADDED ustring \"{}\" {:08x}\n", strref, hash);
    return table.insert(strref, hash);
#else
    // Check the ustring table to see if this string already exists with the
    // default hash. If so, we're done. This is by far the common case --
    // most lookups already exist in the table, and hash collisions are
    // extremely rare.
    const char* result = table.lookup(strref, hash);
    if (result)
        return result;

    // ustring doesn't allow strings with embedded nul characters. Before we
    // go any further, trim beyond any nul and rehash.
    auto nul = strref.find('\0');
    if (nul != string_view::npos) {
        // Strutil::print("ustring::make_unique: string contains nulls @{}/{}: \"{}\"\n",
        // strref.find('\0'), strref.size(), strref);
        // OIIO_ASSERT(strref.find('\0') == string_view::npos &&
        // "ustring::make_unique() does not support embedded nulls");
        strref = strref.substr(0, nul);
        hash   = Strutil::strhash64(strref);
        result = table.lookup(strref, hash);
        if (result)
            return result;
    }

    // We did not find it. There are two possibilities: (1) the string is in
    // the table but has a different hash because it collided; or (2) the
    // string is not yet in the table.

    // Thread safety by locking reverse_map's bin corresponding to our
    // original hash. This will prevent any potentially colliding ustring
    // from being added to either table. But ustrings whose hashes go to
    // different bins of the reverse map (which by definition cannot clash)
    // are allowed to be added concurrently.
    auto& rm(reverse_map());
    size_t bin          = rm.lock_bin(hash);
    hash_t orighash     = hash;
    size_t binbits      = orighash & (~rm.nobin_mask());
    size_t num_rehashes = 0;

    while (1) {
        auto rev = rm.find(hash, false);
        // rev now either holds an iterator into the reverse map for a
        // record that has this hash, or else it's end().
        if (rev == rm.end()) {
            // That hash is unused, insert the string with that hash into
            // the ustring table, and insert the hash with the unique char
            // pointer into the reverse_map.
            result  = table.insert(strref, hash);
            bool ok = rm.insert(hash, result, false);
            // Strutil::print("ADDED \"{}\" {:08x}\n", strref, hash);
            OIIO_ASSERT(ok && "thread safety failure");
            break;
        }

        // Something uses this hash. Is it our string? Comparing only the
        // first strref.size() characters would also accept a stored string
        // that merely *starts with* strref, so additionally require that
        // the stored string ends exactly there. Reading that character is
        // safe: strref has no embedded nul at this point, so a successful
        // strncmp means the stored string has at least strref.size()
        // non-nul characters before its terminator.
        const char* existing = rev->second;
        if (!strncmp(existing, strref.data(), strref.size())
            && existing[strref.size()] == '\0') {
            // It is our string, already in this hash slot!
            result = existing;
            break;
        }

        // Rehash, but keep the bin bits identical so we always rehash into
        // the same (locked) bin. But watch out for rehashing that returns the
        // identical non-bin part as before -- that will enter an infinite
        // loop if we're not careful!
        hash_t old_nonbin_bits = hash & rm.nobin_mask();
        hash_t new_nonbin_bits = farmhash::Fingerprint(hash) & rm.nobin_mask();
        if (OIIO_UNLIKELY(old_nonbin_bits == new_nonbin_bits)) {
            new_nonbin_bits = (new_nonbin_bits + 7) & rm.nobin_mask();
#    ifndef NDEBUG
            std::string s = Strutil::escape_chars(strref);
            print(stderr, "IDEMPOTENT RE-HASH! |{}|\n", s);
            for (auto c : s)
                print(stderr, c > 0 ? "{:c}" : "\\{:03o}",
                      static_cast<unsigned char>(c));
            print(stderr, "\n");
#    endif
        }
        hash = binbits | new_nonbin_bits;
        ++num_rehashes;
        // Strutil::print("COLLISION \"{}\" {:08x} vs \"{}\"\n",
        // strref, orighash, rev->second);
        {
            // Record the string that already owns the contested hash.
            std::lock_guard<std::mutex> lock(collision_mutex);
            all_hash_collisions.emplace_back(rev->second, rev->first);
        }
        OIIO_ASSERT(num_rehashes < 100000);  // Something is very wrong
    }
    rm.unlock_bin(bin);

    if (num_rehashes) {
        // Record our own string together with the hash it originally had.
        std::lock_guard<std::mutex> lock(collision_mutex);
        all_hash_collisions.emplace_back(result, orighash);
    }

    return result;
#endif
}
// Build a ustring from a hash value alone, using the table's hash-only
// lookup. That lookup yields a null pointer when no entry has this hash, and
// the result is passed straight to from_unique().
ustring
ustring::from_hash(hash_t hash)
{
    const char* canonical_chars = ustring_table().lookup(hash);
    return from_unique(canonical_chars);
}
// Return the ustring made of the characters of `s` followed by those of `t`.
//
// The concatenation is assembled in a 256-byte stack buffer when it fits,
// otherwise in a temporary heap buffer, and then handed to the
// (pointer, length) ustring constructor. Either view may be empty,
// including a default-constructed view whose data pointer is null.
ustring
ustring::concat(string_view s, string_view t)
{
    const size_t sl  = s.size();
    const size_t tl  = t.size();
    const size_t len = sl + tl;

    std::unique_ptr<char[]> heap_buf;
    char local_buf[256];
    char* buf = local_buf;
    if (len > sizeof(local_buf)) {
        heap_buf.reset(new char[len]);
        buf = heap_buf.get();
    }

    // Skip zero-length copies: an empty string_view may carry a null data
    // pointer, and passing a null pointer to memcpy is undefined behavior
    // even when the byte count is 0.
    if (sl)
        memcpy(buf, s.data(), sl);
    if (tl)
        memcpy(buf + sl, t.data(), tl);

    return ustring(buf, len);
}
std::string
ustring::getstats(bool verbose)
{
std::ostringstream out;
out.imbue(std::locale::classic()); // Force "C" locale with '.' decimal
size_t n_e = total_ustrings();
size_t mem = memory();
if (verbose) {
out << "ustring statistics:\n";
#ifdef USTRING_TRACK_NUM_LOOKUPS
out << " ustring requests: " << ustring_table().get_num_lookups()
<< "\n";
#endif
out << " unique strings: " << n_e << "\n";
out << " ustring memory: " << Strutil::memformat(mem) << "\n";
#ifndef NDEBUG
std::vector<ustring> collisions;
hash_collisions(&collisions);
if (collisions.size()) {
out << " Hash collisions: " << collisions.size() << "\n";
for (auto c : collisions)
out << Strutil::fmt::format(" {} \"{}\"\n", c.hash(), c);
}
#endif
} else {
#ifdef USTRING_TRACK_NUM_LOOKUPS
out << "requests: " << ustring_table().get_num_lookups() << ", ";
#endif
out << "unique " << n_e << ", " << Strutil::memformat(mem);
}
return out.str();
}
// Return the number of recorded hash-collision records. If `collisions` is
// non-null, a ustring for each record is also appended to it. The collision
// list is read while holding collision_mutex.
size_t
ustring::hash_collisions(std::vector<ustring>* collisions)
{
    std::lock_guard<std::mutex> guard(collision_mutex);
    if (collisions != nullptr) {
        for (const auto& record : all_hash_collisions) {
            collisions->emplace_back(ustring::from_unique(record.first));
        }
    }
    return all_hash_collisions.size();
}
// Number of strings currently held in the ustring table (summed over bins).
size_t
ustring::total_ustrings()
{
    return ustring_table().get_num_entries();
}
// Bytes of memory the ustring table accounts for (summed over bins).
size_t
ustring::memory()
{
    return ustring_table().get_memory_usage();
}
OIIO_NAMESPACE_3_1_END