From 1797c00cbf20f302df043d5e34bc4f0ef2c1710a Mon Sep 17 00:00:00 2001 From: Yohei Yukawa Date: Mon, 11 May 2026 18:05:35 +0900 Subject: [PATCH] Add constinit global string holder classes These are utility classes for global strings whose value is known only at runtime. They are motivated by the on-going effort to remove Singleton, which has surfaced several places in system_util.cc (directory caches, user-SID lookup, program-invocation-name holder) where the singleton wraps a string lookup that must run *after* the surrounding modules are ready. A function-local "static std::string" would be the obvious replacement, but reintroduces two issues that Singleton callers have learnt to avoid: * On Windows, function-local static initialization is not safe to invoke from DllMain because the compiler-generated guard can take locks that interact badly with the Loader Lock. * The resulting destructor runs at process exit in unspecified order relative to other globals. Both classes address these by being trivially destructible (values that don't fit inline live in an intentionally-leaked heap buffer) and by keeping all foreign code outside the publish lock. ConstInitImmutableString is the publish-once, read-many variant. It takes an IdempotentInitializer function pointer that is invoked lock-free on the first GetOrInit(), so the class is safe to use from DllMain even when the initializer itself acquires the Loader Lock. Subsequent reads return a string_view over stable, NUL-terminated storage on a lock-free fast path: #include "base/strings/const_init_immutable_string.h" constinit mozc::ConstInitImmutableString<256> kProgramFilesX86( []() -> std::string { return ComputeProgramFilesX86Path(); }); absl::string_view path = kProgramFilesX86.GetOrInit(); ConstInitMutableString is the set-many, snapshot-read variant. Set atomically replaces the stored value; Get returns a std::string snapshot under a spinlock-protected copy. 
Empty doubles as the "never set" sentinel; callers that want a lazy default observe empty and Set it themselves: #include "base/strings/const_init_mutable_string.h" constinit mozc::ConstInitMutableString<256> g_invocation_name; void Init(absl::string_view name) { g_invocation_name.Set(name); } std::string GetName() { return g_invocation_name.Get(); } Implementation notes: * Only the publish step is serialized, by a small atomic_flag spinlock with PAUSE / YIELD hints. Heap allocation for the fallback buffer is staged before the spinlock is taken, so the critical section is bounded by fixed_array_size and a throwing bad_alloc cannot leave the spinlock latched. * Constructors are consteval, so instances must be constant-initialized. * Common helpers (spin hint, RAII spinlock guard, heap-fallback staging and commit) live in base/strings/internal/const_init_string_helpers.{h,cc}. Templates that operate on CharT data are explicitly instantiated for char and wchar_t so they live in one TU. --- src/base/strings/BUILD.bazel | 32 ++++ .../strings/const_init_immutable_string.h | 142 ++++++++++++++++ .../const_init_immutable_string_test.cc | 78 +++++++++ src/base/strings/const_init_mutable_string.h | 140 ++++++++++++++++ .../strings/const_init_mutable_string_test.cc | 82 +++++++++ src/base/strings/internal/BUILD.bazel | 6 + .../internal/const_init_string_helpers.cc | 79 +++++++++ .../internal/const_init_string_helpers.h | 158 ++++++++++++++++++ 8 files changed, 717 insertions(+) create mode 100644 src/base/strings/const_init_immutable_string.h create mode 100644 src/base/strings/const_init_immutable_string_test.cc create mode 100644 src/base/strings/const_init_mutable_string.h create mode 100644 src/base/strings/const_init_mutable_string_test.cc create mode 100644 src/base/strings/internal/const_init_string_helpers.cc create mode 100644 src/base/strings/internal/const_init_string_helpers.h diff --git a/src/base/strings/BUILD.bazel b/src/base/strings/BUILD.bazel index 
059e6ec9eb..98b15e7d0f 100644 --- a/src/base/strings/BUILD.bazel +++ b/src/base/strings/BUILD.bazel @@ -75,6 +75,38 @@ mozc_cc_test( ], ) +mozc_cc_library( + name = "const_init_immutable_string", + hdrs = ["const_init_immutable_string.h"], + deps = ["//base/strings/internal:const_init_string_helpers"], +) + +mozc_cc_test( + name = "const_init_immutable_string_test", + size = "small", + srcs = ["const_init_immutable_string_test.cc"], + deps = [ + ":const_init_immutable_string", + "//testing:gunit_main", + ], +) + +mozc_cc_library( + name = "const_init_mutable_string", + hdrs = ["const_init_mutable_string.h"], + deps = ["//base/strings/internal:const_init_string_helpers"], +) + +mozc_cc_test( + name = "const_init_mutable_string_test", + size = "small", + srcs = ["const_init_mutable_string_test.cc"], + deps = [ + ":const_init_mutable_string", + "//testing:gunit_main", + ], +) + mozc_cc_library( name = "japanese", srcs = [ diff --git a/src/base/strings/const_init_immutable_string.h b/src/base/strings/const_init_immutable_string.h new file mode 100644 index 0000000000..ec9329dfa5 --- /dev/null +++ b/src/base/strings/const_init_immutable_string.h @@ -0,0 +1,142 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_ +#define MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "base/strings/internal/const_init_string_helpers.h" + +namespace mozc { + +// A utility class to deal with constant global strings whose value is known +// only at runtime. It has the following capabilities: +// 1. It allows the library users to lazily initialize the string by calling +// `GetOrInit()` only after it becomes ready, e.g. only after dependent +// modules are fully loaded. +// 2. It is thread-safe, meaning that multiple threads can call `GetOrInit()` +// concurrently without causing data race, with an assumption that the +// `idempotent_initializer` is idempotent (i.e., it always returns the same +// value when called multiple times) and thread-safe. +// 3. It guarantees that the string is null-terminated. +// 4. 
It is trivially destructible, which means it can be used in static +// storage duration objects without causing destructor order issues, with a +// caveat that it may leak memory if the string is larger than the fixed +// array size provided by the template parameter. +// +// Once the string is published it cannot be replaced. See +// `ConstInitMutableString` for a variant that additionally supports +// thread-safe `Set()`. +template +class ConstInitImmutableString { + public: + using StringT = std::basic_string; + using StringViewT = std::basic_string_view; + + // A data initializer that is guaranteed to return the same value no matter + // how many times it is called. It must also be reentrant and safe to call + // from multiple threads concurrently. + using IdempotentInitializer = std::add_pointer_t; + + ConstInitImmutableString() = delete; + ~ConstInitImmutableString() = default; + + consteval explicit ConstInitImmutableString( + IdempotentInitializer idempotent_initializer) + : idempotent_initializer_(idempotent_initializer) {} + + [[nodiscard]] StringViewT GetOrInit() { + // Fast path: result already published. + if (const CharT* ptr = result_ptr_.load(std::memory_order::acquire)) + [[likely]] { + return StringViewT(ptr, result_size_); + } + + // The initializer is called outside any lock so that no foreign code + // runs under a lock held by this class. This avoids classic deadlock + // scenarios such as Win32 Loader Lock recursion when `GetOrInit()` is + // called from `DllMain` and the initializer internally invokes + // `LoadLibrary`. Racing threads may each call it; per the + // `IdempotentInitializer` contract that is acceptable. 
+ // https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-best-practices#deadlocks-caused-by-lock-order-inversion + const StringT value = idempotent_initializer_(); + + // Stage any heap fallback outside the spinlock so that the critical + // section is bounded by `fixed_array_size` and a throwing + // allocation cannot leave the spinlock held. + std::unique_ptr heap_fallback = + const_init_string_internal::StageHeapFallback(value, + std::size(value_)); + + std::lock_guard l( + committing_); + if (const CharT* ptr = result_ptr_.load(std::memory_order::relaxed)) { + // Another thread already published; staged `heap_fallback` is + // freed by its destructor when this scope unwinds. + return StringViewT(ptr, result_size_); + } + + // Winner: any staged heap buffer is intentionally leaked for the + // remainder of the process so that `ConstInitImmutableString` itself + // remains trivially destructible. + CharT* dest = const_init_string_internal::CommitStagedValue( + value, heap_fallback, value_.data()); + result_size_ = value.size(); + result_ptr_.store(dest, std::memory_order::release); + return StringViewT(dest, value.size()); + } + + private: + std::atomic result_ptr_ = nullptr; + size_t result_size_ = 0; + const_init_string_internal::TrivialMicroSpinLock committing_; + std::array value_ = {}; + const IdempotentInitializer idempotent_initializer_; +}; + +// Verify the trivial destructibility contract for both supported `CharT` +// instantiations so misuse (e.g. accidentally adding a non-trivial member) +// is caught at compile time even if no instance is constructed. 
+static_assert( + std::is_trivially_destructible_v>); +static_assert( + std::is_trivially_destructible_v>); + +} // namespace mozc + +#endif // MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_ diff --git a/src/base/strings/const_init_immutable_string_test.cc b/src/base/strings/const_init_immutable_string_test.cc new file mode 100644 index 0000000000..55100a435b --- /dev/null +++ b/src/base/strings/const_init_immutable_string_test.cc @@ -0,0 +1,78 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/strings/const_init_immutable_string.h" + +#include +#include + +#include "testing/gunit.h" + +namespace mozc::strings { +namespace { + +constinit ConstInitImmutableString<256> g_value_simple_init( + []() -> std::string { return "Mozc"; }); + +TEST(ConstInitImmutableStringTest, SimpleInit) { + EXPECT_EQ(g_value_simple_init.GetOrInit(), "Mozc"); + EXPECT_EQ(g_value_simple_init.GetOrInit(), "Mozc"); +} + +TEST(ConstInitImmutableStringTest, NullTermination) { + const auto value = g_value_simple_init.GetOrInit(); + EXPECT_EQ(value.data()[value.size()], '\0'); +} + +constexpr std::string_view kLongValue = + "this string is longer than the inline buffer"; + +constinit ConstInitImmutableString<8> g_value_overflow([]() -> std::string { + return std::string("this string is longer than the inline buffer"); +}); + +TEST(ConstInitImmutableStringTest, HeapFallback) { + const auto value = g_value_overflow.GetOrInit(); + EXPECT_EQ(value, kLongValue); + EXPECT_EQ(value.data()[value.size()], '\0'); + // Repeated calls return the same pointer (stable storage). 
+ EXPECT_EQ(g_value_overflow.GetOrInit().data(), value.data()); +} + +constinit ConstInitImmutableString<256, wchar_t> g_value_wide( + []() -> std::wstring { return L"WideMozc"; }); + +TEST(ConstInitImmutableStringTest, WideChar) { + EXPECT_EQ(g_value_wide.GetOrInit(), std::wstring_view(L"WideMozc")); + const auto value = g_value_wide.GetOrInit(); + EXPECT_EQ(value.data()[value.size()], L'\0'); +} + +} // namespace +} // namespace mozc::strings diff --git a/src/base/strings/const_init_mutable_string.h b/src/base/strings/const_init_mutable_string.h new file mode 100644 index 0000000000..666c1c295b --- /dev/null +++ b/src/base/strings/const_init_mutable_string.h @@ -0,0 +1,140 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef MOZC_BASE_STRINGS_CONST_INIT_MUTABLE_STRING_H_ +#define MOZC_BASE_STRINGS_CONST_INIT_MUTABLE_STRING_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "base/strings/internal/const_init_string_helpers.h" + +namespace mozc { + +// A variant of `ConstInitImmutableString` that additionally supports +// thread-safe `Set()`. Like `ConstInitImmutableString`, +// `ConstInitMutableString` is trivially destructible and may be used in +// `constinit` globals. +// +// Semantics: +// * `Get()` returns a snapshot of the most recent `Set` value, or an +// empty string if `Set` has never been called. Empty thus doubles as +// the "never set" sentinel; callers that need lazy default behaviour +// should compute the default themselves on observing empty and call +// `Set` to publish it. +// * `Set(value)` atomically replaces the stored value. +// +// Storage: +// * Values that fit in `fixed_array_size` characters (including the +// terminating NUL) live in an inline array. +// * Larger values live in a heap buffer. When `Set` replaces a heap-backed +// value, the prior heap allocation is freed before `Set` returns. +// * The currently-installed heap buffer (if any) is intentionally leaked at +// process exit so that the class itself remains trivially destructible. 
+template +class ConstInitMutableString { + public: + using StringT = std::basic_string; + using StringViewT = std::basic_string_view; + + consteval ConstInitMutableString() noexcept = default; + ~ConstInitMutableString() = default; + + void Set(StringViewT value); + StringT Get(); + + private: + std::atomic initialized_ = {}; + CharT* current_ptr_ = nullptr; + size_t current_size_ = 0; + const_init_string_internal::TrivialMicroSpinLock committing_; + std::array value_ = {}; +}; + +template +void ConstInitMutableString::Set(StringViewT value) { + // Stage any heap fallback outside the spinlock so the critical section + // is bounded by `fixed_array_size` and a throwing `bad_alloc` cannot + // leave the spinlock held. + std::unique_ptr heap_fallback = + const_init_string_internal::StageHeapFallback(value, + std::size(value_)); + + std::unique_ptr old_heap; + { + std::lock_guard l( + committing_); + // If the previously-installed buffer was on the heap, hand it to a + // local `unique_ptr` so it is freed after we drop the lock. Readers + // complete their copy under the same lock, so no one is left + // holding the old pointer. + if (current_ptr_ != nullptr && current_ptr_ != value_.data()) { + old_heap.reset(current_ptr_); + } + current_ptr_ = const_init_string_internal::CommitStagedValue( + value, heap_fallback, value_.data()); + current_size_ = value.size(); + initialized_.store(true, std::memory_order::release); + } +} + +template +auto ConstInitMutableString::Get() -> StringT { + // Lock-free fast exit when nothing has ever been set. + if (!initialized_.load(std::memory_order::acquire)) { + return StringT(); + } + // Snapshot under the lock since `Set` may concurrently swap pointers. + // The `lock_guard` releases the lock if the `StringT` allocation + // here throws. 
+ std::lock_guard l( + committing_); + return StringT(current_ptr_, current_size_); +} + +// Verify the trivial destructibility contract for both supported `CharT` +// instantiations so misuse (e.g. accidentally adding a non-trivial member) +// is caught at compile time even if no instance is constructed. +static_assert( + std::is_trivially_destructible_v>); +static_assert( + std::is_trivially_destructible_v>); + +} // namespace mozc + +#endif // MOZC_BASE_STRINGS_CONST_INIT_MUTABLE_STRING_H_ diff --git a/src/base/strings/const_init_mutable_string_test.cc b/src/base/strings/const_init_mutable_string_test.cc new file mode 100644 index 0000000000..9668d4e754 --- /dev/null +++ b/src/base/strings/const_init_mutable_string_test.cc @@ -0,0 +1,82 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/strings/const_init_mutable_string.h" + +#include + +#include "testing/gunit.h" + +namespace mozc::strings { +namespace { + +constinit ConstInitMutableString<256> g_value_no_init; + +TEST(ConstInitMutableStringTest, EmptyByDefault) { + EXPECT_EQ(g_value_no_init.Get(), ""); +} + +constinit ConstInitMutableString<256> g_value_set_get; + +TEST(ConstInitMutableStringTest, SetThenGet) { + g_value_set_get.Set("hello"); + EXPECT_EQ(g_value_set_get.Get(), "hello"); + g_value_set_get.Set("world"); + EXPECT_EQ(g_value_set_get.Get(), "world"); +} + +constinit ConstInitMutableString<8> g_value_overflow; + +TEST(ConstInitMutableStringTest, HeapFallbackOnSet) { + const std::string long_value = "this string is longer than the inline buffer"; + g_value_overflow.Set(long_value); + EXPECT_EQ(g_value_overflow.Get(), long_value); + // Replacing a heap-backed value with another heap-backed value must free + // the prior allocation (validated under ASan); behaviorally we just check + // that the new value is observable. + const std::string longer_value = long_value + " and then some"; + g_value_overflow.Set(longer_value); + EXPECT_EQ(g_value_overflow.Get(), longer_value); + // Replacing a heap-backed value with one that fits inline. 
+ g_value_overflow.Set("short"); + EXPECT_EQ(g_value_overflow.Get(), "short"); +} + +constinit ConstInitMutableString<256, wchar_t> g_value_wide; + +TEST(ConstInitMutableStringTest, WideChar) { + EXPECT_EQ(g_value_wide.Get(), std::wstring()); + g_value_wide.Set(L"WideMozc"); + EXPECT_EQ(g_value_wide.Get(), std::wstring(L"WideMozc")); + g_value_wide.Set(L"WideOverride"); + EXPECT_EQ(g_value_wide.Get(), std::wstring(L"WideOverride")); +} + +} // namespace +} // namespace mozc::strings diff --git a/src/base/strings/internal/BUILD.bazel b/src/base/strings/internal/BUILD.bazel index fdbfb831db..97ab6cf400 100644 --- a/src/base/strings/internal/BUILD.bazel +++ b/src/base/strings/internal/BUILD.bazel @@ -82,6 +82,12 @@ mozc_cc_binary( ], ) +mozc_cc_library( + name = "const_init_string_helpers", + srcs = ["const_init_string_helpers.cc"], + hdrs = ["const_init_string_helpers.h"], +) + mozc_cc_library( name = "utf8_internal", srcs = ["utf8_internal.cc"], diff --git a/src/base/strings/internal/const_init_string_helpers.cc b/src/base/strings/internal/const_init_string_helpers.cc new file mode 100644 index 0000000000..4889d772bf --- /dev/null +++ b/src/base/strings/internal/const_init_string_helpers.cc @@ -0,0 +1,79 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/strings/internal/const_init_string_helpers.h" + +#include +#include +#include +#include + +namespace mozc::const_init_string_internal { + +template +std::unique_ptr StageHeapFallback(std::basic_string_view value, + size_t inline_capacity) { + const size_t size_with_null = value.size() + 1; + if (size_with_null <= inline_capacity) { + return nullptr; + } + // `make_unique_for_overwrite` (default-init) over `make_unique` + // (value-init) avoids zero-filling the buffer before `std::copy_n` + // overwrites it. + auto heap = std::make_unique_for_overwrite(size_with_null); + std::copy_n(value.data(), value.size(), heap.get()); + heap[value.size()] = CharT(0); + return heap; +} + +template +CharT* CommitStagedValue(std::basic_string_view value, + std::unique_ptr& heap_fallback, + CharT* inline_buffer) { + if (heap_fallback) { + return heap_fallback.release(); + } + std::copy_n(value.data(), value.size(), inline_buffer); + inline_buffer[value.size()] = CharT(0); + return inline_buffer; +} + +// Explicit instantiations: only `char` and `wchar_t` are supported. 
+template std::unique_ptr StageHeapFallback( + std::basic_string_view, size_t); +template std::unique_ptr StageHeapFallback( + std::basic_string_view, size_t); + +template char* CommitStagedValue(std::basic_string_view, + std::unique_ptr&, char*); +template wchar_t* CommitStagedValue(std::basic_string_view, + std::unique_ptr&, + wchar_t*); + +} // namespace mozc::const_init_string_internal diff --git a/src/base/strings/internal/const_init_string_helpers.h b/src/base/strings/internal/const_init_string_helpers.h new file mode 100644 index 0000000000..9fe51956a7 --- /dev/null +++ b/src/base/strings/internal/const_init_string_helpers.h @@ -0,0 +1,158 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef MOZC_BASE_STRINGS_INTERNAL_CONST_INIT_STRING_HELPERS_H_ +#define MOZC_BASE_STRINGS_INTERNAL_CONST_INIT_STRING_HELPERS_H_ + +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#include // _mm_pause, __yield +#endif // _MSC_VER + +namespace mozc::const_init_string_internal { + +// The character types supported by the const-init string family. Used to +// constrain template parameters in the public class headers and in the +// helper templates below. +template +concept SupportedChar = std::same_as || std::same_as; + +// CPU spin-loop hint. Emits `PAUSE` on x86 or `YIELD` on ARM, and is a +// no-op on other architectures. On SMT cores it lets the sibling thread +// make progress and reduces power draw; on modern x86 it also avoids the +// memory-order machine clear that would otherwise fire when the spin +// finally observes its target write. +// +// Kept inline (not out-of-lined into the .cc): the whole purpose of +// this function is to emit a single CPU instruction at the call site, +// which a function call would defeat. 
+inline void SpinLoopHint() noexcept { +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + _mm_pause(); +#elif defined(_MSC_VER) && defined(_M_ARM64) + __yield(); +#elif (defined(__GNUC__) || defined(__clang__)) && \ + (defined(__x86_64__) || defined(__i386__)) + __builtin_ia32_pause(); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(__aarch64__) + __asm__ __volatile__("yield" ::: "memory"); +#endif // (__GNUC__ || __clang__) && __aarch64__ +} + +// `atomic_flag`-based spinlock satisfying the BasicLockable named requirement, +// so it composes with std::lock_guard, std::scoped_lock and std::unique_lock. +// +// Trivially destructible and `constexpr`-default-constructible (via +// `std::atomic_flag`'s C++20 default ctor), so instances may live as +// members of `consteval`-constructed constinit globals. +// +// For those who are wondering if we can use absl::base_internal::SpinLock +// instead, Abseil's one is intended to be used as an internal primitive for +// absl::Mutex or a special primitive for async signal safety as of Abseil +// 20260107.1. As Abseil's compatibility guideline implies, it is not a +// general-purpose "SpinLock" implementation. +// +// Do not depend upon internal details. If something is in a namespace, file, +// directory, or simply contains the string internal, impl, test, detail, +// benchmark, sample, or example, unless it is explicitly called out, it is +// not part of the public API. It’s an implementation detail. You cannot +// friend it, you cannot include it, you cannot mention it or refer to it in +// any way. +// https://abseil.io/about/compatibility#c-symbols-and-files +// +// Here are detailed reasons why absl::base_internal::SpinLock is not suitable +// for the const-init string implementations: +// +// 1. Its default constructor is not constexpr, and its destructor is not +// trivial in TSAN builds. 
Both properties are load-bearing for the +// const-init string classes that own this lock as a member of a constinit +// global with no static initializer or atexit-registered destructor. +// 2. Its slow path is a bare relaxed-load loop with no PAUSE/YIELD CPU +// spin-loop hint (it falls through to Sleep(0) on Windows and sched_yield +// on POSIX). For use cases in const-init strings, the microscopic critical +// sections protected by the Test-and-Test-and-Set + PAUSE/YIELD pattern +// should work well because lock contention should be low and lock hold +// times should be quite short. +// 3. It does not satisfy the BasicLockable named requirement, which means we +// cannot use it with std::lock_guard and friends without a custom holder. +class TrivialMicroSpinLock { + public: + void lock() noexcept { + while (f_.test_and_set(std::memory_order::acquire)) { + // Test-and-test-and-set: spin using a relaxed read while the + // lock is held, so contending threads share the cache line + // read-only instead of fighting for it via RMWs. Only re-attempt + // the acquiring RMW once the inner read observes a release. + while (f_.test(std::memory_order::relaxed)) { + SpinLoopHint(); + } + } + } + void unlock() noexcept { f_.clear(std::memory_order::release); } + + private: + // `std::atomic_flag` deletes copy/move, so this class inherits that. + std::atomic_flag f_ = {}; +}; + +static_assert(std::is_trivially_destructible_v); + +// `StageHeapFallback` and `CommitStagedValue` are declarations only; +// definitions and explicit instantiations for `char` and `wchar_t` +// live in const_init_string_helpers.cc. Instantiating with any other +// character type is a link error by design. + +// Stages a copy of `value` for publication. If `value.size() + 1` +// characters (including a NUL terminator) fit in `inline_capacity`, +// returns a null pointer and the caller is expected to commit to its +// own inline buffer; otherwise returns a fresh, NUL-terminated heap +// copy. 
+template +std::unique_ptr StageHeapFallback(std::basic_string_view value, + size_t inline_capacity); + +// Commits the staged value to its destination: if `heap_fallback` is +// non-null its pointer is released and returned; otherwise `value` is +// copied into `inline_buffer` (which the caller must have sized to +// accommodate `value.size() + 1` characters; see `StageHeapFallback`) +// and a pointer to it is returned. The result is always NUL-terminated. +template +CharT* CommitStagedValue(std::basic_string_view value, + std::unique_ptr& heap_fallback, + CharT* inline_buffer); + +} // namespace mozc::const_init_string_internal + +#endif // MOZC_BASE_STRINGS_INTERNAL_CONST_INIT_STRING_HELPERS_H_