Skip to content

Commit 5c38653

Browse files
committed
Add constinit global string holder classes
These are utility classes for global strings whose value is known only at runtime. They are motivated by the on-going effort to remove Singleton<T>, which has surfaced several places in system_util.cc (directory caches, user-SID lookup, program-invocation-name holder) where the singleton wraps a string lookup that must run *after* the surrounding modules are ready. A function-local static std::string would be the obvious replacement, but it reintroduces two issues that Singleton<T> callers have learnt to avoid: * On Windows, function-local static initialization is not safe to invoke from DllMain because the compiler-generated guard can take locks that interact badly with the Loader Lock. * The resulting destructor runs at process exit in unspecified order relative to other globals. Both classes address these by keeping all foreign code outside the publish lock and by being trivially destructible (unless MOZC_NO_ATOMIC_FLAG_WAIT is specified). ConstInitImmutableString is the publish-once, read-many variant. It takes an IdempotentInitializer function pointer that is invoked lock-free on the first GetOrInit(), so the class is safe to use from DllMain even when the initializer itself acquires the Loader Lock. Subsequent reads return a string_view over stable, NUL-terminated storage on a lock-free fast path: #include "base/strings/const_init_immutable_string.h" constinit mozc::ConstInitImmutableString<256> g_program_files_x86( []() -> std::string { return ComputeProgramFilesX86Path(); }); absl::string_view path = g_program_files_x86.GetOrInit(); ConstInitMutableString is the set-many, snapshot-read variant. Set() atomically replaces the stored value; Get() returns a std::string snapshot that is copied while holding a lock. 
#include "base/strings/const_init_mutable_string.h" constinit mozc::ConstInitMutableString<256> g_invocation_name; void Init(absl::string_view name) { g_invocation_name.Set(name); } std::string GetName() { return g_invocation_name.Get(); } For platforms where std::atomic_flag::wait() is disallowed in favor of absl::Mutex, the MOZC_NO_ATOMIC_FLAG_WAIT macro is provided to switch the implementation to the absl::Mutex-based one. This guarantees that std::atomic_flag::wait() is used only for "client" build flavors in "mozc_select".
1 parent 3c327d8 commit 5c38653

8 files changed

Lines changed: 766 additions & 0 deletions

src/base/strings/BUILD.bazel

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,58 @@ mozc_cc_test(
7575
],
7676
)
7777

78+
# Header-only library providing mozc::ConstInitImmutableString.
mozc_cc_library(
    name = "const_init_immutable_string",
    hdrs = ["const_init_immutable_string.h"],
    # Outside the "client" flavor the header is compiled with
    # MOZC_NO_ATOMIC_FLAG_WAIT, which selects its absl::Mutex-based
    # implementation; that is the only configuration that needs Abseil.
    defines = mozc_select(
        client = [],
        default = ["MOZC_NO_ATOMIC_FLAG_WAIT"],
    ),
    deps = [
        "//base/strings/internal:const_init_string_helpers",
    ] + mozc_select(
        client = [],
        default = [
            "@com_google_absl//absl/base:core_headers",
            "@com_google_absl//absl/synchronization",
        ],
    ),
)
93+
94+
# Unit tests for :const_init_immutable_string.
mozc_cc_test(
    name = "const_init_immutable_string_test",
    size = "small",
    srcs = ["const_init_immutable_string_test.cc"],
    deps = [
        ":const_init_immutable_string",
        "//testing:gunit_main",
    ],
)
103+
104+
# Header-only library providing mozc::ConstInitMutableString.
mozc_cc_library(
    name = "const_init_mutable_string",
    hdrs = ["const_init_mutable_string.h"],
    # MOZC_NO_ATOMIC_FLAG_WAIT is defined for every flavor except "client";
    # the Abseil dependencies below are added for exactly the same flavors.
    defines = mozc_select(
        client = [],
        default = ["MOZC_NO_ATOMIC_FLAG_WAIT"],
    ),
    deps = [
        "//base/strings/internal:const_init_string_helpers",
    ] + mozc_select(
        client = [],
        default = [
            "@com_google_absl//absl/base:core_headers",
            "@com_google_absl//absl/synchronization",
        ],
    ),
)
119+
120+
# Unit tests for :const_init_mutable_string.
mozc_cc_test(
    name = "const_init_mutable_string_test",
    size = "small",
    srcs = ["const_init_mutable_string_test.cc"],
    deps = [
        ":const_init_mutable_string",
        "//testing:gunit_main",
    ],
)
129+
78130
mozc_cc_library(
79131
name = "japanese",
80132
srcs = [
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
// Copyright 2010-2021, Google Inc.
2+
// All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions are
6+
// met:
7+
//
8+
// * Redistributions of source code must retain the above copyright
9+
// notice, this list of conditions and the following disclaimer.
10+
// * Redistributions in binary form must reproduce the above
11+
// copyright notice, this list of conditions and the following disclaimer
12+
// in the documentation and/or other materials provided with the
13+
// distribution.
14+
// * Neither the name of Google Inc. nor the names of its
15+
// contributors may be used to endorse or promote products derived from
16+
// this software without specific prior written permission.
17+
//
18+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
30+
#ifndef MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_
31+
#define MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_
32+
33+
#include <array>
34+
#include <atomic>
35+
#include <cstddef>
36+
#include <memory>
37+
#include <string>
38+
#include <string_view>
39+
#include <type_traits>
40+
41+
#if defined(MOZC_NO_ATOMIC_FLAG_WAIT)
42+
#include "absl/base/const_init.h"
43+
#include "absl/synchronization/mutex.h"
44+
#endif // defined(MOZC_NO_ATOMIC_FLAG_WAIT)
45+
46+
#include "base/strings/internal/const_init_string_helpers.h"
47+
48+
namespace mozc {
49+
50+
// A utility class to deal with constant global strings whose value is known
51+
// only at runtime. It has the following capabilities:
52+
//
53+
// 1. It allows the library users to lazily initialize the string by calling
54+
// GetOrInit() only after it becomes ready, e.g. only after dependent
55+
// modules are fully loaded.
56+
// 2. It is thread-safe, meaning that multiple threads can call GetOrInit()
57+
// concurrently without causing data race, with an assumption that the
58+
// idempotent_initializer is thread-safe and *idempotent* (i.e., it always
59+
// returns the same value when called multiple times).
60+
// 3. It guarantees that the string is null-terminated.
61+
// 4. It accepts the constinit keyword.
62+
//
63+
// Synchronization model:
64+
// ---------------------
65+
// The initializer runs with no class-owned lock held. This avoids lock
66+
// order inversion when the initializer triggers foreign code that acquires
67+
// its own locks -- e.g. a LoadLibrary() call from within DllMain on Windows,
68+
// which Microsoft documents as a classic deadlock pattern:
69+
//
70+
// https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-best-practices#deadlocks-caused-by-lock-order-inversion
71+
//
72+
// Racing threads may each invoke the initializer; per the IdempotentInitializer
73+
// contract that is acceptable.
74+
//
75+
// Once the string is published it cannot be replaced. See
76+
// ConstInitMutableString for a variant that additionally supports thread-safe
77+
// Set().
78+
//
79+
// Caveats on destruction:
80+
// ---------------------
81+
// * Without MOZC_NO_ATOMIC_FLAG_WAIT, this class is guaranteed to be
82+
// trivially destructible and can be safely used in the global scope without
83+
// worrying about destruction order at exit.
84+
// * With MOZC_NO_ATOMIC_FLAG_WAIT (e.g. server-side configurations where
85+
// std::atomic_flag::wait is not allowed), it falls back to absl::Mutex,
86+
// which offers better integration with TSAN. In this case it is no longer
87+
// trivially destructible, which means atexit-style destructions will happen
88+
// for globally instantiated objects. As explained in
89+
// absl/base/const_init.h, strictly speaking this can fall into undefined
90+
// behavior and we are just relying on known toolchain-specific behaviors
91+
// that are not guaranteed by the standard.
92+
//
93+
// Storage model:
94+
// -------------
95+
// * Values that fit in fixed_array_size characters (including the terminating
96+
// NUL) live in an inline array.
97+
// * Larger values live in a heap buffer. Because a published value is never
98+
// replaced, that buffer is never freed or reallocated once installed.
99+
// * The currently-installed heap buffer (if any) is intentionally leaked at
100+
// process exit.
101+
#if !defined(MOZC_NO_ATOMIC_FLAG_WAIT)
102+
103+
// Trivially-destructible variant using `std::atomic_flag::wait`.
104+
template <size_t fixed_array_size,
105+
const_init_string_internal::SupportedChar CharT = char>
106+
class ConstInitImmutableString {
107+
public:
108+
using StringT = std::basic_string<CharT>;
109+
using StringViewT = std::basic_string_view<CharT>;
110+
111+
// A data initializer that is guaranteed to return the same value no matter
112+
// how many times it is called. It must also be reentrant and safe to call
113+
// from multiple threads concurrently.
114+
using IdempotentInitializer = std::add_pointer_t<StringT()>;
115+
116+
ConstInitImmutableString() = delete;
117+
~ConstInitImmutableString() = default;
118+
119+
consteval explicit ConstInitImmutableString(IdempotentInitializer init)
120+
: idempotent_initializer_(init) {}
121+
122+
[[nodiscard]] StringViewT GetOrInit() {
123+
// Fast path: latch observed. Synchronization is on init_done_; the acquire
124+
// on test() already publishes all writes prior to the publisher's
125+
// release on init_done_.test_and_set(), so result_ptr_ can can be loaded
126+
// with relaxed ordering.
127+
if (init_done_.test(std::memory_order::acquire)) [[likely]] {
128+
return StringViewT(result_ptr_.load(std::memory_order::relaxed),
129+
result_size_);
130+
}
131+
// Invoke the initializer and stage any heap fallback *outside* of
132+
// init_started_. Holding init_started_ across foreign code would introduce
133+
// the Loader-Lock-style lock-order-inversion deadlock described above;
134+
// staging outside also lets a throwing initializer on one thread leave the
135+
// instance recoverable for other threads.
136+
const StringT value = idempotent_initializer_();
137+
std::unique_ptr<CharT[]> heap_fallback =
138+
const_init_string_internal::StageHeapFallback<CharT>(value,
139+
std::size(value_));
140+
if (!init_started_.test_and_set(std::memory_order::acquire)) {
141+
// Won the publish race. Commit the staged value.
142+
CharT* dest = const_init_string_internal::CommitStagedValue<CharT>(
143+
value, heap_fallback, value_.data());
144+
result_size_ = value.size();
145+
result_ptr_.store(dest, std::memory_order::relaxed);
146+
init_done_.test_and_set(std::memory_order::release);
147+
init_done_.notify_all();
148+
return StringViewT(dest, value.size());
149+
}
150+
// Lost the publish race. Block at the OS level until the winner publishes.
151+
init_done_.wait(false, std::memory_order::acquire);
152+
return StringViewT(result_ptr_.load(std::memory_order::relaxed),
153+
result_size_);
154+
}
155+
156+
private:
157+
// Hot fields: all three are touched on the fast path. Keep them together at
158+
// the front so they share a cache line regardless of `fixed_array_size`.
159+
std::atomic_flag init_done_ = {};
160+
std::atomic<const CharT*> result_ptr_ = nullptr;
161+
size_t result_size_ = 0;
162+
// Cold fields: only the slow init path touches these.
163+
const IdempotentInitializer idempotent_initializer_;
164+
std::atomic_flag init_started_ = {};
165+
std::array<CharT, fixed_array_size> value_ = {};
166+
};
167+
168+
static_assert(
169+
std::is_trivially_destructible_v<ConstInitImmutableString<1, char>>);
170+
static_assert(
171+
std::is_trivially_destructible_v<ConstInitImmutableString<1, wchar_t>>);
172+
173+
#else // !defined(MOZC_NO_ATOMIC_FLAG_WAIT)
174+
175+
// absl::Mutex-based variant, which offers better integration with TSAN by
176+
// giving up trivial destructibility and relying on toolchain-specific behaviors
177+
// that are not guaranteed by the standard.
178+
template <size_t fixed_array_size,
179+
const_init_string_internal::SupportedChar CharT = char>
180+
class ConstInitImmutableString {
181+
public:
182+
using StringT = std::basic_string<CharT>;
183+
using StringViewT = std::basic_string_view<CharT>;
184+
185+
// A data initializer that is guaranteed to return the same value no matter
186+
// how many times it is called. It must also be reentrant and safe to call
187+
// from multiple threads concurrently.
188+
using IdempotentInitializer = std::add_pointer_t<StringT()>;
189+
190+
ConstInitImmutableString() = delete;
191+
~ConstInitImmutableString() = default;
192+
193+
consteval explicit ConstInitImmutableString(IdempotentInitializer init)
194+
: idempotent_initializer_(init) {}
195+
196+
[[nodiscard]] StringViewT GetOrInit() {
197+
// Fast path: publication observed (no mutex_ involvement).
198+
if (const CharT* p = result_ptr_.load(std::memory_order::acquire))
199+
[[likely]] {
200+
return StringViewT(p, result_size_);
201+
}
202+
// Invoke the initializer and stage any heap fallback *outside* of mutex_.
203+
// See the class comment for the Loader-Lock-style lock-order-inversion
204+
// hazard this avoids.
205+
const StringT value = idempotent_initializer_();
206+
std::unique_ptr<CharT[]> heap_fallback =
207+
const_init_string_internal::StageHeapFallback<CharT>(value,
208+
std::size(value_));
209+
absl::MutexLock l(mutex_);
210+
if (const CharT* p = result_ptr_.load(std::memory_order::relaxed)) {
211+
return StringViewT(p, result_size_);
212+
}
213+
CharT* dest = const_init_string_internal::CommitStagedValue<CharT>(
214+
value, heap_fallback, value_.data());
215+
result_size_ = value.size();
216+
result_ptr_.store(dest, std::memory_order::release);
217+
return StringViewT(dest, value.size());
218+
}
219+
220+
private:
221+
// Hot fields: all two are touched on the fast path. Keep them together at
222+
// the front so they share a cache line regardless of `value_`.
223+
std::atomic<const CharT*> result_ptr_ = nullptr;
224+
size_t result_size_ = 0;
225+
// Cold fields: only the slow init path touches these.
226+
const IdempotentInitializer idempotent_initializer_;
227+
absl::Mutex mutex_{absl::kConstInit};
228+
std::array<CharT, fixed_array_size> value_ = {};
229+
};
230+
231+
#endif // !defined(MOZC_NO_ATOMIC_FLAG_WAIT)
232+
233+
} // namespace mozc
234+
235+
#endif // MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Copyright 2010-2021, Google Inc.
2+
// All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions are
6+
// met:
7+
//
8+
// * Redistributions of source code must retain the above copyright
9+
// notice, this list of conditions and the following disclaimer.
10+
// * Redistributions in binary form must reproduce the above
11+
// copyright notice, this list of conditions and the following disclaimer
12+
// in the documentation and/or other materials provided with the
13+
// distribution.
14+
// * Neither the name of Google Inc. nor the names of its
15+
// contributors may be used to endorse or promote products derived from
16+
// this software without specific prior written permission.
17+
//
18+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
30+
#include "base/strings/const_init_immutable_string.h"
31+
32+
#include <string>
33+
#include <string_view>
34+
35+
#include "testing/gunit.h"
36+
37+
namespace mozc::strings {
38+
namespace {
39+
40+
constinit ConstInitImmutableString<256> g_value_simple_init(
41+
[]() -> std::string { return "Mozc"; });
42+
43+
TEST(ConstInitImmutableStringTest, SimpleInit) {
44+
EXPECT_EQ(g_value_simple_init.GetOrInit(), "Mozc");
45+
EXPECT_EQ(g_value_simple_init.GetOrInit(), "Mozc");
46+
}
47+
48+
TEST(ConstInitImmutableStringTest, NullTermination) {
49+
const auto value = g_value_simple_init.GetOrInit();
50+
EXPECT_EQ(value.data()[value.size()], '\0');
51+
}
52+
53+
constexpr std::string_view kLongValue =
54+
"this string is longer than the inline buffer";
55+
56+
constinit ConstInitImmutableString<8> g_value_overflow([]() -> std::string {
57+
return std::string("this string is longer than the inline buffer");
58+
});
59+
60+
TEST(ConstInitImmutableStringTest, HeapFallback) {
61+
const auto value = g_value_overflow.GetOrInit();
62+
EXPECT_EQ(value, kLongValue);
63+
EXPECT_EQ(value.data()[value.size()], '\0');
64+
// Repeated calls return the same pointer (stable storage).
65+
EXPECT_EQ(g_value_overflow.GetOrInit().data(), value.data());
66+
}
67+
68+
constinit ConstInitImmutableString<256, wchar_t> g_value_wide(
69+
[]() -> std::wstring { return L"WideMozc"; });
70+
71+
TEST(ConstInitImmutableStringTest, WideChar) {
72+
EXPECT_EQ(g_value_wide.GetOrInit(), std::wstring_view(L"WideMozc"));
73+
const auto value = g_value_wide.GetOrInit();
74+
EXPECT_EQ(value.data()[value.size()], L'\0');
75+
}
76+
77+
} // namespace
78+
} // namespace mozc::strings

0 commit comments

Comments
 (0)