Skip to content

Commit b7bfdb3

Browse files
committed
Encapsulating AtomicInteger128 class
1 parent 6aa0286 commit b7bfdb3

4 files changed

Lines changed: 162 additions & 94 deletions

File tree

src/bthread/task_control.cpp

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ double TaskControl::get_cumulated_worker_time() {
533533
int64_t cputime_ns = 0;
534534
BAIDU_SCOPED_LOCK(_modify_group_mutex);
535535
for_each_task_group([&](TaskGroup* g) {
536-
cputime_ns += get_cumulated_worker_time(g);
536+
cputime_ns += g->cumulated_cputime_ns();
537537
});
538538
return cputime_ns / 1000000000.0;
539539
}
@@ -544,40 +544,11 @@ double TaskControl::get_cumulated_worker_time(bthread_tag_t tag) {
544544
const size_t ngroup = tag_ngroup(tag).load(butil::memory_order_relaxed);
545545
auto& groups = tag_group(tag);
546546
for (size_t i = 0; i < ngroup; ++i) {
547-
cputime_ns += get_cumulated_worker_time(groups[i]);
547+
cputime_ns += groups[i]->cumulated_cputime_ns();
548548
}
549549
return cputime_ns / 1000000000.0;
550550
}
551551

552-
double TaskControl::get_cumulated_worker_time(TaskGroup* g) {
553-
if (NULL == g) {
554-
return 0.0;
555-
}
556-
557-
#if __x86_64__ || __ARM_NEON
558-
#ifdef __x86_64__
559-
__m128i cpu_time_stat = _mm_load_si128(reinterpret_cast<__m128i*>(&g->_cpu_time_stat));
560-
#else // __ARM_NEON
561-
int64x2_t cpu_time_stat = vld1q_s64(reinterpret_cast<const int64_t*>(&g->_cpu_time_stat));
562-
#endif // __x86_64__
563-
int64_t last_run_ns = cpu_time_stat[0];
564-
int64_t cputime_ns = cpu_time_stat[1];
565-
#else // __x86_64__ || __ARM_NEON
566-
int64_t last_run_ns = 0;
567-
int64_t cputime_ns = 0;
568-
{
569-
BAIDU_SCOPED_LOCK(g->_cpu_time_stat_mutex);
570-
last_run_ns = g->_cpu_time_stat.last_run_ns;
571-
cputime_ns = g->_cpu_time_stat.cumulated_cputime_ns;
572-
}
573-
#endif // __x86_64__ || __ARM_NEON
574-
// Add the elapsed time of running bthread.
575-
if (last_run_ns > 0) {
576-
cputime_ns += butil::cpuwide_time_ns() - last_run_ns;
577-
}
578-
return cputime_ns;
579-
}
580-
581552
int64_t TaskControl::get_cumulated_switch_count() {
582553
int64_t c = 0;
583554
BAIDU_SCOPED_LOCK(_modify_group_mutex);

src/bthread/task_control.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ friend bthread_t init_for_pthread_stack_trace();
8080

8181
double get_cumulated_worker_time();
8282
double get_cumulated_worker_time(bthread_tag_t tag);
83-
double get_cumulated_worker_time(TaskGroup* g);
8483
int64_t get_cumulated_switch_count();
8584
int64_t get_cumulated_signal_count();
8685

src/bthread/task_group.cpp

Lines changed: 65 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,39 @@ void* run_create_span_func() {
8686
return BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_bls).rpcz_parent_span;
8787
}
8888

89+
AtomicInteger128::Value AtomicInteger128::load() const {
90+
#if __x86_64__ || __ARM_NEON
91+
// Supress compiler warning.
92+
(void)_mutex;
93+
#endif // __x86_64__ || __ARM_NEON
94+
95+
#if __x86_64__ || __ARM_NEON
96+
#ifdef __x86_64__
97+
__m128i value = _mm_load_si128(reinterpret_cast<__m128i*>(&_value));
98+
#else // __ARM_NEON
99+
int64x2_t value = vld1q_s64(reinterpret_cast<const int64_t*>(&_value));
100+
#endif // __x86_64__
101+
return {value[0], value[1]};
102+
#else // __x86_64__ || __ARM_NEON
103+
BAIDU_SCOPED_LOCK(g->_mutex);
104+
return _value;
105+
#endif // __x86_64__ || __ARM_NEON
106+
}
107+
108+
void AtomicInteger128::store(Value value) {
109+
#if __x86_64__
110+
__m128i v = _mm_load_si128(reinterpret_cast<const __m128i*>(&value));
111+
_mm_store_si128(reinterpret_cast<__m128i*>(&_value), v);
112+
#elif __ARM_NEON
113+
int64x2_t v = vld1q_s64(reinterpret_cast<const int64_t*>(&value));
114+
vst1q_s64(reinterpret_cast<int64_t*>(&_value), v);
115+
#else
116+
BAIDU_SCOPED_LOCK(g->_mutex);
117+
_value = value;
118+
#endif // __x86_64__ || __ARM_NEON
119+
}
120+
121+
89122
int TaskGroup::get_attr(bthread_t tid, bthread_attr_t* out) {
90123
TaskMeta* const m = address_meta(tid);
91124
if (m != NULL) {
@@ -152,6 +185,16 @@ static double get_cumulated_cputime_from_this(void* arg) {
152185
return static_cast<TaskGroup*>(arg)->cumulated_cputime_ns() / 1000000000.0;
153186
}
154187

188+
int64_t TaskGroup::cumulated_cputime_ns() const {
189+
CPUTimeStat cpu_time_stat = _cpu_time_stat.load();
190+
// Add the elapsed time of running bthread.
191+
int64_t cumulated_cputime_ns = cpu_time_stat.cumulated_cputime_ns();
192+
if (!cpu_time_stat.is_main_task()) {
193+
cumulated_cputime_ns += butil::cpuwide_time_ns() - cpu_time_stat.last_run_ns();
194+
}
195+
return cumulated_cputime_ns;
196+
}
197+
155198
void TaskGroup::run_main_task() {
156199
bvar::PassiveStatus<double> cumulated_cputime(
157200
get_cumulated_cputime_from_this, this);
@@ -160,11 +203,11 @@ void TaskGroup::run_main_task() {
160203
TaskGroup* dummy = this;
161204
bthread_t tid;
162205
while (wait_task(&tid)) {
163-
TaskGroup::sched_to(&dummy, tid);
206+
sched_to(&dummy, tid);
164207
DCHECK_EQ(this, dummy);
165208
DCHECK_EQ(_cur_meta->stack, _main_stack);
166209
if (_cur_meta->tid != _main_tid) {
167-
TaskGroup::task_runner(1/*skip remained*/);
210+
task_runner(1/*skip remained*/);
168211
}
169212
if (FLAGS_show_per_worker_usage_in_vars && !usage_bvar) {
170213
char name[32];
@@ -181,17 +224,12 @@ void TaskGroup::run_main_task() {
181224
}
182225
// Don't forget to add elapse of last wait_task.
183226
current_task()->stat.cputime_ns +=
184-
butil::cpuwide_time_ns() - std::abs(_cpu_time_stat.last_run_ns);
227+
butil::cpuwide_time_ns() - std::abs(_cpu_time_stat.load_unsafe().last_run_ns());
185228
}
186229

187230
TaskGroup::TaskGroup(TaskControl* c)
188231
: _control(c) {
189232
CHECK(c);
190-
#if __x86_64__ || __ARM_NEON
191-
// Supress compiler warning.
192-
(void)_cpu_time_stat_mutex;
193-
#endif // __x86_64__ || __ARM_NEON
194-
195233
}
196234

197235
TaskGroup::~TaskGroup() {
@@ -282,8 +320,12 @@ int TaskGroup::init(size_t runqueue_capacity) {
282320
_cur_meta = m;
283321
_main_tid = m->tid;
284322
_main_stack = stk;
285-
_cpu_time_stat.last_run_ns = -m->cpuwide_start_ns;
323+
324+
CPUTimeStat cpu_time_stat;
325+
cpu_time_stat.set_last_run_ns(m->cpuwide_start_ns, true);
326+
_cpu_time_stat.store(cpu_time_stat);
286327
_last_cpu_clock_ns = 0;
328+
287329
return 0;
288330
}
289331

@@ -404,7 +446,7 @@ void TaskGroup::task_runner(intptr_t skip_remained) {
404446

405447
g->_control->_nbthreads << -1;
406448
g->_control->tag_nbthreads(g->tag()) << -1;
407-
g->set_remained(TaskGroup::_release_last_context, m);
449+
g->set_remained(_release_last_context, m);
408450
ending_sched(&g);
409451

410452
} while (g->_cur_meta->tid != g->_main_tid);
@@ -481,12 +523,10 @@ int TaskGroup::start_foreground(TaskGroup** pg,
481523
fn = ready_to_run_in_worker;
482524
}
483525
ReadyToRunArgs args = {
484-
g->tag(),
485-
g->_cur_meta,
486-
(bool)(using_attr.flags & BTHREAD_NOSIGNAL)
526+
g->tag(), g->_cur_meta, (bool)(using_attr.flags & BTHREAD_NOSIGNAL)
487527
};
488528
g->set_remained(fn, &args);
489-
TaskGroup::sched_to(pg, m->tid);
529+
sched_to(pg, m->tid);
490530
}
491531
return 0;
492532
}
@@ -668,13 +708,18 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta, bool cur_ending) {
668708
}
669709
#endif
670710
// Save errno so that errno is bthread-specific.
671-
const int saved_errno = errno;
711+
int saved_errno = errno;
672712
void* saved_unique_user_ptr = tls_unique_user_ptr;
673713

674714
TaskMeta* const cur_meta = g->_cur_meta;
675-
const int64_t now = butil::cpuwide_time_ns();
676-
const int64_t elp_ns = now - std::abs(g->_cpu_time_stat.last_run_ns);
715+
int64_t now = butil::cpuwide_time_ns();
716+
CPUTimeStat cpu_time_stat = g->_cpu_time_stat.load_unsafe();
717+
int64_t elp_ns = now - cpu_time_stat.last_run_ns();
677718
cur_meta->stat.cputime_ns += elp_ns;
719+
// Update cpu_time_stat.
720+
cpu_time_stat.set_last_run_ns(now, is_main_task(g, next_meta->tid));
721+
cpu_time_stat.add_cumulated_cputime_ns(elp_ns, is_main_task(g, cur_meta->tid));
722+
g->_cpu_time_stat.store(cpu_time_stat);
678723

679724
if (FLAGS_bthread_enable_cpu_clock_stat) {
680725
const int64_t cpu_thread_time = butil::cputhread_time_ns();
@@ -686,36 +731,6 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta, bool cur_ending) {
686731
g->_last_cpu_clock_ns = 0;
687732
}
688733

689-
#if __x86_64__ || __ARM_NEON
690-
// Refer to https://rigtorp.se/isatomic/, On the modern CPU microarchitectures
691-
// (Skylake and Zen 2) AVX/AVX2 128b/256b aligned loads and stores are atomic
692-
// even though Intel and AMD officially doesn’t guarantee this.
693-
CPUTimeStat cpu_time_stat{
694-
next_meta->tid != g->main_tid() ? now : -now,
695-
g->_cpu_time_stat.cumulated_cputime_ns
696-
};
697-
if (cur_meta->tid != g->main_tid()) {
698-
cpu_time_stat.cumulated_cputime_ns += elp_ns;
699-
}
700-
#if __x86_64__
701-
// On X86, SSE instructions can ensure atomic loads and stores.
702-
__m128i value = _mm_load_si128(reinterpret_cast<__m128i*>(&cpu_time_stat));
703-
_mm_store_si128(reinterpret_cast<__m128i*>(&g->_cpu_time_stat), value);
704-
#else // __ARM_NEON
705-
// Starting from Armv8.4-A, neon can ensure atomic loads and stores.
706-
int64x2_t value = vld1q_s64(reinterpret_cast<const int64_t*>(&cpu_time_stat));
707-
vst1q_s64(reinterpret_cast<int64_t*>(&g->_cpu_time_stat), value);
708-
#endif // __x86_64__
709-
#else // __x86_64__ || __ARM_NEON
710-
{
711-
BAIDU_SCOPED_LOCK(g->_cpu_time_stat_mutex);
712-
g->_cpu_time_stat.last_run_ns = next_meta->tid != g->main_tid() ? now : -now;
713-
if (cur_meta->tid != g->main_tid()) {
714-
g->_cpu_time_stat.cumulated_cputime_ns += elp_ns;
715-
}
716-
}
717-
#endif // __x86_64__ || __ARM_NEON
718-
719734
++cur_meta->stat.nswitch;
720735
++ g->_nswitch;
721736
// Switch to the task
@@ -1047,14 +1062,14 @@ int TaskGroup::interrupt(bthread_t tid, TaskControl* c, bthread_tag_t tag) {
10471062
}
10481063
} else if (sleep_id != 0) {
10491064
if (get_global_timer_thread()->unschedule(sleep_id) == 0) {
1050-
bthread::TaskGroup* g = bthread::tls_task_group;
1065+
TaskGroup* g = tls_task_group;
10511066
if (g) {
1052-
g->ready_to_run(TaskGroup::address_meta(tid));
1067+
g->ready_to_run(address_meta(tid));
10531068
} else {
10541069
if (!c) {
10551070
return EINVAL;
10561071
}
1057-
c->choose_one_group(tag)->ready_to_run_remote(TaskGroup::address_meta(tid));
1072+
c->choose_one_group(tag)->ready_to_run_remote(address_meta(tid));
10581073
}
10591074
}
10601075
}

0 commit comments

Comments
 (0)