Skip to content

Commit c0b6b1e

Browse files
Add flag to limit perf counters to main thread
1 parent b808118 commit c0b6b1e

4 files changed

Lines changed: 59 additions & 5 deletions

File tree

docs/perf_counters.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are
3030
mapped by libpfm to platform-specifics - see libpfm
3131
[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.
3232

33+
By default, perf counters include work done by all benchmark-created threads.
34+
Pass `--benchmark_perf_counters_all_threads=false` to measure only the main
35+
benchmark thread.
36+
3337
The counter values are reported back through the [User Counters](../README.md#custom-counters)
3438
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
3539
by User Counters.

src/benchmark.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ BM_DEFINE_bool(benchmark_counters_tabular, false);
145145
// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
146146
BM_DEFINE_string(benchmark_perf_counters, "");
147147

148+
// Whether perf counters also count work done by benchmark-created threads (true) or only the main benchmark thread (false).
149+
BM_DEFINE_bool(benchmark_perf_counters_all_threads, true);
150+
148151
// Extra context to include in the output formatted as comma-separated key-value
149152
// pairs. Kept internal as it's only used for parsing from env/command line.
150153
BM_DEFINE_kvpairs(benchmark_context, {});
@@ -427,8 +430,12 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
427430

428431
// This perfcounters object needs to be created before the runners vector
429432
// below so it outlasts their lifetime.
430-
PerfCountersMeasurement perfcounters(
431-
StrSplit(FLAGS_benchmark_perf_counters, ','));
433+
const std::vector<std::string> perf_counter_names =
434+
StrSplit(FLAGS_benchmark_perf_counters, ',');
435+
PerfCountersMeasurement perfcounters =
436+
FLAGS_benchmark_perf_counters_all_threads
437+
? PerfCountersMeasurement(perf_counter_names)
438+
: PerfCountersMeasurement::ForCurrentThread(perf_counter_names);
432439

433440
// Vector of benchmarks to run
434441
std::vector<internal::BenchmarkRunner> runners;
@@ -457,7 +464,8 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
457464

458465
// The use of performance counters with threads would be unintuitive for
459466
// the average user, so warn them when counters also cover benchmark threads.
460-
if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
467+
if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0) &&
468+
FLAGS_benchmark_perf_counters_all_threads) {
461469
GetErrorLogInstance()
462470
<< "***WARNING*** There are " << benchmarks_with_threads
463471
<< " benchmarks with threads and " << perfcounters.num_counters()
@@ -783,6 +791,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
783791
&FLAGS_benchmark_counters_tabular) ||
784792
ParseStringFlag(argv[i], "benchmark_perf_counters",
785793
&FLAGS_benchmark_perf_counters) ||
794+
ParseBoolFlag(argv[i], "benchmark_perf_counters_all_threads",
795+
&FLAGS_benchmark_perf_counters_all_threads) ||
786796
ParseKeyValueFlag(argv[i], "benchmark_context",
787797
&FLAGS_benchmark_context) ||
788798
ParseStringFlag(argv[i], "benchmark_time_unit",
@@ -895,6 +905,7 @@ void PrintDefaultHelp() {
895905
" [--benchmark_counters_tabular={true|false}]\n"
896906
#if defined HAVE_LIBPFM
897907
" [--benchmark_perf_counters=<counter>,...]\n"
908+
" [--benchmark_perf_counters_all_threads={true|false}]\n"
898909
#endif
899910
" [--benchmark_context=<key>=<value>,...]\n"
900911
" [--benchmark_time_unit={ns|us|ms|s}]\n"

src/perf_counters.cc

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ static std::vector<uint64_t> GetPMUTypesForEvent(const perf_event_attr& attr) {
135135
}
136136

137137
PerfCounters PerfCounters::Create(
138-
const std::vector<std::string>& counter_names) {
138+
const std::vector<std::string>& counter_names, Scope scope) {
139139
if (!counter_names.empty()) {
140140
Initialize();
141141
}
@@ -203,7 +203,7 @@ PerfCounters PerfCounters::Create(
203203
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
204204
// case.
205205
attr.disabled = is_first;
206-
attr.inherit = true;
206+
attr.inherit = (scope == Scope::kAllThreads);
207207
attr.pinned = is_first;
208208
attr.exclude_kernel = true;
209209
attr.exclude_user = false;
@@ -290,6 +290,16 @@ PerfCounters PerfCounters::Create(
290290
std::move(leader_ids));
291291
}
292292

293+
PerfCounters PerfCounters::Create(
294+
const std::vector<std::string>& counter_names) {
295+
return Create(counter_names, Scope::kAllThreads);
296+
}
297+
298+
PerfCounters PerfCounters::CreateForCurrentThread(
299+
const std::vector<std::string>& counter_names) {
300+
return Create(counter_names, Scope::kCurrentThread);
301+
}
302+
293303
void PerfCounters::CloseCounters() const {
294304
if (counter_ids_.empty()) {
295305
return;
@@ -318,6 +328,11 @@ PerfCounters PerfCounters::Create(
318328
return NoCounters();
319329
}
320330

331+
PerfCounters PerfCounters::CreateForCurrentThread(
332+
const std::vector<std::string>& counter_names) {
333+
return Create(counter_names);
334+
}
335+
321336
void PerfCounters::CloseCounters() const {}
322337
#endif // defined HAVE_LIBPFM
323338

@@ -327,6 +342,18 @@ PerfCountersMeasurement::PerfCountersMeasurement(
327342
counters_ = PerfCounters::Create(counter_names);
328343
}
329344

345+
PerfCountersMeasurement::PerfCountersMeasurement(
346+
const std::vector<std::string>& counter_names, PerfCounters&& counters)
347+
: counters_(std::move(counters)),
348+
start_values_(counter_names.size()),
349+
end_values_(counter_names.size()) {}
350+
351+
PerfCountersMeasurement PerfCountersMeasurement::ForCurrentThread(
352+
const std::vector<std::string>& counter_names) {
353+
return PerfCountersMeasurement(
354+
counter_names, PerfCounters::CreateForCurrentThread(counter_names));
355+
}
356+
330357
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
331358
if (this != &other) {
332359
CloseCounters();

src/perf_counters.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ class BENCHMARK_EXPORT PerfCounters final {
117117
// In case of failure, this method will in the worst case return an
118118
// empty object whose state will still be valid.
119119
static PerfCounters Create(const std::vector<std::string>& counter_names);
120+
static PerfCounters CreateForCurrentThread(
121+
const std::vector<std::string>& counter_names);
120122

121123
// Take a snapshot of the current value of the counters into the provided
122124
// valid PerfCounterValues storage. The values are populated such that:
@@ -135,6 +137,11 @@ class BENCHMARK_EXPORT PerfCounters final {
135137
size_t num_counters() const { return counter_names_.size(); }
136138

137139
private:
140+
enum class Scope { kCurrentThread, kAllThreads };
141+
142+
static PerfCounters Create(const std::vector<std::string>& counter_names,
143+
Scope scope);
144+
138145
PerfCounters(const std::vector<std::string>& counter_names,
139146
std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
140147
: counter_ids_(std::move(counter_ids)),
@@ -152,6 +159,8 @@ class BENCHMARK_EXPORT PerfCounters final {
152159
class BENCHMARK_EXPORT PerfCountersMeasurement final {
153160
public:
154161
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
162+
static PerfCountersMeasurement ForCurrentThread(
163+
const std::vector<std::string>& counter_names);
155164

156165
size_t num_counters() const { return counters_.num_counters(); }
157166

@@ -187,6 +196,9 @@ class BENCHMARK_EXPORT PerfCountersMeasurement final {
187196
}
188197

189198
private:
199+
PerfCountersMeasurement(const std::vector<std::string>& counter_names,
200+
PerfCounters&& counters);
201+
190202
PerfCounters counters_;
191203
bool valid_read_ = true;
192204
PerfCounterValues start_values_;

0 commit comments

Comments
 (0)