11#pragma once
22
33#include < gtest/gtest.h>
4+ // Keep Google Test first to match clang-tidy's LLVM include order.
5+ #include < benchmark/benchmark.h>
6+ #include < mpi.h>
47#include < omp.h>
58#include < tbb/tick_count.h>
69
710#include < chrono>
811#include < cstddef>
12+ #include < cstdint>
913#include < functional>
1014#include < sstream>
1115#include < stdexcept>
2125
2226namespace ppc ::util {
2327
// Timestamp source used by the MPI/ALL timers in this header; only declared here.
// NOTE(review): presumably wraps MPI_Wtime and returns seconds — confirm at the definition.
24- double GetTimeMPI ();
// Rank query; this header compares the result against 0 to pick the rank that prints
// perf statistics. NOTE(review): presumably the rank in MPI_COMM_WORLD — confirm.
25- int GetMPIRank ();
28+ namespace detail {
29+
/// @brief Tells whether a name passes a substring filter.
///
/// An empty filter means "no filter" and accepts every name.
///
/// @param value  Text to search in (for example a test name).
/// @param filter Substring that must occur in @p value; may be empty.
/// @return true when @p filter is empty or occurs anywhere in @p value.
inline bool ContainsFilterToken(std::string_view value, std::string_view filter) {
  // find() replaces std::string_view::contains (C++23 only) so the helper also
  // builds with C++17/C++20 toolchains; the result is identical.
  return filter.empty() || value.find(filter) != std::string_view::npos;
}
36+
37+ inline bool ShouldRunBenchmark (std::string_view test_name) {
38+ const auto impl_filter = env::get<std::string>(" PPC_PERF_IMPL_FILTER" );
39+ const auto category_filter = env::get<std::string>(" PPC_PERF_CATEGORY_FILTER" );
40+ const auto impl_filter_value = impl_filter.has_value () ? std::string_view (impl_filter.value ()) : std::string_view{};
41+ const auto category_filter_value =
42+ category_filter.has_value () ? std::string_view (category_filter.value ()) : std::string_view{};
43+ return ContainsFilterToken (test_name, impl_filter_value) && ContainsFilterToken (test_name, category_filter_value);
44+ }
45+
46+ inline void CheckPerfMode (ppc::performance::PerfResults::TypeOfRunning mode) {
47+ if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ||
48+ mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun ) {
49+ return ;
50+ }
51+ std::stringstream err_msg{};
52+ err_msg << ' \n ' << " The type of performance check for the task was not selected.\n " ;
53+ throw std::runtime_error (err_msg.str ().c_str ());
54+ }
55+
56+ template <typename InType, typename OutType>
57+ void RunTaskPipeline (const ppc::task::TaskPtr<InType, OutType> &task) {
58+ task->Validation ();
59+ task->PreProcessing ();
60+ task->Run ();
61+ task->PostProcessing ();
62+ }
63+
64+ inline std::function<double ()> MakeTechnologyTimer (ppc::task::TypeOfTask task_type) {
65+ if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL ) {
66+ return [] { return GetTimeMPI (); };
67+ }
68+ if (task_type == ppc::task::TypeOfTask::kOMP ) {
69+ return [] { return omp_get_wtime (); };
70+ }
71+ if (task_type == ppc::task::TypeOfTask::kTBB ) {
72+ const auto t0 = tbb::tick_count::now ();
73+ return [t0] { return (tbb::tick_count::now () - t0).seconds (); };
74+ }
75+ if (task_type == ppc::task::TypeOfTask::kSEQ || task_type == ppc::task::TypeOfTask::kSTL ) {
76+ const auto t0 = std::chrono::high_resolution_clock::now ();
77+ return [t0] {
78+ const auto now = std::chrono::high_resolution_clock::now ();
79+ const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count ();
80+ return static_cast <double >(ns) * 1e-9 ;
81+ };
82+ }
83+ throw std::runtime_error (" The task type is not supported for performance testing." );
84+ }
85+
86+ inline double MaxElapsedTimeAcrossMpiRanks (double elapsed, ppc::task::TypeOfTask task_type) {
87+ if (task_type != ppc::task::TypeOfTask::kMPI && task_type != ppc::task::TypeOfTask::kALL ) {
88+ return elapsed;
89+ }
90+ double max_elapsed = elapsed;
91+ MPI_Allreduce (&elapsed, &max_elapsed, 1 , MPI_DOUBLE , MPI_MAX , MPI_COMM_WORLD );
92+ return max_elapsed;
93+ }
94+
95+ template <typename InType, typename OutType>
96+ double RunTaskForBenchmark (const ppc::task::TaskPtr<InType, OutType> &task,
97+ ppc::performance::PerfResults::TypeOfRunning mode) {
98+ const auto task_type = task->GetDynamicTypeOfTask ();
99+ const auto timer = MakeTechnologyTimer (task_type);
100+ task->GetStateOfTesting () = ppc::task::StateOfTesting::kPerf ;
101+ if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ) {
102+ SynchronizeMpiRanks ();
103+ const double begin = timer ();
104+ RunTaskPipeline (task);
105+ return MaxElapsedTimeAcrossMpiRanks (timer () - begin, task_type);
106+ }
107+
108+ task->Validation ();
109+ task->PreProcessing ();
110+ SynchronizeMpiRanks ();
111+ const double begin = timer ();
112+ task->Run ();
113+ const double elapsed = timer () - begin;
114+ task->PostProcessing ();
115+ return MaxElapsedTimeAcrossMpiRanks (elapsed, task_type);
116+ }
117+
118+ inline std::string MakeBenchmarkName (const std::string &test_name, ppc::performance::PerfResults::TypeOfRunning mode) {
119+ return test_name + " /" + ppc::performance::GetStringParamName (mode);
120+ }
121+
122+ } // namespace detail
26123
27124template <typename InType, typename OutType>
28125using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string,
@@ -47,27 +144,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
 // Supplies the input that is handed to the task factory when the perf test executes;
 // pure virtual, so every concrete perf test must provide it.
47144 virtual InType GetTestInputData () = 0;
48145
49146 virtual void SetPerfAttributes (ppc::performance::PerfAttr &perf_attrs) {
50- if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kMPI ||
51- task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kALL ) {
52- const double t0 = GetTimeMPI ();
53- perf_attrs.current_timer = [t0] { return GetTimeMPI () - t0; };
54- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kOMP ) {
55- const double t0 = omp_get_wtime ();
56- perf_attrs.current_timer = [t0] { return omp_get_wtime () - t0; };
57- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kSEQ ||
58- task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kSTL ) {
59- const auto t0 = std::chrono::high_resolution_clock::now ();
60- perf_attrs.current_timer = [t0] {
61- auto now = std::chrono::high_resolution_clock::now ();
62- auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count ();
63- return static_cast <double >(ns) * 1e-9 ;
64- };
65- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kTBB ) {
66- const auto t0 = tbb::tick_count::now ();
67- perf_attrs.current_timer = [t0] { return (tbb::tick_count::now () - t0).seconds (); };
68- } else {
69- throw std::runtime_error (" The task type is not supported for performance testing." );
70- }
147+ perf_attrs.current_timer = detail::MakeTechnologyTimer (task_->GetDynamicTypeOfTask ());
71148 }
72149
73150 void ExecuteTest (const PerfTestParam<InType, OutType> &perf_test_param) {
@@ -80,35 +157,42 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
80157 // A single perf test body may execute several implementations; do not abort the enabled ones.
81158 return ;
82159 }
160+ if (!detail::ShouldRunBenchmark (test_name)) {
161+ return ;
162+ }
163+ detail::CheckPerfMode (mode);
83164
84165 const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest (test_name);
85166
86- task_ = task_getter (GetTestInputData ());
87- ppc::performance::Perf perf (task_);
88- ppc::performance::PerfAttr perf_attr;
167+ const auto input_data = GetTestInputData ();
168+ task_ = task_getter (input_data);
89169 SynchronizeMpiRanks ();
90- SetPerfAttributes (perf_attr);
91-
92- if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ) {
93- perf.PipelineRun (perf_attr);
94- } else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun ) {
95- perf.TaskRun (perf_attr);
96- } else {
97- std::stringstream err_msg;
98- err_msg << ' \n ' << " The type of performance check for the task was not selected.\n " ;
99- throw std::runtime_error (err_msg.str ().c_str ());
100- }
101-
102- if (GetMPIRank () == 0 ) {
103- perf.PrintPerfStatistic (test_name);
104- }
170+ detail::RunTaskPipeline (task_);
105171
106172 OutType output_data = task_->GetOutput ();
107173 ASSERT_TRUE (CheckTestOutputData (output_data));
174+
175+ ppc::performance::PerfAttr perf_attr;
176+ SetPerfAttributes (perf_attr);
177+ const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running ;
178+
179+ const auto benchmark_name = detail::MakeBenchmarkName (test_name, mode);
180+ benchmark::RegisterBenchmark (benchmark_name,
181+ [task_getter, input_data, mode](benchmark::State &state) {
182+ for (auto _ : state) {
183+ auto task = task_getter (input_data);
184+ const double elapsed = detail::RunTaskForBenchmark (task, mode);
185+ state.SetIterationTime (elapsed);
186+ benchmark::DoNotOptimize (task->GetOutput ());
187+ }
188+ })
189+ ->UseManualTime ()
190+ ->Unit (benchmark::kSecond )
191+ ->Iterations (static_cast <std::int64_t >(num_iterations));
108192 }
109193
110194 private:
111- ppc::task::TaskPtr<InType, OutType> task_;
195+ ppc::task::TaskPtr<InType, OutType> task_{} ;
112196};
113197
114198template <typename TaskType, typename InputType>
@@ -129,7 +213,7 @@ auto TupleToGTestValuesImpl(const Tuple &tup, std::index_sequence<I...> /*unused
129213
/// @brief Expands a tuple into whatever TupleToGTestValuesImpl builds from its elements.
///
/// The tuple is perfectly forwarded together with an index sequence that covers
/// all of its elements.
///
/// @tparam Tuple Tuple-like type; cv/ref qualifiers are stripped before its size is queried.
/// @param tup Tuple whose elements are passed on.
/// @return The value returned by TupleToGTestValuesImpl.
template <typename Tuple>
auto TupleToGTestValues(Tuple &&tup) {
  // Declared exactly once, with the std-qualified size type; a second
  // declaration of the same constant in this scope would be a redefinition.
  constexpr std::size_t kSize{std::tuple_size_v<std::decay_t<Tuple>>};
  return TupleToGTestValuesImpl(std::forward<Tuple>(tup), std::make_index_sequence<kSize>{});
}
135219
0 commit comments