11#pragma once
22
3+ #include < benchmark/benchmark.h>
34#include < gtest/gtest.h>
5+ #include < mpi.h>
46#include < omp.h>
57#include < tbb/tick_count.h>
68
2123
2224namespace ppc ::util {
2325
24- double GetTimeMPI ();
25- int GetMPIRank ();
26+ namespace detail {
27+
/// Substring filter predicate used to select perf tests by name.
///
/// @param value  Text that is searched (a test name).
/// @param filter Token to look for; an empty token disables filtering.
/// @return `true` if `filter` is empty or occurs anywhere inside `value`.
[[nodiscard]] constexpr bool ContainsFilterToken(std::string_view value, std::string_view filter) noexcept {
  // An empty filter means "no restriction", so every value is accepted.
  return filter.empty() || value.find(filter) != std::string_view::npos;
}
34+
35+ inline bool ShouldRunBenchmark (std::string_view test_name) {
36+ const auto impl_filter = env::get<std::string>(" PPC_PERF_IMPL_FILTER" );
37+ const auto category_filter = env::get<std::string>(" PPC_PERF_CATEGORY_FILTER" );
38+ const auto impl_filter_value = impl_filter.has_value () ? std::string_view (impl_filter.value ()) : std::string_view{};
39+ const auto category_filter_value =
40+ category_filter.has_value () ? std::string_view (category_filter.value ()) : std::string_view{};
41+ return ContainsFilterToken (test_name, impl_filter_value) && ContainsFilterToken (test_name, category_filter_value);
42+ }
43+
44+ inline void CheckPerfMode (ppc::performance::PerfResults::TypeOfRunning mode) {
45+ if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ||
46+ mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun ) {
47+ return ;
48+ }
49+ std::stringstream err_msg{};
50+ err_msg << ' \n ' << " The type of performance check for the task was not selected.\n " ;
51+ throw std::runtime_error (err_msg.str ().c_str ());
52+ }
53+
54+ template <typename InType, typename OutType>
55+ void RunTaskPipeline (const ppc::task::TaskPtr<InType, OutType> &task) {
56+ task->Validation ();
57+ task->PreProcessing ();
58+ task->Run ();
59+ task->PostProcessing ();
60+ }
61+
62+ inline std::function<double ()> MakeTechnologyTimer (ppc::task::TypeOfTask task_type) {
63+ if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL ) {
64+ return [] { return GetTimeMPI (); };
65+ }
66+ if (task_type == ppc::task::TypeOfTask::kOMP ) {
67+ return [] { return omp_get_wtime (); };
68+ }
69+ if (task_type == ppc::task::TypeOfTask::kTBB ) {
70+ const auto t0 = tbb::tick_count::now ();
71+ return [t0] { return (tbb::tick_count::now () - t0).seconds (); };
72+ }
73+ if (task_type == ppc::task::TypeOfTask::kSEQ || task_type == ppc::task::TypeOfTask::kSTL ) {
74+ const auto t0 = std::chrono::high_resolution_clock::now ();
75+ return [t0] {
76+ const auto now = std::chrono::high_resolution_clock::now ();
77+ const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count ();
78+ return static_cast <double >(ns) * 1e-9 ;
79+ };
80+ }
81+ throw std::runtime_error (" The task type is not supported for performance testing." );
82+ }
83+
84+ inline double MaxElapsedTimeAcrossMpiRanks (double elapsed, ppc::task::TypeOfTask task_type) {
85+ if (task_type != ppc::task::TypeOfTask::kMPI && task_type != ppc::task::TypeOfTask::kALL ) {
86+ return elapsed;
87+ }
88+ double max_elapsed = elapsed;
89+ MPI_Allreduce (&elapsed, &max_elapsed, 1 , MPI_DOUBLE , MPI_MAX , MPI_COMM_WORLD );
90+ return max_elapsed;
91+ }
92+
93+ template <typename InType, typename OutType>
94+ double RunTaskForBenchmark (const ppc::task::TaskPtr<InType, OutType> &task,
95+ ppc::performance::PerfResults::TypeOfRunning mode) {
96+ const auto task_type = task->GetDynamicTypeOfTask ();
97+ const auto timer = MakeTechnologyTimer (task_type);
98+ task->GetStateOfTesting () = ppc::task::StateOfTesting::kPerf ;
99+ if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ) {
100+ SynchronizeMpiRanks ();
101+ const double begin = timer ();
102+ RunTaskPipeline (task);
103+ return MaxElapsedTimeAcrossMpiRanks (timer () - begin, task_type);
104+ }
105+
106+ task->Validation ();
107+ task->PreProcessing ();
108+ SynchronizeMpiRanks ();
109+ const double begin = timer ();
110+ task->Run ();
111+ const double elapsed = timer () - begin;
112+ task->PostProcessing ();
113+ return MaxElapsedTimeAcrossMpiRanks (elapsed, task_type);
114+ }
115+
116+ inline std::string MakeBenchmarkName (const std::string &test_name, ppc::performance::PerfResults::TypeOfRunning mode) {
117+ return test_name + " /" + ppc::performance::GetStringParamName (mode);
118+ }
119+
120+ } // namespace detail
26121
27122template <typename InType, typename OutType>
28123using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string,
@@ -47,27 +142,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
47142 virtual InType GetTestInputData () = 0;
48143
49144 virtual void SetPerfAttributes (ppc::performance::PerfAttr &perf_attrs) {
50- if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kMPI ||
51- task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kALL ) {
52- const double t0 = GetTimeMPI ();
53- perf_attrs.current_timer = [t0] { return GetTimeMPI () - t0; };
54- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kOMP ) {
55- const double t0 = omp_get_wtime ();
56- perf_attrs.current_timer = [t0] { return omp_get_wtime () - t0; };
57- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kSEQ ||
58- task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kSTL ) {
59- const auto t0 = std::chrono::high_resolution_clock::now ();
60- perf_attrs.current_timer = [t0] {
61- auto now = std::chrono::high_resolution_clock::now ();
62- auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count ();
63- return static_cast <double >(ns) * 1e-9 ;
64- };
65- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kTBB ) {
66- const auto t0 = tbb::tick_count::now ();
67- perf_attrs.current_timer = [t0] { return (tbb::tick_count::now () - t0).seconds (); };
68- } else {
69- throw std::runtime_error (" The task type is not supported for performance testing." );
70- }
145+ perf_attrs.current_timer = detail::MakeTechnologyTimer (task_->GetDynamicTypeOfTask ());
71146 }
72147
73148 void ExecuteTest (const PerfTestParam<InType, OutType> &perf_test_param) {
@@ -80,35 +155,42 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
80155 // A single perf test body may execute several implementations; do not abort the enabled ones.
81156 return ;
82157 }
158+ if (!detail::ShouldRunBenchmark (test_name)) {
159+ return ;
160+ }
161+ detail::CheckPerfMode (mode);
83162
84163 const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest (test_name);
85164
86- task_ = task_getter (GetTestInputData ());
87- ppc::performance::Perf perf (task_);
88- ppc::performance::PerfAttr perf_attr;
165+ const auto input_data = GetTestInputData ();
166+ task_ = task_getter (input_data);
89167 SynchronizeMpiRanks ();
90- SetPerfAttributes (perf_attr);
91-
92- if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ) {
93- perf.PipelineRun (perf_attr);
94- } else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun ) {
95- perf.TaskRun (perf_attr);
96- } else {
97- std::stringstream err_msg;
98- err_msg << ' \n ' << " The type of performance check for the task was not selected.\n " ;
99- throw std::runtime_error (err_msg.str ().c_str ());
100- }
101-
102- if (GetMPIRank () == 0 ) {
103- perf.PrintPerfStatistic (test_name);
104- }
168+ detail::RunTaskPipeline (task_);
105169
106170 OutType output_data = task_->GetOutput ();
107171 ASSERT_TRUE (CheckTestOutputData (output_data));
172+
173+ ppc::performance::PerfAttr perf_attr;
174+ SetPerfAttributes (perf_attr);
175+ const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running ;
176+
177+ const auto benchmark_name = detail::MakeBenchmarkName (test_name, mode);
178+ benchmark::RegisterBenchmark (benchmark_name,
179+ [task_getter, input_data, mode](benchmark::State &state) {
180+ for (auto _ : state) {
181+ auto task = task_getter (input_data);
182+ const double elapsed = detail::RunTaskForBenchmark (task, mode);
183+ state.SetIterationTime (elapsed);
184+ benchmark::DoNotOptimize (task->GetOutput ());
185+ }
186+ })
187+ ->UseManualTime ()
188+ ->Unit (benchmark::kSecond )
189+ ->Iterations (static_cast <int64_t >(num_iterations));
108190 }
109191
110192 private:
111- ppc::task::TaskPtr<InType, OutType> task_;
193+ ppc::task::TaskPtr<InType, OutType> task_{} ;
112194};
113195
114196template <typename TaskType, typename InputType>
@@ -129,7 +211,7 @@ auto TupleToGTestValuesImpl(const Tuple &tup, std::index_sequence<I...> /*unused
129211
/// Forwards a tuple to `TupleToGTestValuesImpl` together with an index sequence covering all of its elements.
///
/// @param tup Tuple (any value category) to forward.
/// @return Whatever `TupleToGTestValuesImpl` returns for `tup`.
template <typename Tuple>
auto TupleToGTestValues(Tuple &&tup) {
  // decay_t strips references and cv-qualifiers so tuple_size_v sees the plain tuple type.
  constexpr std::size_t kSize{std::tuple_size_v<std::decay_t<Tuple>>};
  return TupleToGTestValuesImpl(std::forward<Tuple>(tup), std::make_index_sequence<kSize>{});
}
135217
0 commit comments