11#pragma once
22
3+ #include < benchmark/benchmark.h>
34#include < gtest/gtest.h>
5+ #include < mpi.h>
46#include < omp.h>
57#include < tbb/tick_count.h>
68
79#include < chrono>
810#include < cstddef>
11+ #include < cstdint>
12+ #include < cstdlib>
913#include < functional>
1014#include < sstream>
1115#include < stdexcept>
2125
2226namespace ppc ::util {
2327
24- double GetTimeMPI ();
25- int GetMPIRank ();
28+ namespace detail {
29+
/// Reports whether `value` passes an optional substring filter.
///
/// @param value      Text to inspect (callers in this file pass a perf test name).
/// @param filter_env Filter token, e.g. the result of std::getenv; may be null.
/// @return true when the filter is null or empty (no filtering requested) or
///         when `value` contains the filter as a substring; false otherwise.
inline bool ContainsFilterToken(std::string_view value, const char *filter_env) {
  if (filter_env == nullptr) {
    return true;
  }
  const std::string_view filter{filter_env};
  // An empty filter is treated the same as an unset one.
  return filter.empty() || value.find(filter) != std::string_view::npos;
}
36+
37+ inline bool ShouldRunBenchmark (std::string_view test_name) {
38+ return ContainsFilterToken (test_name, std::getenv (" PPC_PERF_IMPL_FILTER" )) &&
39+ ContainsFilterToken (test_name, std::getenv (" PPC_PERF_CATEGORY_FILTER" ));
40+ }
41+
42+ inline void CheckPerfMode (ppc::performance::PerfResults::TypeOfRunning mode) {
43+ if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ||
44+ mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun ) {
45+ return ;
46+ }
47+ std::stringstream err_msg;
48+ err_msg << ' \n ' << " The type of performance check for the task was not selected.\n " ;
49+ throw std::runtime_error (err_msg.str ().c_str ());
50+ }
51+
52+ template <typename InType, typename OutType>
53+ void RunTaskPipeline (const ppc::task::TaskPtr<InType, OutType> &task) {
54+ task->Validation ();
55+ task->PreProcessing ();
56+ task->Run ();
57+ task->PostProcessing ();
58+ }
59+
60+ inline std::function<double ()> MakeTechnologyTimer (ppc::task::TypeOfTask task_type) {
61+ if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL ) {
62+ return [] { return GetTimeMPI (); };
63+ }
64+ if (task_type == ppc::task::TypeOfTask::kOMP ) {
65+ return [] { return omp_get_wtime (); };
66+ }
67+ if (task_type == ppc::task::TypeOfTask::kTBB ) {
68+ const auto t0 = tbb::tick_count::now ();
69+ return [t0] { return (tbb::tick_count::now () - t0).seconds (); };
70+ }
71+ if (task_type == ppc::task::TypeOfTask::kSEQ || task_type == ppc::task::TypeOfTask::kSTL ) {
72+ const auto t0 = std::chrono::high_resolution_clock::now ();
73+ return [t0] {
74+ const auto now = std::chrono::high_resolution_clock::now ();
75+ const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count ();
76+ return static_cast <double >(ns) * 1e-9 ;
77+ };
78+ }
79+ throw std::runtime_error (" The task type is not supported for performance testing." );
80+ }
81+
82+ inline double MaxElapsedTimeAcrossMpiRanks (double elapsed, ppc::task::TypeOfTask task_type) {
83+ if (task_type != ppc::task::TypeOfTask::kMPI && task_type != ppc::task::TypeOfTask::kALL ) {
84+ return elapsed;
85+ }
86+ double max_elapsed = elapsed;
87+ MPI_Allreduce (&elapsed, &max_elapsed, 1 , MPI_DOUBLE , MPI_MAX , MPI_COMM_WORLD );
88+ return max_elapsed;
89+ }
90+
91+ template <typename InType, typename OutType>
92+ double RunTaskForBenchmark (const ppc::task::TaskPtr<InType, OutType> &task,
93+ ppc::performance::PerfResults::TypeOfRunning mode) {
94+ const auto task_type = task->GetDynamicTypeOfTask ();
95+ const auto timer = MakeTechnologyTimer (task_type);
96+ task->GetStateOfTesting () = ppc::task::StateOfTesting::kPerf ;
97+ if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ) {
98+ SynchronizeMpiRanks ();
99+ const double begin = timer ();
100+ RunTaskPipeline (task);
101+ return MaxElapsedTimeAcrossMpiRanks (timer () - begin, task_type);
102+ }
103+
104+ task->Validation ();
105+ task->PreProcessing ();
106+ SynchronizeMpiRanks ();
107+ const double begin = timer ();
108+ task->Run ();
109+ const double elapsed = timer () - begin;
110+ task->PostProcessing ();
111+ return MaxElapsedTimeAcrossMpiRanks (elapsed, task_type);
112+ }
113+
114+ inline std::string MakeBenchmarkName (const std::string &test_name, ppc::performance::PerfResults::TypeOfRunning mode) {
115+ return test_name + " /" + ppc::performance::GetStringParamName (mode);
116+ }
117+
118+ } // namespace detail
26119
27120template <typename InType, typename OutType>
28121using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string,
@@ -47,27 +140,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
47140 virtual InType GetTestInputData () = 0;
48141
49142 virtual void SetPerfAttributes (ppc::performance::PerfAttr &perf_attrs) {
50- if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kMPI ||
51- task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kALL ) {
52- const double t0 = GetTimeMPI ();
53- perf_attrs.current_timer = [t0] { return GetTimeMPI () - t0; };
54- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kOMP ) {
55- const double t0 = omp_get_wtime ();
56- perf_attrs.current_timer = [t0] { return omp_get_wtime () - t0; };
57- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kSEQ ||
58- task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kSTL ) {
59- const auto t0 = std::chrono::high_resolution_clock::now ();
60- perf_attrs.current_timer = [t0] {
61- auto now = std::chrono::high_resolution_clock::now ();
62- auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count ();
63- return static_cast <double >(ns) * 1e-9 ;
64- };
65- } else if (task_->GetDynamicTypeOfTask () == ppc::task::TypeOfTask::kTBB ) {
66- const auto t0 = tbb::tick_count::now ();
67- perf_attrs.current_timer = [t0] { return (tbb::tick_count::now () - t0).seconds (); };
68- } else {
69- throw std::runtime_error (" The task type is not supported for performance testing." );
70- }
143+ perf_attrs.current_timer = detail::MakeTechnologyTimer (task_->GetDynamicTypeOfTask ());
71144 }
72145
73146 void ExecuteTest (const PerfTestParam<InType, OutType> &perf_test_param) {
@@ -80,31 +153,38 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
80153 // A single perf test body may execute several implementations; do not abort the enabled ones.
81154 return ;
82155 }
156+ if (!detail::ShouldRunBenchmark (test_name)) {
157+ return ;
158+ }
159+ detail::CheckPerfMode (mode);
83160
84161 const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest (test_name);
85162
86- task_ = task_getter (GetTestInputData ());
87- ppc::performance::Perf perf (task_);
88- ppc::performance::PerfAttr perf_attr;
163+ const auto input_data = GetTestInputData ();
164+ task_ = task_getter (input_data);
89165 SynchronizeMpiRanks ();
90- SetPerfAttributes (perf_attr);
91-
92- if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ) {
93- perf.PipelineRun (perf_attr);
94- } else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun ) {
95- perf.TaskRun (perf_attr);
96- } else {
97- std::stringstream err_msg;
98- err_msg << ' \n ' << " The type of performance check for the task was not selected.\n " ;
99- throw std::runtime_error (err_msg.str ().c_str ());
100- }
101-
102- if (GetMPIRank () == 0 ) {
103- perf.PrintPerfStatistic (test_name);
104- }
166+ detail::RunTaskPipeline (task_);
105167
106168 OutType output_data = task_->GetOutput ();
107169 ASSERT_TRUE (CheckTestOutputData (output_data));
170+
171+ ppc::performance::PerfAttr perf_attr;
172+ SetPerfAttributes (perf_attr);
173+ const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running ;
174+
175+ const auto benchmark_name = detail::MakeBenchmarkName (test_name, mode);
176+ benchmark::RegisterBenchmark (benchmark_name,
177+ [task_getter, input_data, mode](benchmark::State &state) {
178+ for (auto _ : state) {
179+ auto task = task_getter (input_data);
180+ const double elapsed = detail::RunTaskForBenchmark (task, mode);
181+ state.SetIterationTime (elapsed);
182+ benchmark::DoNotOptimize (task->GetOutput ());
183+ }
184+ })
185+ ->UseManualTime ()
186+ ->Unit (benchmark::kSecond )
187+ ->Iterations (static_cast <int64_t >(num_iterations));
108188 }
109189
110190 private:
0 commit comments