Skip to content

Commit 84e36bf

Browse files
committed
Use Google Benchmark for performance tables
1 parent: c358e80 · commit: 84e36bf

19 files changed

Lines changed: 759 additions & 802 deletions

.github/workflows/perf.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ jobs:
3030
tar -xzvf ubuntu-gcc-install-ubuntu-24.04.tar.gz -C install
3131
- name: Run perf tests
3232
run: |
33-
bash -e scripts/generate_perf_results.sh
33+
scripts/run_tests.py --running-type=performance
3434
env:
3535
PPC_NUM_PROC: 2
3636
PPC_NUM_THREADS: 2
@@ -68,7 +68,7 @@ jobs:
6868
tar -xzvf macos-clang-install.tar.gz -C install
6969
- name: Run perf tests
7070
run: |
71-
bash -e scripts/generate_perf_results.sh
71+
scripts/run_tests.py --running-type=performance
7272
env:
7373
PPC_NUM_PROC: 1
7474
PPC_NUM_THREADS: 2

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@
1313
[submodule "3rdparty/libenvpp"]
1414
path = 3rdparty/libenvpp
1515
url = https://github.com/ph3at/libenvpp
16+
[submodule "3rdparty/benchmark"]
17+
path = 3rdparty/benchmark
18+
url = https://github.com/google/benchmark

3rdparty/benchmark

Submodule benchmark added at a846068

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ endforeach()
4444

4545
message( STATUS "PPC step: Setup external projects" )
4646
include(cmake/gtest.cmake)
47+
include(cmake/benchmark.cmake)
4748

4849
############################## Modules ##############################
4950

cmake/benchmark.cmake

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
include_guard()
2+
3+
include(ExternalProject)
4+
5+
ExternalProject_Add(
6+
ppc_benchmark
7+
SOURCE_DIR "${CMAKE_SOURCE_DIR}/3rdparty/benchmark"
8+
PREFIX "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark"
9+
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/build"
10+
INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/install"
11+
EXCLUDE_FROM_ALL TRUE
12+
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
13+
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
14+
-DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
15+
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
16+
-DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}
17+
-DCMAKE_CXX_STANDARD_REQUIRED=${CMAKE_CXX_STANDARD_REQUIRED}
18+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
19+
${PPC_EXTERNAL_PROJECT_CMAKE_ARGS}
20+
-DCMAKE_C_FLAGS=-w
21+
-DCMAKE_CXX_FLAGS=-w
22+
-DBENCHMARK_ENABLE_TESTING=OFF
23+
-DBENCHMARK_ENABLE_GTEST_TESTS=OFF
24+
-DBENCHMARK_ENABLE_WERROR=OFF
25+
-DBENCHMARK_ENABLE_INSTALL=ON
26+
-DBENCHMARK_ENABLE_LIBPFM=OFF
27+
BUILD_COMMAND
28+
"${CMAKE_COMMAND}" --build "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/build"
29+
--config $<CONFIG> --parallel
30+
INSTALL_COMMAND
31+
"${CMAKE_COMMAND}" --install
32+
"${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/build" --config $<CONFIG>
33+
--prefix "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/install"
34+
${PPC_EXTERNAL_PROJECT_LOG_ARGS})
35+
36+
function(ppc_link_benchmark target_name)
37+
target_include_directories(
38+
${target_name} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/benchmark/include)
39+
40+
add_dependencies(${target_name} ppc_benchmark)
41+
target_link_directories(${target_name} PUBLIC
42+
"${CMAKE_BINARY_DIR}/ppc_benchmark/install/lib")
43+
target_link_libraries(${target_name} PUBLIC benchmark Threads::Threads)
44+
endfunction()

modules/util/include/perf_test_util.hpp

Lines changed: 121 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
#pragma once
22

3+
#include <benchmark/benchmark.h>
34
#include <gtest/gtest.h>
5+
#include <mpi.h>
46
#include <omp.h>
57
#include <tbb/tick_count.h>
68

79
#include <chrono>
810
#include <cstddef>
11+
#include <cstdint>
12+
#include <cstdlib>
913
#include <functional>
1014
#include <sstream>
1115
#include <stdexcept>
@@ -21,8 +25,97 @@
2125

2226
namespace ppc::util {
2327

24-
double GetTimeMPI();
25-
int GetMPIRank();
28+
namespace detail {
29+
30+
inline bool ContainsFilterToken(std::string_view value, const char *filter_env) {
31+
if (filter_env == nullptr || std::string_view(filter_env).empty()) {
32+
return true;
33+
}
34+
return value.find(filter_env) != std::string_view::npos;
35+
}
36+
37+
inline bool ShouldRunBenchmark(std::string_view test_name) {
38+
return ContainsFilterToken(test_name, std::getenv("PPC_PERF_IMPL_FILTER")) &&
39+
ContainsFilterToken(test_name, std::getenv("PPC_PERF_CATEGORY_FILTER"));
40+
}
41+
42+
inline void CheckPerfMode(ppc::performance::PerfResults::TypeOfRunning mode) {
43+
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ||
44+
mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) {
45+
return;
46+
}
47+
std::stringstream err_msg;
48+
err_msg << '\n' << "The type of performance check for the task was not selected.\n";
49+
throw std::runtime_error(err_msg.str().c_str());
50+
}
51+
52+
template <typename InType, typename OutType>
53+
void RunTaskPipeline(const ppc::task::TaskPtr<InType, OutType> &task) {
54+
task->Validation();
55+
task->PreProcessing();
56+
task->Run();
57+
task->PostProcessing();
58+
}
59+
60+
inline std::function<double()> MakeTechnologyTimer(ppc::task::TypeOfTask task_type) {
61+
if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL) {
62+
return [] { return GetTimeMPI(); };
63+
}
64+
if (task_type == ppc::task::TypeOfTask::kOMP) {
65+
return [] { return omp_get_wtime(); };
66+
}
67+
if (task_type == ppc::task::TypeOfTask::kTBB) {
68+
const auto t0 = tbb::tick_count::now();
69+
return [t0] { return (tbb::tick_count::now() - t0).seconds(); };
70+
}
71+
if (task_type == ppc::task::TypeOfTask::kSEQ || task_type == ppc::task::TypeOfTask::kSTL) {
72+
const auto t0 = std::chrono::high_resolution_clock::now();
73+
return [t0] {
74+
const auto now = std::chrono::high_resolution_clock::now();
75+
const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
76+
return static_cast<double>(ns) * 1e-9;
77+
};
78+
}
79+
throw std::runtime_error("The task type is not supported for performance testing.");
80+
}
81+
82+
inline double MaxElapsedTimeAcrossMpiRanks(double elapsed, ppc::task::TypeOfTask task_type) {
83+
if (task_type != ppc::task::TypeOfTask::kMPI && task_type != ppc::task::TypeOfTask::kALL) {
84+
return elapsed;
85+
}
86+
double max_elapsed = elapsed;
87+
MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
88+
return max_elapsed;
89+
}
90+
91+
template <typename InType, typename OutType>
92+
double RunTaskForBenchmark(const ppc::task::TaskPtr<InType, OutType> &task,
93+
ppc::performance::PerfResults::TypeOfRunning mode) {
94+
const auto task_type = task->GetDynamicTypeOfTask();
95+
const auto timer = MakeTechnologyTimer(task_type);
96+
task->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf;
97+
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) {
98+
SynchronizeMpiRanks();
99+
const double begin = timer();
100+
RunTaskPipeline(task);
101+
return MaxElapsedTimeAcrossMpiRanks(timer() - begin, task_type);
102+
}
103+
104+
task->Validation();
105+
task->PreProcessing();
106+
SynchronizeMpiRanks();
107+
const double begin = timer();
108+
task->Run();
109+
const double elapsed = timer() - begin;
110+
task->PostProcessing();
111+
return MaxElapsedTimeAcrossMpiRanks(elapsed, task_type);
112+
}
113+
114+
inline std::string MakeBenchmarkName(const std::string &test_name, ppc::performance::PerfResults::TypeOfRunning mode) {
115+
return test_name + "/" + ppc::performance::GetStringParamName(mode);
116+
}
117+
118+
} // namespace detail
26119

27120
template <typename InType, typename OutType>
28121
using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string,
@@ -47,27 +140,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
47140
virtual InType GetTestInputData() = 0;
48141

49142
virtual void SetPerfAttributes(ppc::performance::PerfAttr &perf_attrs) {
50-
if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kMPI ||
51-
task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kALL) {
52-
const double t0 = GetTimeMPI();
53-
perf_attrs.current_timer = [t0] { return GetTimeMPI() - t0; };
54-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kOMP) {
55-
const double t0 = omp_get_wtime();
56-
perf_attrs.current_timer = [t0] { return omp_get_wtime() - t0; };
57-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSEQ ||
58-
task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSTL) {
59-
const auto t0 = std::chrono::high_resolution_clock::now();
60-
perf_attrs.current_timer = [t0] {
61-
auto now = std::chrono::high_resolution_clock::now();
62-
auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
63-
return static_cast<double>(ns) * 1e-9;
64-
};
65-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kTBB) {
66-
const auto t0 = tbb::tick_count::now();
67-
perf_attrs.current_timer = [t0] { return (tbb::tick_count::now() - t0).seconds(); };
68-
} else {
69-
throw std::runtime_error("The task type is not supported for performance testing.");
70-
}
143+
perf_attrs.current_timer = detail::MakeTechnologyTimer(task_->GetDynamicTypeOfTask());
71144
}
72145

73146
void ExecuteTest(const PerfTestParam<InType, OutType> &perf_test_param) {
@@ -80,31 +153,38 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
80153
// A single perf test body may execute several implementations; do not abort the enabled ones.
81154
return;
82155
}
156+
if (!detail::ShouldRunBenchmark(test_name)) {
157+
return;
158+
}
159+
detail::CheckPerfMode(mode);
83160

84161
const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest(test_name);
85162

86-
task_ = task_getter(GetTestInputData());
87-
ppc::performance::Perf perf(task_);
88-
ppc::performance::PerfAttr perf_attr;
163+
const auto input_data = GetTestInputData();
164+
task_ = task_getter(input_data);
89165
SynchronizeMpiRanks();
90-
SetPerfAttributes(perf_attr);
91-
92-
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) {
93-
perf.PipelineRun(perf_attr);
94-
} else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) {
95-
perf.TaskRun(perf_attr);
96-
} else {
97-
std::stringstream err_msg;
98-
err_msg << '\n' << "The type of performance check for the task was not selected.\n";
99-
throw std::runtime_error(err_msg.str().c_str());
100-
}
101-
102-
if (GetMPIRank() == 0) {
103-
perf.PrintPerfStatistic(test_name);
104-
}
166+
detail::RunTaskPipeline(task_);
105167

106168
OutType output_data = task_->GetOutput();
107169
ASSERT_TRUE(CheckTestOutputData(output_data));
170+
171+
ppc::performance::PerfAttr perf_attr;
172+
SetPerfAttributes(perf_attr);
173+
const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running;
174+
175+
const auto benchmark_name = detail::MakeBenchmarkName(test_name, mode);
176+
benchmark::RegisterBenchmark(benchmark_name,
177+
[task_getter, input_data, mode](benchmark::State &state) {
178+
for (auto _ : state) {
179+
auto task = task_getter(input_data);
180+
const double elapsed = detail::RunTaskForBenchmark(task, mode);
181+
state.SetIterationTime(elapsed);
182+
benchmark::DoNotOptimize(task->GetOutput());
183+
}
184+
})
185+
->UseManualTime()
186+
->Unit(benchmark::kSecond)
187+
->Iterations(static_cast<int64_t>(num_iterations));
108188
}
109189

110190
private:

modules/util/include/util.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ int GetNumThreads();
7575
int GetNumProc();
7676
double GetTaskMaxTime();
7777
double GetPerfMaxTime();
78+
double GetTimeMPI();
79+
int GetMPIRank();
7880
void SynchronizeMpiRanks();
7981

8082
template <typename T>

modules/util/src/func_test_util.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <mpi.h>
22

3-
#include "util/include/perf_test_util.hpp"
3+
#include "util/include/util.hpp"
44

55
double ppc::util::GetTimeMPI() {
66
return MPI_Wtime();

0 commit comments

Comments (0)