Skip to content

Commit 8771835

Browse files
committed
Use Google Benchmark for performance tables
1 parent c358e80 commit 8771835

20 files changed

Lines changed: 781 additions & 804 deletions

.github/workflows/perf.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
tar -xzvf ubuntu-gcc-install-ubuntu-24.04.tar.gz -C install
3131
- name: Run perf tests
3232
run: |
33-
bash -e scripts/generate_perf_results.sh
33+
scripts/run_tests.py --running-type=performance
3434
env:
3535
PPC_NUM_PROC: 2
3636
PPC_NUM_THREADS: 2
@@ -68,7 +68,7 @@ jobs:
6868
tar -xzvf macos-clang-install.tar.gz -C install
6969
- name: Run perf tests
7070
run: |
71-
bash -e scripts/generate_perf_results.sh
71+
scripts/run_tests.py --running-type=performance
7272
env:
7373
PPC_NUM_PROC: 1
7474
PPC_NUM_THREADS: 2

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@
1313
[submodule "3rdparty/libenvpp"]
1414
path = 3rdparty/libenvpp
1515
url = https://github.com/ph3at/libenvpp
16+
[submodule "3rdparty/benchmark"]
17+
path = 3rdparty/benchmark
18+
url = https://github.com/google/benchmark

3rdparty/benchmark

Submodule benchmark added at a846068

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ endforeach()
4444

4545
message( STATUS "PPC step: Setup external projects" )
4646
include(cmake/gtest.cmake)
47+
include(cmake/benchmark.cmake)
4748

4849
############################## Modules ##############################
4950

cmake/benchmark.cmake

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
include_guard()
2+
3+
include(ExternalProject)
4+
5+
# Build and install Google Benchmark from the 3rdparty submodule as an external
# project. All artefacts live under one prefix inside the current binary dir.
set(_ppc_benchmark_prefix "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark")
set(_ppc_benchmark_build_dir "${_ppc_benchmark_prefix}/build")
set(_ppc_benchmark_install_dir "${_ppc_benchmark_prefix}/install")

ExternalProject_Add(
  ppc_benchmark
  SOURCE_DIR "${CMAKE_SOURCE_DIR}/3rdparty/benchmark"
  PREFIX "${_ppc_benchmark_prefix}"
  BINARY_DIR "${_ppc_benchmark_build_dir}"
  INSTALL_DIR "${_ppc_benchmark_install_dir}"
  # Only built when a target declares a dependency on ppc_benchmark.
  EXCLUDE_FROM_ALL TRUE
  # Forward the parent toolchain and language settings, silence warnings (-w),
  # and switch off the benchmark library's own tests and libpfm support.
  CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
             -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
             -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
             -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
             -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}
             -DCMAKE_CXX_STANDARD_REQUIRED=${CMAKE_CXX_STANDARD_REQUIRED}
             -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
             ${PPC_EXTERNAL_PROJECT_CMAKE_ARGS}
             -DCMAKE_C_FLAGS=-w
             -DCMAKE_CXX_FLAGS=-w
             -DBENCHMARK_ENABLE_TESTING=OFF
             -DBENCHMARK_ENABLE_GTEST_TESTS=OFF
             -DBENCHMARK_ENABLE_WERROR=OFF
             -DBENCHMARK_ENABLE_INSTALL=ON
             -DBENCHMARK_ENABLE_LIBPFM=OFF
  BUILD_COMMAND
    "${CMAKE_COMMAND}" --build "${_ppc_benchmark_build_dir}"
    --config $<CONFIG> --parallel
  INSTALL_COMMAND
    "${CMAKE_COMMAND}" --install
    "${_ppc_benchmark_build_dir}" --config $<CONFIG>
    --prefix "${_ppc_benchmark_install_dir}"
  ${PPC_EXTERNAL_PROJECT_LOG_ARGS})
35+
36+
# Add the Google Benchmark public headers (taken straight from the submodule
# checkout) to the PUBLIC include path of `target_name`.
function(ppc_include_benchmark target_name)
  set(benchmark_include_dir ${CMAKE_SOURCE_DIR}/3rdparty/benchmark/include)
  target_include_directories(${target_name} PUBLIC ${benchmark_include_dir})
endfunction()
40+
41+
# Make `target_name` build against Google Benchmark:
#   * adds the benchmark headers (via ppc_include_benchmark),
#   * orders the target after the ppc_benchmark external project,
#   * links the `benchmark` library and Threads::Threads.
function(ppc_link_benchmark target_name)
  ppc_include_benchmark(${target_name})
  add_dependencies(${target_name} ppc_benchmark)
  # Ask the external project where it installs instead of rebuilding the path
  # by hand: the project is declared relative to CMAKE_CURRENT_BINARY_DIR, so
  # a path based on CMAKE_BINARY_DIR only matches when both happen to be equal.
  ExternalProject_Get_Property(ppc_benchmark INSTALL_DIR)
  target_link_directories(${target_name} PUBLIC "${INSTALL_DIR}/lib")
  target_link_libraries(${target_name} PUBLIC benchmark Threads::Threads)
endfunction()

modules/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set_target_properties(${exec_func_lib} PROPERTIES LINKER_LANGUAGE CXX)
2626
target_include_directories(
2727
${exec_func_lib} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty
2828
${CMAKE_SOURCE_DIR}/modules ${CMAKE_SOURCE_DIR}/tasks)
29+
ppc_include_benchmark(${exec_func_lib})
2930

3031
foreach(
3132
link

modules/util/include/perf_test_util.hpp

Lines changed: 125 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#pragma once
22

3+
#include <benchmark/benchmark.h>
34
#include <gtest/gtest.h>
5+
#include <mpi.h>
46
#include <omp.h>
57
#include <tbb/tick_count.h>
68

@@ -21,8 +23,101 @@
2123

2224
namespace ppc::util {
2325

24-
double GetTimeMPI();
25-
int GetMPIRank();
26+
namespace detail {
27+
28+
// Returns true when `filter` is empty (nothing to filter on) or when it occurs
// as a substring of `value`.
inline bool ContainsFilterToken(std::string_view value, std::string_view filter) {
  const bool no_filter = filter.empty();
  return no_filter || value.find(filter) != std::string_view::npos;
}
34+
35+
inline bool ShouldRunBenchmark(std::string_view test_name) {
36+
const auto impl_filter = env::get<std::string>("PPC_PERF_IMPL_FILTER");
37+
const auto category_filter = env::get<std::string>("PPC_PERF_CATEGORY_FILTER");
38+
const auto impl_filter_value = impl_filter.has_value() ? std::string_view(impl_filter.value()) : std::string_view{};
39+
const auto category_filter_value =
40+
category_filter.has_value() ? std::string_view(category_filter.value()) : std::string_view{};
41+
return ContainsFilterToken(test_name, impl_filter_value) && ContainsFilterToken(test_name, category_filter_value);
42+
}
43+
44+
inline void CheckPerfMode(ppc::performance::PerfResults::TypeOfRunning mode) {
45+
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ||
46+
mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) {
47+
return;
48+
}
49+
std::stringstream err_msg{};
50+
err_msg << '\n' << "The type of performance check for the task was not selected.\n";
51+
throw std::runtime_error(err_msg.str().c_str());
52+
}
53+
54+
template <typename InType, typename OutType>
55+
void RunTaskPipeline(const ppc::task::TaskPtr<InType, OutType> &task) {
56+
task->Validation();
57+
task->PreProcessing();
58+
task->Run();
59+
task->PostProcessing();
60+
}
61+
62+
inline std::function<double()> MakeTechnologyTimer(ppc::task::TypeOfTask task_type) {
63+
if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL) {
64+
return [] { return GetTimeMPI(); };
65+
}
66+
if (task_type == ppc::task::TypeOfTask::kOMP) {
67+
return [] { return omp_get_wtime(); };
68+
}
69+
if (task_type == ppc::task::TypeOfTask::kTBB) {
70+
const auto t0 = tbb::tick_count::now();
71+
return [t0] { return (tbb::tick_count::now() - t0).seconds(); };
72+
}
73+
if (task_type == ppc::task::TypeOfTask::kSEQ || task_type == ppc::task::TypeOfTask::kSTL) {
74+
const auto t0 = std::chrono::high_resolution_clock::now();
75+
return [t0] {
76+
const auto now = std::chrono::high_resolution_clock::now();
77+
const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
78+
return static_cast<double>(ns) * 1e-9;
79+
};
80+
}
81+
throw std::runtime_error("The task type is not supported for performance testing.");
82+
}
83+
84+
inline double MaxElapsedTimeAcrossMpiRanks(double elapsed, ppc::task::TypeOfTask task_type) {
85+
if (task_type != ppc::task::TypeOfTask::kMPI && task_type != ppc::task::TypeOfTask::kALL) {
86+
return elapsed;
87+
}
88+
double max_elapsed = elapsed;
89+
MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
90+
return max_elapsed;
91+
}
92+
93+
template <typename InType, typename OutType>
94+
double RunTaskForBenchmark(const ppc::task::TaskPtr<InType, OutType> &task,
95+
ppc::performance::PerfResults::TypeOfRunning mode) {
96+
const auto task_type = task->GetDynamicTypeOfTask();
97+
const auto timer = MakeTechnologyTimer(task_type);
98+
task->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf;
99+
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) {
100+
SynchronizeMpiRanks();
101+
const double begin = timer();
102+
RunTaskPipeline(task);
103+
return MaxElapsedTimeAcrossMpiRanks(timer() - begin, task_type);
104+
}
105+
106+
task->Validation();
107+
task->PreProcessing();
108+
SynchronizeMpiRanks();
109+
const double begin = timer();
110+
task->Run();
111+
const double elapsed = timer() - begin;
112+
task->PostProcessing();
113+
return MaxElapsedTimeAcrossMpiRanks(elapsed, task_type);
114+
}
115+
116+
inline std::string MakeBenchmarkName(const std::string &test_name, ppc::performance::PerfResults::TypeOfRunning mode) {
117+
return test_name + "/" + ppc::performance::GetStringParamName(mode);
118+
}
119+
120+
} // namespace detail
26121

27122
template <typename InType, typename OutType>
28123
using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string,
@@ -47,27 +142,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
47142
virtual InType GetTestInputData() = 0;
48143

49144
virtual void SetPerfAttributes(ppc::performance::PerfAttr &perf_attrs) {
50-
if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kMPI ||
51-
task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kALL) {
52-
const double t0 = GetTimeMPI();
53-
perf_attrs.current_timer = [t0] { return GetTimeMPI() - t0; };
54-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kOMP) {
55-
const double t0 = omp_get_wtime();
56-
perf_attrs.current_timer = [t0] { return omp_get_wtime() - t0; };
57-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSEQ ||
58-
task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSTL) {
59-
const auto t0 = std::chrono::high_resolution_clock::now();
60-
perf_attrs.current_timer = [t0] {
61-
auto now = std::chrono::high_resolution_clock::now();
62-
auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
63-
return static_cast<double>(ns) * 1e-9;
64-
};
65-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kTBB) {
66-
const auto t0 = tbb::tick_count::now();
67-
perf_attrs.current_timer = [t0] { return (tbb::tick_count::now() - t0).seconds(); };
68-
} else {
69-
throw std::runtime_error("The task type is not supported for performance testing.");
70-
}
145+
perf_attrs.current_timer = detail::MakeTechnologyTimer(task_->GetDynamicTypeOfTask());
71146
}
72147

73148
void ExecuteTest(const PerfTestParam<InType, OutType> &perf_test_param) {
@@ -80,35 +155,42 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
80155
// A single perf test body may execute several implementations; do not abort the enabled ones.
81156
return;
82157
}
158+
if (!detail::ShouldRunBenchmark(test_name)) {
159+
return;
160+
}
161+
detail::CheckPerfMode(mode);
83162

84163
const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest(test_name);
85164

86-
task_ = task_getter(GetTestInputData());
87-
ppc::performance::Perf perf(task_);
88-
ppc::performance::PerfAttr perf_attr;
165+
const auto input_data = GetTestInputData();
166+
task_ = task_getter(input_data);
89167
SynchronizeMpiRanks();
90-
SetPerfAttributes(perf_attr);
91-
92-
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) {
93-
perf.PipelineRun(perf_attr);
94-
} else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) {
95-
perf.TaskRun(perf_attr);
96-
} else {
97-
std::stringstream err_msg;
98-
err_msg << '\n' << "The type of performance check for the task was not selected.\n";
99-
throw std::runtime_error(err_msg.str().c_str());
100-
}
101-
102-
if (GetMPIRank() == 0) {
103-
perf.PrintPerfStatistic(test_name);
104-
}
168+
detail::RunTaskPipeline(task_);
105169

106170
OutType output_data = task_->GetOutput();
107171
ASSERT_TRUE(CheckTestOutputData(output_data));
172+
173+
ppc::performance::PerfAttr perf_attr;
174+
SetPerfAttributes(perf_attr);
175+
const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running;
176+
177+
const auto benchmark_name = detail::MakeBenchmarkName(test_name, mode);
178+
benchmark::RegisterBenchmark(benchmark_name,
179+
[task_getter, input_data, mode](benchmark::State &state) {
180+
for (auto _ : state) {
181+
auto task = task_getter(input_data);
182+
const double elapsed = detail::RunTaskForBenchmark(task, mode);
183+
state.SetIterationTime(elapsed);
184+
benchmark::DoNotOptimize(task->GetOutput());
185+
}
186+
})
187+
->UseManualTime()
188+
->Unit(benchmark::kSecond)
189+
->Iterations(static_cast<int64_t>(num_iterations));
108190
}
109191

110192
private:
111-
ppc::task::TaskPtr<InType, OutType> task_;
193+
ppc::task::TaskPtr<InType, OutType> task_{};
112194
};
113195

114196
template <typename TaskType, typename InputType>
@@ -129,7 +211,7 @@ auto TupleToGTestValuesImpl(const Tuple &tup, std::index_sequence<I...> /*unused
129211

130212
template <typename Tuple>
131213
auto TupleToGTestValues(Tuple &&tup) {
132-
constexpr size_t kSize = std::tuple_size_v<std::decay_t<Tuple>>;
214+
constexpr std::size_t kSize{std::tuple_size_v<std::decay_t<Tuple>>};
133215
return TupleToGTestValuesImpl(std::forward<Tuple>(tup), std::make_index_sequence<kSize>{});
134216
}
135217

modules/util/include/util.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ int GetNumThreads();
7575
int GetNumProc();
7676
double GetTaskMaxTime();
7777
double GetPerfMaxTime();
78+
double GetTimeMPI();
79+
int GetMPIRank();
7880
void SynchronizeMpiRanks();
7981

8082
template <typename T>

modules/util/src/func_test_util.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <mpi.h>
22

3-
#include "util/include/perf_test_util.hpp"
3+
#include "util/include/util.hpp"
44

55
double ppc::util::GetTimeMPI() {
66
return MPI_Wtime();

0 commit comments

Comments
 (0)