Skip to content

Commit 15cd272

Browse files
committed
Use Google Benchmark for performance tables
1 parent c358e80 commit 15cd272

20 files changed

Lines changed: 790 additions & 804 deletions

.github/workflows/perf.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ jobs:
3030
tar -xzvf ubuntu-gcc-install-ubuntu-24.04.tar.gz -C install
3131
- name: Run perf tests
3232
run: |
33-
bash -e scripts/generate_perf_results.sh
33+
scripts/run_tests.py --running-type=performance
3434
env:
3535
PPC_NUM_PROC: 2
3636
PPC_NUM_THREADS: 2
@@ -68,7 +68,7 @@ jobs:
6868
tar -xzvf macos-clang-install.tar.gz -C install
6969
- name: Run perf tests
7070
run: |
71-
bash -e scripts/generate_perf_results.sh
71+
scripts/run_tests.py --running-type=performance
7272
env:
7373
PPC_NUM_PROC: 1
7474
PPC_NUM_THREADS: 2

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@
1313
[submodule "3rdparty/libenvpp"]
1414
path = 3rdparty/libenvpp
1515
url = https://github.com/ph3at/libenvpp
16+
[submodule "3rdparty/benchmark"]
17+
path = 3rdparty/benchmark
18+
url = https://github.com/google/benchmark

3rdparty/benchmark

Submodule benchmark added at a846068

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ endforeach()
4444

4545
message( STATUS "PPC step: Setup external projects" )
4646
include(cmake/gtest.cmake)
47+
include(cmake/benchmark.cmake)
4748

4849
############################## Modules ##############################
4950

cmake/benchmark.cmake

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
include_guard()
2+
3+
include(ExternalProject)
4+
5+
# Builds Google Benchmark from the 3rdparty/benchmark sources as an external
# project named ppc_benchmark. Build and install trees live under
# ${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark (build/ and install/).
# The target is excluded from `all`, so it is only built when something depends
# on it. The host compiler, compiler launcher, C++ standard and build type are
# forwarded; compiler warnings are silenced with -w, and Benchmark's own tests,
# -Werror and libpfm support are switched off while install rules stay enabled.
# NOTE(review): -DCMAKE_C_FLAGS / -DCMAKE_CXX_FLAGS are passed after
# ${PPC_EXTERNAL_PROJECT_CMAKE_ARGS}; if that variable also sets compiler flags
# they would presumably be overridden by the later -w -- confirm this is intended.
ExternalProject_Add(
6+
ppc_benchmark
7+
SOURCE_DIR "${CMAKE_SOURCE_DIR}/3rdparty/benchmark"
8+
PREFIX "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark"
9+
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/build"
10+
INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/install"
11+
EXCLUDE_FROM_ALL TRUE
12+
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
13+
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
14+
-DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
15+
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
16+
-DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}
17+
-DCMAKE_CXX_STANDARD_REQUIRED=${CMAKE_CXX_STANDARD_REQUIRED}
18+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
19+
${PPC_EXTERNAL_PROJECT_CMAKE_ARGS}
20+
-DCMAKE_C_FLAGS=-w
21+
-DCMAKE_CXX_FLAGS=-w
22+
-DBENCHMARK_ENABLE_TESTING=OFF
23+
-DBENCHMARK_ENABLE_GTEST_TESTS=OFF
24+
-DBENCHMARK_ENABLE_WERROR=OFF
25+
-DBENCHMARK_ENABLE_INSTALL=ON
26+
-DBENCHMARK_ENABLE_LIBPFM=OFF
27+
BUILD_COMMAND
28+
"${CMAKE_COMMAND}" --build "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/build"
29+
--config $<CONFIG> --parallel
30+
INSTALL_COMMAND
31+
"${CMAKE_COMMAND}" --install
32+
"${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/build" --config $<CONFIG>
33+
--prefix "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/install"
34+
${PPC_EXTERNAL_PROJECT_LOG_ARGS})
35+
36+
# ppc_include_benchmark(<target_name>)
# Makes the Google Benchmark headers visible to <target_name> without linking
# the library: adds 3rdparty/benchmark/include as a PUBLIC include directory
# and defines BENCHMARK_STATIC_DEFINE as a PUBLIC compile definition.
# The headers come straight from the source tree, so this does not depend on
# the ppc_benchmark external project having been built.
function(ppc_include_benchmark target_name)
37+
target_include_directories(
38+
${target_name} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/benchmark/include)
39+
target_compile_definitions(${target_name} PUBLIC BENCHMARK_STATIC_DEFINE)
40+
endfunction()
41+
42+
# ppc_link_benchmark(<target_name>)
# Sets <target_name> up to compile against and link with Google Benchmark:
# applies ppc_include_benchmark, makes the target depend on the ppc_benchmark
# external project, adds the install lib directory to the link search path and
# links `benchmark` plus Threads::Threads (and shlwapi on Windows).
# NOTE(review): the link directory is rooted at CMAKE_BINARY_DIR, whereas the
# external project installs under CMAKE_CURRENT_BINARY_DIR; the two only agree
# when the file defining the external project is included from the top-level
# directory -- confirm.
# NOTE(review): Threads::Threads presumably relies on find_package(Threads)
# being called elsewhere; that call is not visible here.
function(ppc_link_benchmark target_name)
43+
ppc_include_benchmark(${target_name})
44+
add_dependencies(${target_name} ppc_benchmark)
45+
target_link_directories(${target_name} PUBLIC
46+
"${CMAKE_BINARY_DIR}/ppc_benchmark/install/lib")
47+
target_link_libraries(${target_name} PUBLIC benchmark Threads::Threads)
48+
if(WIN32)
49+
target_link_libraries(${target_name} PUBLIC shlwapi)
50+
endif()
51+
endfunction()

modules/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set_target_properties(${exec_func_lib} PROPERTIES LINKER_LANGUAGE CXX)
2626
target_include_directories(
2727
${exec_func_lib} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty
2828
${CMAKE_SOURCE_DIR}/modules ${CMAKE_SOURCE_DIR}/tasks)
29+
ppc_include_benchmark(${exec_func_lib})
2930

3031
foreach(
3132
link

modules/util/include/perf_test_util.hpp

Lines changed: 127 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
#pragma once
22

33
#include <gtest/gtest.h>
4+
// Keep Google Test first to match clang-tidy's LLVM include order.
5+
#include <benchmark/benchmark.h>
6+
#include <mpi.h>
47
#include <omp.h>
58
#include <tbb/tick_count.h>
69

710
#include <chrono>
811
#include <cstddef>
12+
#include <cstdint>
913
#include <functional>
1014
#include <sstream>
1115
#include <stdexcept>
@@ -21,8 +25,101 @@
2125

2226
namespace ppc::util {
2327

24-
double GetTimeMPI();
25-
int GetMPIRank();
28+
namespace detail {
29+
30+
// Returns true when `filter` is empty or when `filter` occurs as a substring
// of `value`; returns false otherwise.
// NOTE(review): std::string_view::contains is a C++23 library addition, so
// this header requires a C++23 standard library -- confirm the project's
// language standard.
inline bool ContainsFilterToken(std::string_view value, std::string_view filter) {
31+
if (filter.empty()) {
32+
return true;
33+
}
34+
return value.contains(filter);
35+
}
36+
37+
// Decides whether the benchmark identified by `test_name` should run.
// Two filters are read through env::get: PPC_PERF_IMPL_FILTER and
// PPC_PERF_CATEGORY_FILTER. A filter for which env::get yields no value is
// treated as an empty string, which always matches. Otherwise the filter value
// must occur as a substring of `test_name`.
// Returns true only when both filters match.
inline bool ShouldRunBenchmark(std::string_view test_name) {
38+
const auto impl_filter = env::get<std::string>("PPC_PERF_IMPL_FILTER");
39+
const auto category_filter = env::get<std::string>("PPC_PERF_CATEGORY_FILTER");
40+
const auto impl_filter_value = impl_filter.has_value() ? std::string_view(impl_filter.value()) : std::string_view{};
41+
const auto category_filter_value =
42+
category_filter.has_value() ? std::string_view(category_filter.value()) : std::string_view{};
43+
return ContainsFilterToken(test_name, impl_filter_value) && ContainsFilterToken(test_name, category_filter_value);
44+
}
45+
46+
// Validates the requested perf running mode.
// Returns normally for kPipeline and kTaskRun; throws std::runtime_error for
// any other value of `mode`.
// NOTE(review): the message is a fixed string, so the stringstream and the
// .c_str() round-trip look unnecessary -- the text could be passed to
// std::runtime_error directly.
inline void CheckPerfMode(ppc::performance::PerfResults::TypeOfRunning mode) {
47+
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline ||
48+
mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) {
49+
return;
50+
}
51+
std::stringstream err_msg{};
52+
err_msg << '\n' << "The type of performance check for the task was not selected.\n";
53+
throw std::runtime_error(err_msg.str().c_str());
54+
}
55+
56+
// Runs the four task stages once, in order: Validation, PreProcessing, Run,
// PostProcessing. Whatever the stages return is discarded here.
template <typename InType, typename OutType>
57+
void RunTaskPipeline(const ppc::task::TaskPtr<InType, OutType> &task) {
58+
task->Validation();
59+
task->PreProcessing();
60+
task->Run();
61+
task->PostProcessing();
62+
}
63+
64+
// Builds a clock callable (seconds as double) matching the task's technology:
//   kMPI / kALL -> GetTimeMPI()
//   kOMP        -> omp_get_wtime()
//   kTBB        -> tbb::tick_count, seconds elapsed since this function ran
//   kSEQ / kSTL -> std::chrono::high_resolution_clock, seconds elapsed since
//                  this function ran
// Throws std::runtime_error for any other task type.
// The MPI and OpenMP callables return the raw clock reading, while the TBB and
// chrono callables return time relative to the moment the timer was created.
// Callers should therefore rely only on the difference between two readings.
inline std::function<double()> MakeTechnologyTimer(ppc::task::TypeOfTask task_type) {
65+
if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL) {
66+
return [] { return GetTimeMPI(); };
67+
}
68+
if (task_type == ppc::task::TypeOfTask::kOMP) {
69+
return [] { return omp_get_wtime(); };
70+
}
71+
if (task_type == ppc::task::TypeOfTask::kTBB) {
72+
const auto t0 = tbb::tick_count::now();
73+
return [t0] { return (tbb::tick_count::now() - t0).seconds(); };
74+
}
75+
if (task_type == ppc::task::TypeOfTask::kSEQ || task_type == ppc::task::TypeOfTask::kSTL) {
76+
const auto t0 = std::chrono::high_resolution_clock::now();
77+
return [t0] {
78+
const auto now = std::chrono::high_resolution_clock::now();
79+
const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
80+
return static_cast<double>(ns) * 1e-9;
81+
};
82+
}
83+
throw std::runtime_error("The task type is not supported for performance testing.");
84+
}
85+
86+
// For kMPI and kALL tasks, returns the maximum of `elapsed` over all ranks in
// MPI_COMM_WORLD, obtained with MPI_Allreduce(MPI_MAX). For every other task
// type `elapsed` is returned unchanged and no MPI call is made.
// MPI_Allreduce is a collective operation, so on the MPI path every rank has
// to reach this call. Its return code is not checked.
inline double MaxElapsedTimeAcrossMpiRanks(double elapsed, ppc::task::TypeOfTask task_type) {
87+
if (task_type != ppc::task::TypeOfTask::kMPI && task_type != ppc::task::TypeOfTask::kALL) {
88+
return elapsed;
89+
}
90+
double max_elapsed = elapsed;
91+
MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
92+
return max_elapsed;
93+
}
94+
95+
// Executes `task` once and returns the measured time in seconds for one
// benchmark iteration.
// The task is switched to StateOfTesting::kPerf and timed with the clock
// chosen by MakeTechnologyTimer for its dynamic task type.
//   kPipeline:      all four stages (Validation, PreProcessing, Run,
//                   PostProcessing) fall inside the timed region.
//   any other mode: Validation and PreProcessing run before the timer starts,
//                   only Run() is timed, and PostProcessing runs afterwards.
// SynchronizeMpiRanks() is called immediately before the first clock reading.
// The result is passed through MaxElapsedTimeAcrossMpiRanks, so for kMPI/kALL
// tasks it is the maximum across ranks.
template <typename InType, typename OutType>
96+
double RunTaskForBenchmark(const ppc::task::TaskPtr<InType, OutType> &task,
97+
ppc::performance::PerfResults::TypeOfRunning mode) {
98+
const auto task_type = task->GetDynamicTypeOfTask();
99+
const auto timer = MakeTechnologyTimer(task_type);
100+
task->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf;
101+
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) {
102+
SynchronizeMpiRanks();
103+
const double begin = timer();
104+
RunTaskPipeline(task);
105+
return MaxElapsedTimeAcrossMpiRanks(timer() - begin, task_type);
106+
}
107+
108+
task->Validation();
109+
task->PreProcessing();
110+
SynchronizeMpiRanks();
111+
const double begin = timer();
112+
task->Run();
113+
const double elapsed = timer() - begin;
114+
task->PostProcessing();
115+
return MaxElapsedTimeAcrossMpiRanks(elapsed, task_type);
116+
}
117+
118+
// Builds the benchmark name as "<test_name>/<mode name>", where the mode name
// is whatever ppc::performance::GetStringParamName returns for `mode`.
inline std::string MakeBenchmarkName(const std::string &test_name, ppc::performance::PerfResults::TypeOfRunning mode) {
119+
return test_name + "/" + ppc::performance::GetStringParamName(mode);
120+
}
121+
122+
} // namespace detail
26123

27124
template <typename InType, typename OutType>
28125
using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string,
@@ -47,27 +144,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
47144
virtual InType GetTestInputData() = 0;
48145

49146
virtual void SetPerfAttributes(ppc::performance::PerfAttr &perf_attrs) {
50-
if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kMPI ||
51-
task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kALL) {
52-
const double t0 = GetTimeMPI();
53-
perf_attrs.current_timer = [t0] { return GetTimeMPI() - t0; };
54-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kOMP) {
55-
const double t0 = omp_get_wtime();
56-
perf_attrs.current_timer = [t0] { return omp_get_wtime() - t0; };
57-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSEQ ||
58-
task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSTL) {
59-
const auto t0 = std::chrono::high_resolution_clock::now();
60-
perf_attrs.current_timer = [t0] {
61-
auto now = std::chrono::high_resolution_clock::now();
62-
auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
63-
return static_cast<double>(ns) * 1e-9;
64-
};
65-
} else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kTBB) {
66-
const auto t0 = tbb::tick_count::now();
67-
perf_attrs.current_timer = [t0] { return (tbb::tick_count::now() - t0).seconds(); };
68-
} else {
69-
throw std::runtime_error("The task type is not supported for performance testing.");
70-
}
147+
perf_attrs.current_timer = detail::MakeTechnologyTimer(task_->GetDynamicTypeOfTask());
71148
}
72149

73150
void ExecuteTest(const PerfTestParam<InType, OutType> &perf_test_param) {
@@ -80,35 +157,42 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
80157
// A single perf test body may execute several implementations; do not abort the enabled ones.
81158
return;
82159
}
160+
if (!detail::ShouldRunBenchmark(test_name)) {
161+
return;
162+
}
163+
detail::CheckPerfMode(mode);
83164

84165
const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest(test_name);
85166

86-
task_ = task_getter(GetTestInputData());
87-
ppc::performance::Perf perf(task_);
88-
ppc::performance::PerfAttr perf_attr;
167+
const auto input_data = GetTestInputData();
168+
task_ = task_getter(input_data);
89169
SynchronizeMpiRanks();
90-
SetPerfAttributes(perf_attr);
91-
92-
if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) {
93-
perf.PipelineRun(perf_attr);
94-
} else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) {
95-
perf.TaskRun(perf_attr);
96-
} else {
97-
std::stringstream err_msg;
98-
err_msg << '\n' << "The type of performance check for the task was not selected.\n";
99-
throw std::runtime_error(err_msg.str().c_str());
100-
}
101-
102-
if (GetMPIRank() == 0) {
103-
perf.PrintPerfStatistic(test_name);
104-
}
170+
detail::RunTaskPipeline(task_);
105171

106172
OutType output_data = task_->GetOutput();
107173
ASSERT_TRUE(CheckTestOutputData(output_data));
174+
175+
ppc::performance::PerfAttr perf_attr;
176+
SetPerfAttributes(perf_attr);
177+
const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running;
178+
179+
const auto benchmark_name = detail::MakeBenchmarkName(test_name, mode);
180+
benchmark::RegisterBenchmark(benchmark_name,
181+
[task_getter, input_data, mode](benchmark::State &state) {
182+
for (auto _ : state) {
183+
auto task = task_getter(input_data);
184+
const double elapsed = detail::RunTaskForBenchmark(task, mode);
185+
state.SetIterationTime(elapsed);
186+
benchmark::DoNotOptimize(task->GetOutput());
187+
}
188+
})
189+
->UseManualTime()
190+
->Unit(benchmark::kSecond)
191+
->Iterations(static_cast<std::int64_t>(num_iterations));
108192
}
109193

110194
private:
111-
ppc::task::TaskPtr<InType, OutType> task_;
195+
ppc::task::TaskPtr<InType, OutType> task_{};
112196
};
113197

114198
template <typename TaskType, typename InputType>
@@ -129,7 +213,7 @@ auto TupleToGTestValuesImpl(const Tuple &tup, std::index_sequence<I...> /*unused
129213

130214
template <typename Tuple>
131215
auto TupleToGTestValues(Tuple &&tup) {
132-
constexpr size_t kSize = std::tuple_size_v<std::decay_t<Tuple>>;
216+
constexpr std::size_t kSize{std::tuple_size_v<std::decay_t<Tuple>>};
133217
return TupleToGTestValuesImpl(std::forward<Tuple>(tup), std::make_index_sequence<kSize>{});
134218
}
135219

modules/util/include/util.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ int GetNumThreads();
7575
int GetNumProc();
7676
double GetTaskMaxTime();
7777
double GetPerfMaxTime();
78+
double GetTimeMPI();
79+
int GetMPIRank();
7880
void SynchronizeMpiRanks();
7981

8082
template <typename T>

modules/util/src/func_test_util.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <mpi.h>
22

3-
#include "util/include/perf_test_util.hpp"
3+
#include "util/include/util.hpp"
44

55
double ppc::util::GetTimeMPI() {
66
return MPI_Wtime();

0 commit comments

Comments
 (0)