Skip to content

Commit 1ca9370

Browse files
Flatten apply functions
1 parent 8ce1c69 commit 1ca9370

2 files changed

Lines changed: 11 additions & 12 deletions

File tree

.github/workflows/standalone-benchmark.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ jobs:
1818
STANDALONE_DIR: /root/standalone
1919
BENCHMARK_CSV: standalone_cpu.csv
2020
PROFILER_CSV: profiler_cpu.csv
21-
TIMING_CA: ./ca -e 50kHz -c --seed 0 --sync --runsInit 0 --PROCresetTimers 1 --PROCdebugMarkdown 1 --debug 1 # Add --runs 42 for benchmark runs
21+
TIMING_CA: ./ca -e 50kHz -c --seed 0 --sync --runsInit 0 --PROCresetTimers 1 --PROCdebugMarkdown 1 # Add --runs 42 for benchmark runs
2222
CC: /opt/gcc/bin/gcc #/opt/clang-p2996/bin/clang
2323
CXX: /opt/gcc/bin/g++ #/opt/clang-p2996/bin/clang++
2424

@@ -73,7 +73,7 @@ jobs:
7373
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
7474
export PATH=/opt/gcc/bin:$PATH && export LD_LIBRARY_PATH=/opt/gcc/lib64:/opt/gcc/lib:$LD_LIBRARY_PATH
7575
cd ${STANDALONE_DIR}
76-
${TIMING_CA} --runs 42 --PROCdebugCSV /root/${BENCHMARK_CSV}
76+
${TIMING_CA} --runs 12 --PROCdebugCSV /root/${BENCHMARK_CSV} --debug 1
7777
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_standalone.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/summary_${BENCHMARK_CSV}
7878
rm -rf ${STANDALONE_DIR}/*.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build
7979

GPU/Common/MemLayout.h

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -100,33 +100,33 @@ constexpr auto apply_unary(Self &self, FunctionObject&& f) {
100100

101101
// apply on skeleton struct S<F>
102102
template <class FunctionObject, template <template <class> class> class S, template <class> class F>
103-
constexpr auto apply(S<F> &self, FunctionObject&& f) {
103+
__attribute__((flatten)) constexpr auto apply(S<F> &self, FunctionObject&& f) {
104104
return apply_unary(self, std::forward<FunctionObject&&>(f));
105105
}
106106

107107
template <class FunctionObject, template <template <class> class> class S, template <class> class F>
108-
constexpr auto apply(const S<F> &self, FunctionObject&& f) {
108+
__attribute__((flatten)) constexpr auto apply(const S<F> &self, FunctionObject&& f) {
109109
return apply_unary(self, std::forward<FunctionObject&&>(f));
110110
}
111111

112112
// apply on wrappers, forwarding to the base type
113113
template <class FunctionObject, class Self>
114114
requires requires { typename Self::Base; }
115-
constexpr auto apply(Self &self, FunctionObject&& f) {
115+
__attribute__((flatten)) constexpr auto apply(Self &self, FunctionObject&& f) {
116116
return apply_unary<typename Self::Base>(self, std::forward<FunctionObject&&>(f));
117117
}
118118

119119
template <class FunctionObject, class Self>
120120
requires requires { typename Self::Base; }
121-
constexpr auto apply(const Self &self, FunctionObject&& f) {
121+
__attribute__((flatten)) constexpr auto apply(const Self &self, FunctionObject&& f) {
122122
return apply_unary<const typename Self::Base>(self, std::forward<FunctionObject&&>(f));
123123
}
124124

125125

126126
// template <class FunctionObject, class Self, class Other>
127127
// constexpr auto apply(Self &self, Other &other, FunctionObject&& f) {
128128
template <class Self, class Other, class FunctionObject>
129-
constexpr auto apply_binary(Self &self, Other &other, FunctionObject&& f) {
129+
__attribute__((flatten)) constexpr auto apply_binary(Self &self, Other &other, FunctionObject&& f) {
130130
auto construct_output = [&]<size_t... Is>(std::index_sequence<Is...>) -> Self {
131131
return {f(
132132
self.[:nsdms(^^Self)[Is]:], other.[:nsdms(^^Other)[Is]:])...};
@@ -136,25 +136,24 @@ constexpr auto apply_binary(Self &self, Other &other, FunctionObject&& f) {
136136
}
137137

138138
template <class FunctionObject, template <template <class> class> class S, template <class> class F_self, template <class> class F_other>
139-
constexpr auto apply(S<F_self> &self, S<F_other> &other, FunctionObject&& f) {
139+
__attribute__((flatten)) constexpr auto apply(S<F_self> &self, S<F_other> &other, FunctionObject&& f) {
140140
return apply_binary(self, other, std::forward<FunctionObject&&>(f));
141141
}
142142

143143
template <class FunctionObject, template <template <class> class> class S, template <class> class F_self, template <class> class F_other>
144-
constexpr auto apply(S<F_self> &self, const S<F_other> &other, FunctionObject&& f) {
144+
__attribute__((flatten)) constexpr auto apply(S<F_self> &self, const S<F_other> &other, FunctionObject&& f) {
145145
return apply_binary(self, other, std::forward<FunctionObject&&>(f));
146146
}
147147

148148
template <class Self, class Other, class FunctionObject>
149149
requires requires { typename Self::Base; typename Other::Base; }
150-
constexpr auto apply(Self &self, Other &other, FunctionObject&& f) {
150+
__attribute__((flatten)) constexpr auto apply(Self &self, Other &other, FunctionObject&& f) {
151151
return apply_binary<typename Self::Base, typename Other::Base>(self, other, std::forward<FunctionObject&&>(f));
152152
}
153153

154154
template <class Self, class Other, class FunctionObject>
155155
requires requires { typename Self::Base; typename Other::Base; }
156-
constexpr auto apply(Self &self, const Other &other, FunctionObject&& f) {
157-
static_assert(count_members<typename Self::Base>() == 4);
156+
__attribute__((flatten)) constexpr auto apply(Self &self, const Other &other, FunctionObject&& f) {
158157
return apply_binary<typename Self::Base, const typename Other::Base>(self, other, std::forward<FunctionObject&&>(f));
159158
}
160159

0 commit comments

Comments
 (0)