Skip to content

Commit 73cce9e

Browse files
committed
Merge commit '6683303a1981545a0a95b6855eb31354897a016c' into HEAD
2 parents 90fd79c + 6683303 commit 73cce9e

25 files changed

Lines changed: 498 additions & 258 deletions

Jenkinsfile

Lines changed: 23 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -160,57 +160,40 @@ pipeline {
160160
}
161161
}
162162
}
163-
stage('Headers checks') {
163+
stage('Headers check') {
164164
when {
165165
expression {
166166
!skipRemainingStages
167167
}
168168
}
169-
parallel {
170-
stage('Headers check') {
171-
agent any
172-
steps {
173-
deleteDir()
174-
unstash 'MathSetup'
175-
sh "echo CXX=${env.CXX} -Werror > make/local"
176-
sh "make -j${env.PARALLEL} test-headers"
177-
}
178-
post { always { deleteDir() } }
179-
}
180-
stage('Headers check with OpenCL') {
181-
agent { label "gpu" }
182-
steps {
183-
deleteDir()
184-
unstash 'MathSetup'
185-
sh "echo CXX=${env.CXX} -Werror > make/local"
186-
sh "echo STAN_OPENCL=true>> make/local"
187-
sh "echo OPENCL_PLATFORM_ID=0>> make/local"
188-
sh "echo OPENCL_DEVICE_ID=${OPENCL_DEVICE_ID}>> make/local"
189-
sh "make -j${env.PARALLEL} test-headers"
190-
}
191-
post { always { deleteDir() } }
192-
}
193-
}
169+
agent any
170+
steps {
171+
deleteDir()
172+
unstash 'MathSetup'
173+
sh "echo CXX=${env.CXX} -Werror > make/local"
174+
sh "make -j${env.PARALLEL} test-headers"
175+
}
176+
post { always { deleteDir() } }
177+
}
178+
stage('Linux Unit with MPI') {
179+
agent { label 'linux && mpi' }
180+
steps {
181+
deleteDir()
182+
unstash 'MathSetup'
183+
sh "echo CXX=${MPICXX} >> make/local"
184+
sh "echo CXX_TYPE=gcc >> make/local"
185+
sh "echo STAN_MPI=true >> make/local"
186+
runTests("test/unit")
187+
}
188+
post { always { retry(3) { deleteDir() } } }
194189
}
195-
stage('Always-run tests part 1') {
190+
stage('Always-run tests') {
196191
when {
197192
expression {
198193
!skipRemainingStages
199194
}
200195
}
201196
parallel {
202-
stage('Linux Unit with MPI') {
203-
agent { label 'linux && mpi' }
204-
steps {
205-
deleteDir()
206-
unstash 'MathSetup'
207-
sh "echo CXX=${MPICXX} >> make/local"
208-
sh "echo CXX_TYPE=gcc >> make/local"
209-
sh "echo STAN_MPI=true >> make/local"
210-
runTests("test/unit")
211-
}
212-
post { always { retry(3) { deleteDir() } } }
213-
}
214197
stage('Full unit with GPU') {
215198
agent { label "gpu" }
216199
steps {
@@ -220,19 +203,11 @@ pipeline {
220203
sh "echo STAN_OPENCL=true>> make/local"
221204
sh "echo OPENCL_PLATFORM_ID=0>> make/local"
222205
sh "echo OPENCL_DEVICE_ID=${OPENCL_DEVICE_ID}>> make/local"
206+
sh "make -j${env.PARALLEL} test-headers"
223207
runTests("test/unit")
224208
}
225209
post { always { retry(3) { deleteDir() } } }
226210
}
227-
}
228-
}
229-
stage('Always-run tests part 2') {
230-
when {
231-
expression {
232-
!skipRemainingStages
233-
}
234-
}
235-
parallel {
236211
stage('Distribution tests') {
237212
agent { label "distribution-tests" }
238213
steps {

stan/math/opencl/cholesky_decompose.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include <stan/math/opencl/sub_block.hpp>
1111
#include <stan/math/opencl/kernels/cholesky_decompose.hpp>
1212
#include <stan/math/opencl/kernel_generator.hpp>
13-
#include <stan/math/opencl/prim/transpose.hpp>
1413
#include <stan/math/prim/meta.hpp>
1514
#include <CL/cl2.hpp>
1615
#include <algorithm>

stan/math/opencl/kernel_generator.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <stan/math/opencl/kernel_generator/select.hpp>
1818
#include <stan/math/opencl/kernel_generator/rowwise_reduction.hpp>
1919
#include <stan/math/opencl/kernel_generator/colwise_reduction.hpp>
20+
#include <stan/math/opencl/kernel_generator/transpose.hpp>
2021

2122
#include <stan/math/opencl/kernel_generator/multi_result_kernel.hpp>
2223
#include <stan/math/opencl/kernel_generator/get_kernel_source_for_evaluating_into.hpp>

stan/math/opencl/kernel_generator/binary_operation.hpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ class binary_operation : public operation_cl<Derived, T_res, T_a, T_b> {
8181
* @return view
8282
*/
8383
inline matrix_cl_view view() const {
84-
return either(std::get<0>(arguments_).view(),
85-
std::get<1>(arguments_).view());
84+
return either(this->template get_arg<0>().view(),
85+
this->template get_arg<1>().view());
8686
}
8787
};
8888

@@ -116,9 +116,9 @@ class binary_operation : public operation_cl<Derived, T_res, T_a, T_b> {
116116
public: \
117117
class_name(T_a&& a, T_b&& b) /* NOLINT */ \
118118
: base(std::forward<T_a>(a), std::forward<T_b>(b), operation) {} \
119-
inline auto deep_copy() { \
120-
auto&& a_copy = std::get<0>(arguments_).deep_copy(); \
121-
auto&& b_copy = std::get<1>(arguments_).deep_copy(); \
119+
inline auto deep_copy() const { \
120+
auto&& a_copy = this->template get_arg<0>().deep_copy(); \
121+
auto&& b_copy = this->template get_arg<1>().deep_copy(); \
122122
return class_name<std::remove_reference_t<decltype(a_copy)>, \
123123
std::remove_reference_t<decltype(b_copy)>>( \
124124
std::move(a_copy), std::move(b_copy)); \
@@ -163,9 +163,9 @@ class binary_operation : public operation_cl<Derived, T_res, T_a, T_b> {
163163
public: \
164164
class_name(T_a&& a, T_b&& b) /* NOLINT */ \
165165
: base(std::forward<T_a>(a), std::forward<T_b>(b), operation) {} \
166-
inline auto deep_copy() { \
167-
auto&& a_copy = std::get<0>(arguments_).deep_copy(); \
168-
auto&& b_copy = std::get<1>(arguments_).deep_copy(); \
166+
inline auto deep_copy() const { \
167+
auto&& a_copy = this->template get_arg<0>().deep_copy(); \
168+
auto&& b_copy = this->template get_arg<1>().deep_copy(); \
169169
return class_name<std::remove_reference_t<decltype(a_copy)>, \
170170
std::remove_reference_t<decltype(b_copy)>>( \
171171
std::move(a_copy), std::move(b_copy)); \
@@ -189,14 +189,14 @@ ADD_BINARY_OPERATION_WITH_CUSTOM_VIEW(
189189
common_scalar_t<T_a COMMA T_b>, "*",
190190
using base = binary_operation<elewise_multiplication_<T_a, T_b>,
191191
common_scalar_t<T_a, T_b>, T_a, T_b>;
192-
return both(std::get<0>(base::arguments_).view(),
193-
std::get<1>(base::arguments_).view()););
192+
return both(this->template get_arg<0>().view(),
193+
this->template get_arg<1>().view()););
194194
ADD_BINARY_OPERATION_WITH_CUSTOM_VIEW(
195195
elewise_division_, elewise_division, common_scalar_t<T_a COMMA T_b>, "/",
196196
using base = binary_operation<elewise_division_<T_a, T_b>,
197197
common_scalar_t<T_a, T_b>, T_a, T_b>;
198-
return either(std::get<0>(base::arguments_).view(),
199-
invert(std::get<1>(base::arguments_).view())););
198+
return either(this->template get_arg<0>().view(),
199+
invert(this->template get_arg<1>().view())););
200200
ADD_BINARY_OPERATION(less_than_, operator<, bool, "<");
201201
ADD_BINARY_OPERATION_WITH_CUSTOM_VIEW(less_than_or_equal_, operator<=, bool,
202202
"<=", return matrix_cl_view::Entire);

stan/math/opencl/kernel_generator/block.hpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ class block_
3434

3535
protected:
3636
int start_row_, start_col_, rows_, cols_;
37-
using base::arguments_;
3837

3938
public:
4039
/**
@@ -61,8 +60,8 @@ class block_
6160
* Creates a deep copy of this expression.
6261
* @return copy of \c *this
6362
*/
64-
inline auto deep_copy() {
65-
auto&& arg_copy = std::get<0>(arguments_).deep_copy();
63+
inline auto deep_copy() const {
64+
auto&& arg_copy = this->template get_arg<0>().deep_copy();
6665
return block_<std::remove_reference_t<decltype(arg_copy)>>{
6766
std::move(arg_copy), start_row_, start_col_, rows_, cols_};
6867
}
@@ -122,7 +121,7 @@ class block_
122121
cl::Kernel& kernel, int& arg_num) const {
123122
if (generated.count(this) == 0) {
124123
generated.insert(this);
125-
std::get<0>(arguments_).set_args(generated, kernel, arg_num);
124+
this->template get_arg<0>().set_args(generated, kernel, arg_num);
126125
kernel.setArg(arg_num++, start_row_);
127126
kernel.setArg(arg_num++, start_col_);
128127
}
@@ -175,9 +174,9 @@ class block_
175174
inline void set_view(int bottom_diagonal, int top_diagonal,
176175
int bottom_zero_diagonal, int top_zero_diagonal) const {
177176
int change = start_col_ - start_row_;
178-
std::get<0>(arguments_)
179-
.set_view(bottom_diagonal + change, top_diagonal + change,
180-
bottom_zero_diagonal + change, top_zero_diagonal + change);
177+
this->template get_arg<0>().set_view(
178+
bottom_diagonal + change, top_diagonal + change,
179+
bottom_zero_diagonal + change, top_zero_diagonal + change);
181180
}
182181

183182
/**
@@ -186,7 +185,7 @@ class block_
186185
*/
187186
inline int bottom_diagonal() const {
188187
return std::max(
189-
std::get<0>(arguments_).bottom_diagonal() - start_col_ + start_row_,
188+
this->template get_arg<0>().bottom_diagonal() - start_col_ + start_row_,
190189
1 - rows_);
191190
}
192191

@@ -196,7 +195,7 @@ class block_
196195
*/
197196
inline int top_diagonal() const {
198197
return std::min(
199-
std::get<0>(arguments_).top_diagonal() - start_col_ + start_row_,
198+
this->template get_arg<0>().top_diagonal() - start_col_ + start_row_,
200199
cols_ - 1);
201200
}
202201

@@ -234,6 +233,14 @@ class block_
234233

235234
/**
236235
* Block of a kernel generator expression.
236+
*
237+
* Block operation modifies how its argument is indexed. If a matrix is both an
238+
* argument and result of such an operation (such as in <code> block(a, row1,
239+
* col1, rows, cols) = block(a, row2, col2, rows, cols);
240+
* </code>), the result can be wrong due to aliasing. In such a case the
241+
* expression should be evaluated into a temporary by doing <code> block(a, row1,
242+
* col1, rows, cols) = block(a, row2, col2, rows, cols).eval();</code>. This is
243+
* not necessary if the blocks do not overlap or if they are the same block.
237244
* @tparam T type of argument
238245
* @param a input argument
239246
* @param start_row first row of block

stan/math/opencl/kernel_generator/calc_if.hpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,6 @@ class calc_if_
3131
using base = operation_cl<calc_if_<Do_Calculate, T>, Scalar, T>;
3232
using base::var_name;
3333

34-
protected:
35-
using base::arguments_;
36-
37-
public:
3834
/**
3935
* Constructor
4036
* @param a expression to calc_if
@@ -66,8 +62,8 @@ class calc_if_
6662
const std::string& i, const std::string& j,
6763
const T_result& result) const {
6864
if (Do_Calculate) {
69-
return std::get<0>(arguments_)
70-
.get_whole_kernel_parts(generated, ng, i, j, result);
65+
return this->template get_arg<0>().get_whole_kernel_parts(generated, ng,
66+
i, j, result);
7167
} else {
7268
return {};
7369
}
@@ -84,15 +80,17 @@ class calc_if_
8480
inline void set_args(std::set<const operation_cl_base*>& generated,
8581
cl::Kernel& kernel, int& arg_num) const {
8682
if (Do_Calculate) {
87-
std::get<0>(arguments_).set_args(generated, kernel, arg_num);
83+
this->template get_arg<0>().set_args(generated, kernel, arg_num);
8884
}
8985
}
9086

9187
/**
9288
* View of a matrix that would be the result of evaluating this expression.
9389
* @return view
9490
*/
95-
inline matrix_cl_view view() const { return std::get<0>(arguments_).view(); }
91+
inline matrix_cl_view view() const {
92+
return this->template get_arg<0>().view();
93+
}
9694
};
9795

9896
template <bool Do_Calculate, typename T,

stan/math/opencl/kernel_generator/colwise_reduction.hpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ class colwise_reduction
4444

4545
protected:
4646
std::string init_;
47-
using base::arguments_;
4847
using base::derived;
4948

5049
public:
@@ -121,15 +120,15 @@ class colwise_reduction
121120
inline int rows() const {
122121
int local_rows = opencl_context.base_opts().at("LOCAL_SIZE_");
123122
int wgs_rows
124-
= (std::get<0>(arguments_).rows() + local_rows - 1) / local_rows;
123+
= (this->template get_arg<0>().rows() + local_rows - 1) / local_rows;
125124
return wgs_rows;
126125
}
127126

128127
/**
129128
* Number of rows threads need to be launched for.
130129
* @return number of rows
131130
*/
132-
inline int thread_rows() const { return std::get<0>(arguments_).rows(); }
131+
inline int thread_rows() const { return this->template get_arg<0>().rows(); }
133132

134133
/**
135134
* View of a matrix that would be the result of evaluating this expression.
@@ -161,8 +160,8 @@ class colwise_sum_ : public colwise_reduction<colwise_sum_<T>, T, sum_op> {
161160
* Creates a deep copy of this expression.
162161
* @return copy of \c *this
163162
*/
164-
inline auto deep_copy() {
165-
auto&& arg_copy = std::get<0>(arguments_).deep_copy();
163+
inline auto deep_copy() const {
164+
auto&& arg_copy = this->template get_arg<0>().deep_copy();
166165
return colwise_sum_<std::remove_reference_t<decltype(arg_copy)>>(
167166
std::move(arg_copy));
168167
}
@@ -209,8 +208,8 @@ class colwise_max_ : public colwise_reduction<
209208
* Creates a deep copy of this expression.
210209
* @return copy of \c *this
211210
*/
212-
inline auto deep_copy() {
213-
auto&& arg_copy = std::get<0>(arguments_).deep_copy();
211+
inline auto deep_copy() const {
212+
auto&& arg_copy = this->template get_arg<0>().deep_copy();
214213
return colwise_max_<std::remove_reference_t<decltype(arg_copy)>>(
215214
std::move(arg_copy));
216215
}
@@ -257,8 +256,8 @@ class colwise_min_ : public colwise_reduction<
257256
* Creates a deep copy of this expression.
258257
* @return copy of \c *this
259258
*/
260-
inline auto deep_copy() {
261-
auto&& arg_copy = std::get<0>(arguments_).deep_copy();
259+
inline auto deep_copy() const {
260+
auto&& arg_copy = this->template get_arg<0>().deep_copy();
262261
return colwise_min_<std::remove_reference_t<decltype(arg_copy)>>(
263262
std::move(arg_copy));
264263
}

stan/math/opencl/kernel_generator/load.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ class load_
4848
* Creates a deep copy of this expression.
4949
* @return copy of \c *this
5050
*/
51-
inline load_<T&> deep_copy() { return load_<T&>(a_); }
51+
inline load_<T&> deep_copy() const & { return load_<T&>(a_); }
52+
inline load_<T> deep_copy() && { return load_<T>(std::forward<T>(a_)); }
5253

5354
/**
5455
* generates kernel code for this expression.

stan/math/opencl/kernel_generator/multi_result_kernel.hpp

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define STAN_MATH_OPENCL_KERNEL_GENERATOR_MULTI_RESULT_KERNEL_HPP
33
#ifdef STAN_OPENCL
44

5+
#include <stan/math/opencl/kernel_generator/wrapper.hpp>
56
#include <stan/math/opencl/kernel_generator/is_valid_expression.hpp>
67
#include <stan/math/opencl/kernel_generator/name_generator.hpp>
78
#include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>
@@ -18,21 +19,6 @@ namespace math {
1819

1920
namespace internal {
2021

21-
/**
22-
* A wrapper for references. This is used to wrap references when putting them
23-
* in tuples.
24-
*/
25-
template <typename T>
26-
struct wrapper {
27-
T x;
28-
explicit wrapper(T&& x) : x(std::forward<T>(x)) {}
29-
};
30-
31-
template <typename T>
32-
wrapper<T> make_wrapper(T&& x) {
33-
return wrapper<T>(std::forward<T>(x));
34-
}
35-
3622
// Template parameter pack can only be at the end of the template list in
3723
// structs. We need 2 packs for expressions and results, so we nest structs.
3824
template <int n, typename... T_results>

0 commit comments

Comments
 (0)