Skip to content

Commit cbeffda

Browse files
authored
Merge pull request #1490 from NVIDIA/assorted-goodies
Assorted goodies
2 parents 512a48a + 14ae102 commit cbeffda

98 files changed

Lines changed: 1249 additions & 933 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.clang-format

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -83,14 +83,12 @@ KeepEmptyLinesAtTheStartOfBlocks: true
8383
LambdaBodyIndentation: Signature
8484
LineEnding: LF
8585
Macros: [
86-
'STDEXEC_MEMFN_DECL(X)=X',
87-
'STDEXEC_MEMFN_DECL(X,Y)=X,Y',
88-
'STDEXEC_MEMFN_DECL(X,Y,Z)=X,Y,Z',
86+
'STDEXEC_MEMFN_DECL(...)=__VA_ARGS__',
8987
'STDEXEC_ATTRIBUTE(X)=[[]]',
90-
'STDEXEC_NO_UNIQUE_ADDRESS=[[no_unique_address]]',
9188
'STDEXEC_IMMOVABLE_NO_UNIQUE_ADDRESS=[[no_unique_address]]',
9289
'STDEXEC_MISSING_MEMBER(X,Y)=true',
93-
'STDEXEC_DEFINE_MEMBER(X)=void foo() {}'
90+
'STDEXEC_DEFINE_MEMBER(X)=void foo() {}',
91+
'STDEXEC_AUTO_RETURN(...)=->decltype(auto){ return __VA_ARGS__; }',
9492
]
9593
MaxEmptyLinesToKeep: 2
9694
NamespaceIndentation: All

.clang-tidy

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ HeaderFileExtensions:
77
- hh
88
- hpp
99
- hxx
10+
- cuh
1011
ImplementationFileExtensions:
1112
- c
1213
- cc

.clangd

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ CompileFlags:
5151
- "-ferror-limit=0"
5252
- "-fmacro-backtrace-limit=0"
5353
- "-ftemplate-backtrace-limit=0"
54+
- "-std=gnu++20"
5455
Remove:
5556
- "-stdpar*"
5657
# strip CUDA fatbin args

examples/benchmark/static_thread_pool_bulk_enqueue.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ struct RunThread {
4545
auto env = exec::make_env(stdexec::prop{stdexec::get_allocator, alloc});
4646
auto [start, end] = exec::_pool_::even_share(total_scheds, tid, pool.available_parallelism());
4747
auto iterate = exec::schedule_all(pool, std::views::iota(start, end))
48-
| exec::ignore_all_values() | exec::write(env);
48+
| exec::ignore_all_values() | exec::write_env(env);
4949
# else
5050
auto [start, end] = exec::_pool_::even_share(total_scheds, tid, pool.available_parallelism());
5151
auto iterate = exec::schedule_all(pool, std::views::iota(start, end))

examples/benchmark/static_thread_pool_bulk_enqueue_nested.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ struct RunThread {
4646
auto env = exec::make_env(stdexec::prop{stdexec::get_allocator, alloc});
4747
auto [start, end] = exec::_pool_::even_share(total_scheds, tid, pool.available_parallelism());
4848
auto iterate = exec::iterate(std::views::iota(start, end)) | exec::ignore_all_values()
49-
| exec::write(env);
49+
| exec::write_env(env);
5050
# else
5151
auto [start, end] = exec::_pool_::even_share(total_scheds, tid, pool.available_parallelism());
5252
auto iterate = exec::iterate(std::views::iota(start, end)) | exec::ignore_all_values();

examples/nvexec/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,9 @@ function(def_cpu_example example)
9292
stdexec_executable_flags
9393
)
9494
set_source_files_properties(${source} PROPERTIES LANGUAGE ${_lang_cxx})
95+
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
96+
target_link_options(${target} PRIVATE -lc++abi -ltbb)
97+
endif()
9598
endfunction()
9699

97100
foreach(example ${nvexec_cpu_examples})

examples/nvexec/_clangd_helper_file.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,5 +28,5 @@
2828
// than those in the parent (for example, if the subdirectory has a CMakeList.txt that defines additional executables).
2929
// This ensures clangd provides useful intellisense for headers in any subdirectory with a CMakeList.txt.
3030

31-
int main(void) {
31+
auto main() -> int {
3232
}

examples/nvexec/bulk.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
namespace ex = stdexec;
2323

24-
int main() {
24+
auto main() -> int {
2525
using nvexec::is_on_gpu;
2626

2727
nvexec::stream_context stream_ctx{};

examples/nvexec/launch.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
* limitations under the License.
1515
*/
1616

17+
#include <algorithm>
1718
#include <nvexec/stream_context.cuh>
1819
#include <stdexec/execution.hpp>
1920

@@ -26,16 +27,18 @@ constexpr std::size_t N = 2 * 1024;
2627
constexpr std::size_t THREAD_BLOCK_SIZE = 128u;
2728
constexpr std::size_t NUM_BLOCKS = (N + THREAD_BLOCK_SIZE - 1) / THREAD_BLOCK_SIZE;
2829

29-
#define scaling 2
30+
enum {
31+
scaling = 2
32+
};
3033

31-
int bench() {
34+
auto bench() -> int {
3235
std::vector<int> input(N, 0);
3336
std::iota(input.begin(), input.end(), 1);
34-
std::transform(input.begin(), input.end(), input.begin(), [](int i) { return i * scaling; });
37+
std::ranges::transform(input, input.begin(), [](int i) { return i * scaling; });
3538
return std::accumulate(input.begin(), input.end(), 0);
3639
}
3740

38-
int main() {
41+
auto main() -> int {
3942
thrust::device_vector<int> input(N, 0);
4043
std::iota(input.begin(), input.end(), 1);
4144
int* first = thrust::raw_pointer_cast(input.data());
@@ -45,7 +48,7 @@ int main() {
4548

4649
auto snd = stdexec::transfer_just(stream.get_scheduler(), first, last)
4750
| nvexec::launch(
48-
{NUM_BLOCKS, THREAD_BLOCK_SIZE},
51+
{.grid_size = NUM_BLOCKS, .block_size = THREAD_BLOCK_SIZE},
4952
[](cudaStream_t stm, int* first, int* last) {
5053
assert(nvexec::is_on_gpu());
5154
int32_t idx = blockIdx.x * blockDim.x + threadIdx.x;

examples/nvexec/maxwell/common.cuh

Lines changed: 31 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,9 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16+
17+
// clang-format Language: Cpp
18+
1619
#pragma once
1720

1821
#include "stdexec/__detail/__config.hpp"
@@ -25,9 +28,9 @@
2528
#include <string_view>
2629
#include <memory>
2730
#include <vector>
28-
#include <string.h>
31+
#include <cstring>
2932

30-
#include <math.h>
33+
#include <cmath>
3134

3235
#if defined(_NVHPC_CUDA) || defined(__CUDACC__)
3336
# define STDEXEC_STDERR
@@ -89,7 +92,7 @@ struct fields_accessor {
8992

9093
float *base_ptr;
9194

92-
STDEXEC_ATTRIBUTE((nodiscard, host, device)) float *get(field_id id) const {
95+
STDEXEC_ATTRIBUTE((nodiscard, host, device)) auto get(field_id id) const -> float * {
9396
return base_ptr + static_cast<int>(id) * cells;
9497
}
9598
};
@@ -116,8 +119,15 @@ struct grid_t {
116119
}
117120

118121
[[nodiscard]]
119-
fields_accessor accessor() const {
120-
return {height / n, width / n, width, height, n, cells, fields_.get()};
122+
auto accessor() const -> fields_accessor {
123+
return {
124+
.dx = height / n,
125+
.dy = width / n,
126+
.width = width,
127+
.height = height,
128+
.n = n,
129+
.cells = cells,
130+
.base_ptr = fields_.get()};
121131
}
122132
};
123133

@@ -129,7 +139,7 @@ STDEXEC_ATTRIBUTE((host, device)) inline bool
129139
return ((x - object_x) * (x - object_x) + (y - object_y) * (y - object_y) <= os2);
130140
}
131141

132-
inline float calculate_dt(float dx, float dy) {
142+
inline auto calculate_dt(float dx, float dy) -> float {
133143
const float cfl = 0.3;
134144
return cfl * std::min(dx, dy) / C0;
135145
}
@@ -177,8 +187,8 @@ struct grid_initializer_t {
177187
}
178188
};
179189

180-
inline grid_initializer_t grid_initializer(float dt, fields_accessor accessor) {
181-
return {dt, accessor};
190+
inline auto grid_initializer(float dt, fields_accessor accessor) -> grid_initializer_t {
191+
return {.dt = dt, .accessor = accessor};
182192
}
183193

184194
STDEXEC_ATTRIBUTE((host, device)) inline std::size_t right_nid(std::size_t cell_id, std::size_t col, std::size_t N) {
@@ -216,7 +226,7 @@ struct h_field_calculator_t {
216226
}
217227
};
218228

219-
inline h_field_calculator_t update_h(fields_accessor accessor) {
229+
inline auto update_h(fields_accessor accessor) -> h_field_calculator_t {
220230
return {accessor};
221231
}
222232

@@ -226,11 +236,11 @@ struct e_field_calculator_t {
226236
fields_accessor accessor;
227237
std::size_t source_position;
228238

229-
STDEXEC_ATTRIBUTE((nodiscard, host, device)) float gaussian_pulse(float t, float t_0, float tau) const {
239+
STDEXEC_ATTRIBUTE((nodiscard, host, device)) auto gaussian_pulse(float t, float t_0, float tau) const -> float {
230240
return exp(-(((t - t_0) / tau) * (t - t_0) / tau));
231241
}
232242

233-
STDEXEC_ATTRIBUTE((nodiscard, host, device)) float calculate_source(float t, float frequency) const {
243+
STDEXEC_ATTRIBUTE((nodiscard, host, device)) auto calculate_source(float t, float frequency) const -> float {
234244
const float tau = 0.5f / frequency;
235245
const float t_0 = 6.0f * tau;
236246
return gaussian_pulse(t, t_0, tau);
@@ -264,9 +274,9 @@ struct e_field_calculator_t {
264274
}
265275
};
266276

267-
inline e_field_calculator_t update_e(float *time, float dt, fields_accessor accessor) {
277+
inline auto update_e(float *time, float dt, fields_accessor accessor) -> e_field_calculator_t {
268278
std::size_t source_position = accessor.n / 2 + (accessor.n * (accessor.n / 2));
269-
return {dt, time, accessor, source_position};
279+
return {.dt = dt, .time = time, .accessor = accessor, .source_position = source_position};
270280
}
271281

272282
class result_dumper_t {
@@ -358,7 +368,7 @@ class result_dumper_t {
358368
}
359369
};
360370

361-
inline result_dumper_t dump_vtk(bool write_results, fields_accessor accessor) {
371+
inline auto dump_vtk(bool write_results, fields_accessor accessor) -> result_dumper_t {
362372
return {write_results, accessor};
363373
}
364374

@@ -371,12 +381,12 @@ class time_storage_t {
371381
}
372382

373383
[[nodiscard]]
374-
float *get() const {
384+
auto get() const -> float * {
375385
return time_.get();
376386
}
377387
};
378388

379-
std::string bin_name(int node_id) {
389+
auto bin_name(int node_id) -> std::string {
380390
return "out_" + std::to_string(node_id) + ".bin";
381391
}
382392

@@ -418,17 +428,17 @@ void report_performance(
418428
report_performance(cells, iterations, method, elapsed);
419429
}
420430

421-
bool contains(std::string_view str, char c) {
431+
auto contains(std::string_view str, char c) -> bool {
422432
return str.find(c) != std::string_view::npos;
423433
}
424434

425-
std::pair<std::string_view, std::string_view> split(std::string_view str, char by = '=') {
435+
auto split(std::string_view str, char by = '=') -> std::pair<std::string_view, std::string_view> {
426436
auto it = str.find(by);
427437
return std::make_pair(str.substr(0, it), str.substr(it + 1, str.size() - it - 1));
428438
}
429439

430440
[[nodiscard]]
431-
std::map<std::string_view, std::size_t> parse_cmd(int argc, char *argv[]) {
441+
auto parse_cmd(int argc, char *argv[]) -> std::map<std::string_view, std::size_t> {
432442
std::map<std::string_view, std::size_t> params;
433443
const std::vector<std::string_view> args(argv + 1, argv + argc);
434444

@@ -453,10 +463,10 @@ std::map<std::string_view, std::size_t> parse_cmd(int argc, char *argv[]) {
453463
}
454464

455465
[[nodiscard]]
456-
std::size_t value(
466+
auto value(
457467
const std::map<std::string_view, std::size_t> &params,
458468
std::string_view name,
459-
std::size_t default_value = 0) {
469+
std::size_t default_value = 0) -> std::size_t {
460470
if (params.count(name)) {
461471
return params.at(name);
462472
}

0 commit comments

Comments
 (0)