|
20 | 20 | #include <cuco/detail/pair/helpers.cuh> |
21 | 21 | #include <cuco/detail/utility/strong_type.cuh> |
22 | 22 |
|
| 23 | +#include <cuda/functional> |
| 24 | +#include <cuda/std/span> |
| 25 | +#include <thrust/device_vector.h> |
23 | 26 | #include <thrust/execution_policy.h> |
24 | 27 | #include <thrust/iterator/counting_iterator.h> |
25 | 28 | #include <thrust/iterator/iterator_traits.h> |
26 | 29 | #include <thrust/random.h> |
| 30 | +#include <thrust/reduce.h> |
| 31 | +#include <thrust/scan.h> |
27 | 32 | #include <thrust/sequence.h> |
28 | 33 | #include <thrust/shuffle.h> |
29 | 34 | #include <thrust/system/detail/generic/select_system.h> |
|
34 | 39 |
|
35 | 40 | #include <cstdint> |
36 | 41 | #include <iterator> |
| 42 | +#include <tuple> |
37 | 43 | #include <type_traits> |
38 | 44 |
|
39 | 45 | namespace cuco::utility { |
@@ -414,4 +420,90 @@ class key_generator { |
414 | 420 | RNG rng_; ///< Random number generator |
415 | 421 | }; |
416 | 422 |
|
/**
 * @brief Generates random byte sequences of random lengths
 *
 * Sequence lengths are drawn uniformly from `[min_sequence_length, max_sequence_length]`. All
 * sequences are stored back-to-back in a single device byte buffer; each returned span refers to
 * one sequence inside that buffer.
 *
 * Every generated value is taken from its own position of a single pseudo-random stream (seeded
 * once with `seed`, then advanced with `discard`). Seeding a fresh engine with `seed + idx` per
 * element is avoided on purpose: for linear congruential engines such as
 * `thrust::default_random_engine`, the first outputs of consecutive seeds are strongly correlated.
 *
 * @note The returned spans hold raw pointers into the returned byte vector. They are only valid
 * as long as that vector is alive and its storage is not reallocated.
 *
 * @tparam RNG Pseudo-random number generator providing `seed()` and `discard()`.
 * NOTE(review): `discard` is expected to be cheap for Thrust's linear congruential engines; other
 * engines may skip ahead linearly - confirm before using them with large inputs.
 *
 * @throws If the minimum/maximum sequence lengths are implausible, i.e., if either is zero or if
 * the minimum exceeds the maximum (checked via `CUCO_EXPECTS`)
 *
 * @param n_sequences Number of byte sequences to generate
 * @param min_sequence_length Minimum sequence length (inclusive)
 * @param max_sequence_length Maximum sequence length (inclusive)
 * @param seed Random seed
 * @param stream CUDA stream on which the Thrust algorithms of this operation are executed
 *
 * @return A pair consisting of (1) a vector of spans pointing to each sequence and (2) a byte
 * vector holding the actual data
 */
template <typename RNG = thrust::default_random_engine>
std::pair<thrust::device_vector<cuda::std::span<std::byte>>, thrust::device_vector<std::byte>>
generate_random_byte_sequences(std::size_t n_sequences,
                               std::size_t min_sequence_length,
                               std::size_t max_sequence_length,
                               std::size_t seed    = 0,
                               cudaStream_t stream = 0)
{
  CUCO_EXPECTS(max_sequence_length > 0, "Maximum sequence lengths cannot be 0");
  CUCO_EXPECTS(min_sequence_length > 0, "Minimum sequence lengths cannot be 0");
  CUCO_EXPECTS(min_sequence_length <= max_sequence_length,
               "Maximum sequence lengths cannot be smaller than minimum sequence length");

  auto const exec_pol = thrust::cuda::par.on(stream);

  // holds the (random) length of each sequence
  thrust::device_vector<std::size_t> lengths(n_sequences);

  // generate random lengths; element `idx` consumes stream position `idx`
  thrust::transform(
    exec_pol,
    thrust::counting_iterator<std::size_t>(0),
    thrust::counting_iterator<std::size_t>(lengths.size()),
    lengths.begin(),
    cuda::proclaim_return_type<std::size_t>(
      [min_sequence_length, max_sequence_length, seed] __device__(std::size_t idx) {
        RNG rng;
        rng.seed(seed);
        // jump to this element's own position instead of re-seeding with `seed + idx`
        rng.discard(static_cast<unsigned long long>(idx));
        thrust::uniform_int_distribution<std::size_t> length_distribution{min_sequence_length,
                                                                          max_sequence_length};
        return length_distribution(rng);
      }));

  // holds the start offset (in bytes) of each sequence within the byte buffer
  thrust::device_vector<std::size_t> offsets(n_sequences);
  // use prefix sum to get the start offset for each sequence
  thrust::exclusive_scan(exec_pol, lengths.begin(), lengths.end(), offsets.begin());

  // the total number of bytes required to store the sequences
  auto const n_bytes = thrust::reduce(exec_pol, lengths.begin(), lengths.end());
  // the byte vector holding the actual sequence data
  thrust::device_vector<std::byte> bytes(n_bytes);

  auto offsets_and_lengths =
    thrust::make_zip_iterator(thrust::make_tuple(offsets.begin(), lengths.begin()));
  thrust::device_vector<cuda::std::span<std::byte>> sequences(n_sequences);
  // create the span object for each sequence from its (offset, length) pair
  thrust::transform(
    exec_pol,
    offsets_and_lengths,
    offsets_and_lengths + n_sequences,
    sequences.begin(),
    cuda::proclaim_return_type<cuda::std::span<std::byte>>(
      [bytes_ptr = thrust::raw_pointer_cast(bytes.data())] __device__(auto const& seq) {
        return cuda::std::span<std::byte>{bytes_ptr + thrust::get<0>(seq), thrust::get<1>(seq)};
      }));

  // fill the byte buffer with random data; the stream positions start right after the ones used
  // for the lengths so that byte `i` is not derived from the same draw as length `i`
  thrust::transform(
    exec_pol,
    thrust::counting_iterator<std::size_t>(0),
    thrust::counting_iterator<std::size_t>(bytes.size()),
    bytes.begin(),
    cuda::proclaim_return_type<std::byte>([n_sequences, seed] __device__(std::size_t idx) {
      RNG rng;
      rng.seed(seed);
      rng.discard(static_cast<unsigned long long>(n_sequences) +
                  static_cast<unsigned long long>(idx));
      thrust::uniform_int_distribution<int> byte_distribution{0, 255};
      return static_cast<std::byte>(byte_distribution(rng));
    }));

  return {std::move(sequences), std::move(bytes)};
}
| 508 | + |
417 | 509 | } // namespace cuco::utility |
0 commit comments