Skip to content

Using cuda::std::inclusive_scan with std::vector fails to compile #8469

@oleksandr-pavlyk

Description

@oleksandr-pavlyk

Using the following function template:

/// Builds two prefix-sum offset arrays from a list of segment sizes.
///
/// @tparam OffsetT  Element type of the sizes and of the resulting offsets.
/// @param sizes     Size of each segment. Taken by const reference so that the
///                  caller's vector is not copied on every call.
/// @param gap       Size substituted for every segment whose size compares
///                  equal to zero when building the second offset array.
/// @return A tuple `{offsets, offsets_with_gaps}`. Both vectors have
///         `sizes.size() + 1` elements and start with zero. `offsets` holds the
///         inclusive scan of `sizes`; `offsets_with_gaps` holds the inclusive
///         scan of `sizes` with every zero entry replaced by `gap`.
template <typename OffsetT>
std::tuple<std::vector<OffsetT>, std::vector<OffsetT>>
make_in_out_offsets(const std::vector<OffsetT>& sizes, OffsetT gap)
{
  const std::size_t segment_count = sizes.size();

  static constexpr OffsetT zero{0};

  // One extra leading element so that offsets[i] is the start of segment i and
  // offsets[segment_count] is the total size.
  std::vector<OffsetT> offsets(segment_count + 1);
  offsets[0] = zero;

  // NOTE: the call is intentionally left as cuda::std::inclusive_scan on
  // std::vector iterators; it is the call this report is about.
  cuda::std::inclusive_scan(sizes.begin(), sizes.end(), offsets.begin() + 1);

  // Same sizes, but with every empty segment widened to `gap`.
  std::vector<OffsetT> sizes_with_gaps(segment_count);
  for (std::size_t i = 0; i < segment_count; ++i)
  {
    const auto s       = sizes[i];
    sizes_with_gaps[i] = (s == 0) ? gap : s;
  }

  std::vector<OffsetT> offsets_with_gaps(segment_count + 1);
  offsets_with_gaps[0] = zero;
  cuda::std::inclusive_scan(
    sizes_with_gaps.begin(),
    sizes_with_gaps.end(),
    offsets_with_gaps.begin() + 1);

  return {offsets, offsets_with_gaps};
}

I am getting the following errors with CTK 12.0:

Compilation errors
/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(51): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"

/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(52): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(52): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(53): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

4 errors detected in the compilation of "/home/coder/cccl/cub/test/catch2_test_device_segmented_scan_multi_segment.cu".
ninja: build stopped: subcommand failed.

At the same time, the following standalone snippet, which is based on the code above, compiles fine with `nvcc inclusive_scan.cu --std=c++17 -arch=sm_86 -I libcudacxx/include/ -I cub/ -I thrust/`.

C++ snippet
// filename: inclusive_scan.cu
#include <cuda/std/numeric>

#include <iostream>
#include <tuple>
#include <vector>

/// Builds two prefix-sum offset arrays from a list of segment sizes.
///
/// @tparam OffsetT  Element type of the sizes and of the resulting offsets.
/// @param sizes     Size of each segment. Taken by const reference so that the
///                  caller's vector is not copied on every call.
/// @param gap       Size substituted for every segment whose size compares
///                  equal to zero when building the second offset array.
/// @return A tuple `{offsets, offsets_with_gaps}`. Both vectors have
///         `sizes.size() + 1` elements and start with zero. `offsets` holds the
///         inclusive scan of `sizes`; `offsets_with_gaps` holds the inclusive
///         scan of `sizes` with every zero entry replaced by `gap`.
template <typename OffsetT>
std::tuple<std::vector<OffsetT>, std::vector<OffsetT>> make_in_out_offsets(const std::vector<OffsetT>& sizes, OffsetT gap)
{
  const std::size_t segment_count = sizes.size();

  static constexpr OffsetT zero{0};

  // One extra leading element so that offsets[i] is the start of segment i and
  // offsets[segment_count] is the total size.
  std::vector<OffsetT> offsets(segment_count + 1);
  offsets[0] = zero;

  // Commented-out explicit binary operator; the scans below pass no operator.
  // cuda::std::plus<> plus_t{};

  cuda::std::inclusive_scan(sizes.begin(), sizes.end(), offsets.begin() + 1);

  // Same sizes, but with every empty segment widened to `gap`.
  std::vector<OffsetT> sizes_with_gaps(segment_count);
  for (std::size_t i = 0; i < segment_count; ++i)
  {
    const auto s       = sizes[i];
    sizes_with_gaps[i] = (s == 0) ? gap : s;
  }

  std::vector<OffsetT> offsets_with_gaps(segment_count + 1);
  offsets_with_gaps[0] = zero;
  cuda::std::inclusive_scan(sizes_with_gaps.begin(), sizes_with_gaps.end(), offsets_with_gaps.begin() + 1);

  return {offsets, offsets_with_gaps};
}

/// Driver for the reproducer: computes both offset arrays for a fixed list of
/// segment sizes and prints the gap-adjusted offsets on a single line.
int main()
{
  // Segment sizes, including several empty (zero-sized) segments.
  const std::vector<unsigned int> segment_sizes{2, 4, 0, 5, 1, 0, 0, 1};

  const auto offsets_pair = make_in_out_offsets(segment_sizes, 4u);

  // Only the second tuple element (offsets computed with gaps) is printed.
  for (const unsigned int offset : std::get<1>(offsets_pair))
  {
    std::cout << offset << " ";
  }
  std::cout << "\n";

  return 0;
}

Originally posted by @oleksandr-pavlyk in #6712 (comment)

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

Status

Done

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions