diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index abd96b68426..6efc2e3d364 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -58,6 +58,15 @@ option(USE_RAFT_STATIC "Build raft as a static library" OFF) option(CUGRAPH_COMPILE_RAFT_LIB "Compile the raft library instead of using it header-only" ON) option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF) +option(BUILD_CUGRAPH_COMPONENTS_ALGORITHMS "Enable components algorithms" ON) +option(BUILD_CUGRAPH_SAMPLING_ALGORITHMS "Enable sampling algorithms" ON) +option(BUILD_CUGRAPH_CENTRALITY_ALGORITHMS "Enable centrality algorithms" ON) +option(BUILD_CUGRAPH_COMMUNITY_ALGORITHMS "Enable community algorithms" ON) +option(BUILD_CUGRAPH_TRAVERSAL_ALGORITHMS "Enable traversal algorithms" ON) +option(BUILD_CUGRAPH_TREE_ALGORITHMS "Enable tree algorithms" ON) +option(BUILD_CUGRAPH_LINK_ANALYSIS_ALGORITHMS "Enable link analysis algorithms" ON) +option(BUILD_CUGRAPH_LINK_PREDICTION_ALGORITHMS "Enable link prediction algorithms" ON) + message(VERBOSE "CUGRAPH: CUDA_STATIC_RUNTIME=${CUDA_STATIC_RUNTIME}") ################################################################################ @@ -494,6 +503,7 @@ target_link_libraries(cugraph rmm::rmm raft::raft $ + cuda PRIVATE ${COMPILED_RAFT_LIB} cuco::cuco diff --git a/cpp/cmake/thirdparty/cccl_override.json b/cpp/cmake/thirdparty/cccl_override.json new file mode 100644 index 00000000000..0226e08a7bb --- /dev/null +++ b/cpp/cmake/thirdparty/cccl_override.json @@ -0,0 +1,9 @@ +{ + "packages": { + "cccl": { + "version": "2.8.0", + "git_url": "https://github.com/NVIDIA/cccl.git", + "git_tag": "main" + } + } +} diff --git a/cpp/cmake/thirdparty/get_cccl.cmake b/cpp/cmake/thirdparty/get_cccl.cmake index 72b53d4c833..1ee8c351968 100644 --- a/cpp/cmake/thirdparty/get_cccl.cmake +++ b/cpp/cmake/thirdparty/get_cccl.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -15,6 +15,13 @@ # This function finds CCCL and sets any additional necessary environment variables. function(find_and_configure_cccl) include(${rapids-cmake-dir}/cpm/cccl.cmake) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + rapids_cpm_package_override("${CMAKE_CURRENT_FUNCTION_LIST_DIR}/cccl_override.json") + + # Enable cudax namespace install + set(CCCL_ENABLE_UNSTABLE ON) + rapids_cpm_cccl(BUILD_EXPORT_SET cugraph-exports INSTALL_EXPORT_SET cugraph-exports) endfunction() diff --git a/cpp/src/prims/detail/extract_transform_if_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_if_v_frontier_e.cuh index 42c93a05c2e..aa0157e4a57 100644 --- a/cpp/src/prims/detail/extract_transform_if_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_if_v_frontier_e.cuh @@ -59,6 +59,11 @@ #include #include +#include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { namespace detail { @@ -761,6 +766,9 @@ extract_transform_if_v_frontier_e(raft::handle_t const& handle, constexpr bool try_bitmap = GraphViewType::is_multi_gpu && std::is_same_v && KeyBucketType::is_sorted_unique; + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); + if (do_expensive_check) { auto frontier_vertex_first = thrust_tuple_get_or_identity(frontier.begin()); @@ -1658,6 +1666,8 @@ extract_transform_if_v_frontier_e(raft::handle_t const& handle, if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } } + cudastf_ctx.finalize(); + return std::make_tuple(std::move(key_buffer), std::move(value_buffer)); } diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh index 924b6e89608..cdb68a61a2c 100644 --- a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh +++ b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh @@ -64,6 +64,11 @@ #include #include +#include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { namespace detail { @@ -1164,6 +1169,15 @@ void per_v_transform_reduce_e_edge_partition( std::optional> key_segment_offsets, std::optional> const& edge_partition_stream_pool_indices) { + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); + + token output_tokens[4]; + for (size_t i = 0; i < 4; i++) + { + output_tokens[i] = cudastf_ctx.token(); + } + constexpr bool use_input_key = !std::is_same_v; using vertex_t = typename GraphViewType::vertex_type; @@ -1187,10 +1201,13 @@ void per_v_transform_reduce_e_edge_partition( if constexpr (update_major && !use_input_key) { // this is necessary as we don't visit // every vertex in the hypersparse segment - thrust::fill(rmm::exec_policy_nosync(exec_stream), - output_buffer + (*key_segment_offsets)[3], - output_buffer + (*key_segment_offsets)[4], - major_init); + // TODO task write output_token[3] + cudastf_ctx.task(output_tokens[3].write())->*[=](cudaStream_t stream) { + thrust::fill(rmm::exec_policy_nosync(stream), + output_buffer + (*key_segment_offsets)[3], + output_buffer + (*key_segment_offsets)[4], + major_init); + }; } auto segment_size = use_input_key @@ -1200,8 +1217,9 @@ void per_v_transform_reduce_e_edge_partition( raft::grid_1d_thread_t update_grid(segment_size, detail::per_v_transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + size_t token_idx = 0; auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[3]; } + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[3]; token_idx +=3; } auto segment_key_first = edge_partition_key_first; auto segment_key_last = edge_partition_key_last; if constexpr (use_input_key) { @@ -1212,20 +1230,22 @@ void per_v_transform_reduce_e_edge_partition( assert(segment_key_first == nullptr); assert(segment_key_last == nullptr); } - detail::per_v_transform_reduce_e_hypersparse - <<>>( - edge_partition, - segment_key_first, - segment_key_last, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - segment_output_buffer, - e_op, - major_init, - reduce_op, - pred_op); + cudastf_ctx.task(output_tokens[token_idx].rw())->*[=](cudaStream_t stream) { + detail::per_v_transform_reduce_e_hypersparse + <<>>( + edge_partition, + segment_key_first, + segment_key_last, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + segment_output_buffer, + e_op, + major_init, + reduce_op, + pred_op); + }; } } if ((*key_segment_offsets)[3] - (*key_segment_offsets)[2]) { @@ -1236,8 +1256,9 @@ void per_v_transform_reduce_e_edge_partition( raft::grid_1d_thread_t update_grid((*key_segment_offsets)[3] - (*key_segment_offsets)[2], detail::per_v_transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + size_t token_idx = 0; auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[2]; } + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[2]; token_idx += 2; } std::optional segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor // is a deleted function, segment_key_first should always have a value @@ -1247,8 +1268,10 @@ void per_v_transform_reduce_e_edge_partition( segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); } *segment_key_first += (*key_segment_offsets)[2]; + + cudastf_ctx.task(output_tokens[token_idx].rw())->*[=](cudaStream_t stream) { detail::per_v_transform_reduce_e_low_degree - <<>>( + <<>>( edge_partition, *segment_key_first, *segment_key_first + ((*key_segment_offsets)[3] - (*key_segment_offsets)[2]), @@ -1261,6 +1284,7 @@ void per_v_transform_reduce_e_edge_partition( major_init, reduce_op, pred_op); + }; } if ((*key_segment_offsets)[2] - (*key_segment_offsets)[1] > 0) { auto exec_stream = edge_partition_stream_pool_indices @@ -1270,8 +1294,9 @@ void per_v_transform_reduce_e_edge_partition( raft::grid_1d_warp_t update_grid((*key_segment_offsets)[2] - (*key_segment_offsets)[1], detail::per_v_transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + size_t token_idx = 0; auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[1]; } + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[1]; token_idx += 1;} std::optional segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor // is a deleted function, segment_key_first should always have a value @@ -1281,8 +1306,10 @@ void per_v_transform_reduce_e_edge_partition( segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); } *segment_key_first += (*key_segment_offsets)[1]; + + cudastf_ctx.task(output_tokens[token_idx].rw())->*[=](cudaStream_t stream) { detail::per_v_transform_reduce_e_mid_degree - <<>>( + <<>>( edge_partition, *segment_key_first, *segment_key_first + ((*key_segment_offsets)[2] - (*key_segment_offsets)[1]), @@ -1296,6 +1323,7 @@ void per_v_transform_reduce_e_edge_partition( major_identity_element, reduce_op, pred_op); + }; } if ((*key_segment_offsets)[1] > 0) { auto exec_stream = edge_partition_stream_pool_indices @@ -1316,8 +1344,9 @@ void per_v_transform_reduce_e_edge_partition( } else { segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); } + cudastf_ctx.task(output_tokens[0].rw())->*[=](cudaStream_t stream) { detail::per_v_transform_reduce_e_high_degree - <<>>( + <<>>( edge_partition, *segment_key_first, *segment_key_first + (*key_segment_offsets)[1], @@ -1331,6 +1360,7 @@ void per_v_transform_reduce_e_edge_partition( major_identity_element, reduce_op, pred_op); + }; } } else { auto exec_stream = edge_partition_stream_pool_indices @@ -1374,6 +1404,8 @@ void per_v_transform_reduce_e_edge_partition( pred_op); } } + + cudastf_ctx.finalize(); } template +#include +#include + +using namespace cuda::experimental::stf; + + namespace cugraph { namespace detail { @@ -409,6 +415,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, typename EdgeValueInputWrapper::value_iterator, typename EdgeValueInputWrapper::value_type>>; + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); + auto edge_mask_view = graph_view.edge_mask_view(); // 1. update aggregate_local_frontier_local_degree_offsets @@ -504,10 +513,15 @@ auto transform_v_frontier_e(raft::handle_t const& handle, } auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + // CUDASTF logical data buffer for transform reduce phase + std::vector l_tv_buffers(5); + for (size_t segment_i = 0; segment_i < 5; segment_i++) { + l_tv_buffers[segment_i] = cudastf_ctx.token(); + } + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); if (segment_offsets) { - auto [edge_partition_key_indices, edge_partition_v_frontier_partition_offsets] = - partition_v_frontier( + auto res_partition_v_frontier = partition_v_frontier( handle, edge_partition_frontier_major_first, edge_partition_frontier_major_first + @@ -516,6 +530,10 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition.major_range_first() + (*segment_offsets)[2], edge_partition.major_range_first() + (*segment_offsets)[3]}); + // We cannot capture structured binding before C++20 so we create these variables manually + auto& edge_partition_key_indices = ::std::get<0>(res_partition_v_frontier); + auto& edge_partition_v_frontier_partition_offsets = ::std::get<1>(res_partition_v_frontier); + // FIXME: we may further improve performance by 1) concurrently running kernels on different // segments; 2) individually tuning block sizes for different segments; and 3) adding one // more segment for very high degree vertices and running segmented reduction @@ -525,8 +543,11 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_block_t update_grid(high_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[0].write())->*[&](cudaStream_t stream) { + + detail::transform_v_frontier_e_high_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[0], @@ -538,6 +559,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } auto mid_size = edge_partition_v_frontier_partition_offsets[2] - edge_partition_v_frontier_partition_offsets[1]; @@ -545,8 +567,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_warp_t update_grid(mid_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[1].write())->*[&](cudaStream_t stream) { detail::transform_v_frontier_e_mid_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[1], @@ -558,6 +581,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } auto low_size = edge_partition_v_frontier_partition_offsets[3] - edge_partition_v_frontier_partition_offsets[2]; @@ -565,8 +589,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(low_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[2].write())->*[&](cudaStream_t stream) { detail::transform_v_frontier_e_hypersparse_or_low_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[2], @@ -578,6 +603,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } auto hypersparse_size = edge_partition_v_frontier_partition_offsets[4] - edge_partition_v_frontier_partition_offsets[3]; @@ -585,8 +611,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(hypersparse_size, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[3].write())->*[&](cudaStream_t stream) { detail::transform_v_frontier_e_hypersparse_or_low_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, edge_partition_key_indices.begin() + edge_partition_v_frontier_partition_offsets[3], @@ -598,6 +625,7 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } } else { raft::grid_1d_thread_t update_grid( @@ -605,8 +633,9 @@ auto transform_v_frontier_e(raft::handle_t const& handle, detail::transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tv_buffers[4].write())->*[&,i](cudaStream_t stream) { detail::transform_v_frontier_e_hypersparse_or_low_degree - <<>>( + <<>>( edge_partition, edge_partition_frontier_key_first, thrust::make_counting_iterator(size_t{0}), @@ -618,9 +647,12 @@ auto transform_v_frontier_e(raft::handle_t const& handle, edge_partition_frontier_local_degree_offsets, e_op, get_dataframe_buffer_begin(aggregate_value_buffer)); + }; } } + cudastf_ctx.finalize(); + return std::make_tuple(std::move(aggregate_value_buffer), std::move(aggregate_local_frontier_local_degree_offsets)); } diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 5ba7edec894..32f1d1fc8e1 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -27,6 +27,10 @@ #include #include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { /** diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh index 1e45fea0608..0c1e45ded3d 100644 --- a/cpp/src/prims/transform_reduce_e.cuh +++ b/cpp/src/prims/transform_reduce_e.cuh @@ -47,6 +47,11 @@ #include #include +#include +#include + +using namespace cuda::experimental::stf; + namespace cugraph { namespace detail { @@ -470,6 +475,9 @@ T transform_reduce_e(raft::handle_t const& handle, // currently, nothing to do } + async_resources_handle& cudastf_handle = *raft::resource::get_custom_resource(handle); + stream_ctx cudastf_ctx(handle.get_stream(), cudastf_handle); + property_op edge_property_add{}; auto result_buffer = allocate_dataframe_buffer(1, handle.get_stream()); @@ -504,6 +512,11 @@ T transform_reduce_e(raft::handle_t const& handle, } auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + // CUDASTF logical data buffer for transform_reduce phase + std::vector l_tr_buffers(5); + for (size_t segment_i = 0; segment_i < 5; segment_i++) { l_tr_buffers[segment_i] = cudastf_ctx.token(); + } + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); if (segment_offsets) { // FIXME: we may further improve performance by 1) concurrently running kernels on different @@ -514,8 +527,9 @@ T transform_reduce_e(raft::handle_t const& handle, raft::grid_1d_block_t update_grid((*segment_offsets)[1], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[0].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_high_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first(), edge_partition.major_range_first() + (*segment_offsets)[1], @@ -525,13 +539,15 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[1].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_mid_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first() + (*segment_offsets)[1], edge_partition.major_range_first() + (*segment_offsets)[2], @@ -541,13 +557,15 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[2].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_low_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first() + (*segment_offsets)[2], edge_partition.major_range_first() + (*segment_offsets)[3], @@ -557,13 +575,15 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } if (edge_partition.dcs_nzd_vertex_count() && (*(edge_partition.dcs_nzd_vertex_count()) > 0)) { raft::grid_1d_thread_t update_grid(*(edge_partition.dcs_nzd_vertex_count()), detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[3].write())->*[&](cudaStream_t stream) { detail::transform_reduce_e_hypersparse - <<>>( + <<>>( edge_partition, edge_partition_src_value_input, edge_partition_dst_value_input, @@ -571,6 +591,7 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } } else { if (edge_partition.major_range_size() > 0) { @@ -578,8 +599,10 @@ T transform_reduce_e(raft::handle_t const& handle, detail::transform_reduce_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); + cudastf_ctx.task(l_tr_buffers[4].write())->*[&](cudaStream_t stream) { + detail::transform_reduce_e_low_degree - <<>>( + <<>>( edge_partition, edge_partition.major_range_first(), edge_partition.major_range_last(), @@ -589,10 +612,13 @@ T transform_reduce_e(raft::handle_t const& handle, edge_partition_e_mask, get_dataframe_buffer_begin(result_buffer), e_op); + }; } } } + cudastf_ctx.finalize(); + auto result = thrust::reduce( handle.get_thrust_policy(), get_dataframe_buffer_begin(result_buffer), diff --git a/cpp/tests/utilities/check_utilities.hpp b/cpp/tests/utilities/check_utilities.hpp index 6974d14be04..d945e54ec79 100644 --- a/cpp/tests/utilities/check_utilities.hpp +++ b/cpp/tests/utilities/check_utilities.hpp @@ -19,6 +19,7 @@ #include #include +#include #include