Skip to content

Commit 5b7dcd7

Browse files
authored
Add topological sort algorithm (#5492)
This PR is to add topological sort algorithm to cuGraph. Authors: - https://github.com/ngokulakrish - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: #5492
1 parent ccc5bda commit 5b7dcd7

14 files changed

Lines changed: 845 additions & 0 deletions

cpp/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ set(CUGRAPH_SG_SOURCES
292292
src/traversal/sssp_sg_v32_e32.cu
293293
src/traversal/od_shortest_distances_sg_v64_e64.cu
294294
src/traversal/od_shortest_distances_sg_v32_e32.cu
295+
src/dag/topological_sort_sg_v64_e64.cu
296+
src/dag/topological_sort_sg_v32_e32.cu
295297
src/link_analysis/hits_sg_v64_e64.cu
296298
src/link_analysis/hits_sg_v32_e32.cu
297299
src/link_analysis/pagerank_sg_v64_e64.cu
@@ -426,6 +428,8 @@ set(CUGRAPH_MG_SOURCES
426428
src/traversal/bfs_mg_v32_e32.cu
427429
src/traversal/sssp_mg_v64_e64.cu
428430
src/traversal/sssp_mg_v32_e32.cu
431+
src/dag/topological_sort_mg_v64_e64.cu
432+
src/dag/topological_sort_mg_v32_e32.cu
429433
src/link_analysis/hits_mg_v64_e64.cu
430434
src/link_analysis/hits_mg_v32_e32.cu
431435
src/link_analysis/pagerank_mg_v64_e64.cu

cpp/include/cugraph/algorithms.hpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
/** @defgroup community_cpp C++ community Algorithms
3232
*/
3333

34+
/** @defgroup dag_cpp C++ DAG Algorithms
35+
*/
36+
3437
/** @defgroup sampling_cpp C++ sampling algorithms
3538
*/
3639

@@ -1092,6 +1095,32 @@ void bfs(raft::handle_t const& handle,
10921095
vertex_t depth_limit = std::numeric_limits<vertex_t>::max(),
10931096
bool do_expensive_check = false);
10941097

1098+
/**
1099+
* @ingroup dag_cpp
1100+
* @brief Compute a topological ordering of a directed acyclic graph (DAG).
1101+
* For every directed edge (u, v), u appears before v in the returned ordering.
1102+
*
1103+
* @throws cugraph::logic_error on erroneous input arguments, if the graph contains a cycle or
1104+
* if the graph is symmetric (undirected).
1105+
*
1106+
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
1107+
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
1108+
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
1109+
* or multi-GPU (true).
1110+
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
1111+
* handles to various CUDA libraries) to run graph algorithms.
1112+
* @param graph_view Graph view object.
1113+
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
1114+
* @return Device vector containing the topological sorting levels. For each local vertex (indexed
1115+
* by local vertex partition offset), stores the topological level. Disconnected vertices are
1116+
* assigned level 0.
1117+
*/
1118+
template <typename vertex_t, typename edge_t, bool multi_gpu>
1119+
rmm::device_uvector<vertex_t> topological_sort(
1120+
raft::handle_t const& handle,
1121+
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
1122+
bool do_expensive_check = false);
1123+
10951124
/**
10961125
* @ingroup traversal_cpp
10971126
* @brief Extract paths from breadth-first search output
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#pragma once
7+
8+
#include <cugraph/algorithms.hpp>
9+
#include <cugraph/arithmetic_variant_types.hpp>
10+
#include <cugraph/edge_src_dst_property.hpp>
11+
#include <cugraph/graph_view.hpp>
12+
#include <cugraph/prims/reduce_op.cuh>
13+
#include <cugraph/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh>
14+
#include <cugraph/prims/vertex_frontier.cuh>
15+
#include <cugraph/shuffle_functions.hpp>
16+
#include <cugraph/utilities/error.hpp>
17+
#include <cugraph/utilities/host_scalar_comm.hpp>
18+
19+
#include <raft/core/handle.hpp>
20+
21+
#include <rmm/device_uvector.hpp>
22+
23+
#include <cuda/functional>
24+
#include <cuda/std/iterator>
25+
#include <cuda/std/tuple>
26+
#include <thrust/copy.h>
27+
#include <thrust/fill.h>
28+
#include <thrust/for_each.h>
29+
#include <thrust/iterator/constant_iterator.h>
30+
#include <thrust/iterator/counting_iterator.h>
31+
#include <thrust/iterator/zip_iterator.h>
32+
#include <thrust/reduce.h>
33+
#include <thrust/sort.h>
34+
35+
namespace cugraph {
36+
37+
template <typename vertex_t, typename edge_t, bool multi_gpu>
38+
rmm::device_uvector<vertex_t> topological_sort(
39+
raft::handle_t const& handle,
40+
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
41+
bool do_expensive_check)
42+
{
43+
// Topological sort exists only if graph is directed and does not contain any loops
44+
CUGRAPH_EXPECTS(!graph_view.is_symmetric(),
45+
"Invalid input argument: topological sort requires graph to be directed");
46+
47+
if (do_expensive_check) {
48+
auto num_self_loops = graph_view.count_self_loops(handle);
49+
CUGRAPH_EXPECTS(num_self_loops == 0,
50+
"Invalid input argument: topological sort requires graph without self loops");
51+
52+
auto components = strongly_connected_components(handle, graph_view, true);
53+
54+
thrust::sort(handle.get_thrust_policy(), components.begin(), components.end());
55+
CUGRAPH_EXPECTS(
56+
static_cast<size_t>(thrust::unique_count(
57+
handle.get_thrust_policy(), components.begin(), components.end())) == components.size(),
58+
"Invalid input argument: topological sort requires graph without cycles");
59+
60+
if constexpr (multi_gpu) {
61+
std::tie(components, std::ignore) = shuffle_ext_vertices(
62+
handle, std::move(components), std::vector<arithmetic_device_uvector_t>{});
63+
64+
thrust::sort(handle.get_thrust_policy(), components.begin(), components.end());
65+
CUGRAPH_EXPECTS(
66+
static_cast<size_t>(thrust::unique_count(
67+
handle.get_thrust_policy(), components.begin(), components.end())) == components.size(),
68+
"Invalid input argument: topological sort requires graph without cycles");
69+
}
70+
}
71+
72+
rmm::device_uvector<vertex_t> frontier_vertices(graph_view.local_vertex_partition_range_size(),
73+
handle.get_stream());
74+
auto in_degrees = graph_view.compute_in_degrees(handle);
75+
76+
frontier_vertices.resize(
77+
cuda::std::distance(
78+
frontier_vertices.begin(),
79+
thrust::copy_if(
80+
handle.get_thrust_policy(),
81+
thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()),
82+
thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()),
83+
frontier_vertices.begin(),
84+
cuda::proclaim_return_type<bool>(
85+
[in_degrees = raft::device_span<edge_t const>(in_degrees.data(), in_degrees.size()),
86+
v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
87+
auto v_offset = v - v_first;
88+
return in_degrees[v_offset] == 0;
89+
}))),
90+
handle.get_stream());
91+
92+
rmm::device_uvector<vertex_t> topological_levels(graph_view.local_vertex_partition_range_size(),
93+
handle.get_stream());
94+
thrust::fill(
95+
handle.get_thrust_policy(), topological_levels.begin(), topological_levels.end(), vertex_t{0});
96+
97+
auto level = 0;
98+
auto sum_aggregate_frontier_size = 0;
99+
100+
while (true) {
101+
auto aggregate_frontier_size = frontier_vertices.size();
102+
if constexpr (multi_gpu) {
103+
aggregate_frontier_size = host_scalar_allreduce(
104+
handle.get_comms(), aggregate_frontier_size, raft::comms::op_t::SUM, handle.get_stream());
105+
}
106+
if (aggregate_frontier_size == 0) { break; }
107+
108+
sum_aggregate_frontier_size += aggregate_frontier_size;
109+
110+
key_bucket_view_t<vertex_t, void, multi_gpu, true> frontier(
111+
handle,
112+
raft::device_span<vertex_t const>(frontier_vertices.data(), frontier_vertices.size()));
113+
114+
auto [dst_vertices, decrement_counts] = cugraph::transform_reduce_v_frontier_outgoing_e_by_dst(
115+
handle,
116+
graph_view,
117+
frontier,
118+
edge_src_dummy_property_t{}.view(),
119+
edge_dst_dummy_property_t{}.view(),
120+
edge_dummy_property_t{}.view(),
121+
cuda::proclaim_return_type<edge_t>(
122+
[] __device__(auto src, auto dst, auto, auto, auto) { return edge_t{1}; }),
123+
reduce_op::plus<edge_t>());
124+
125+
thrust::for_each(
126+
handle.get_thrust_policy(),
127+
thrust::make_zip_iterator(dst_vertices.begin(), decrement_counts.begin()),
128+
thrust::make_zip_iterator(dst_vertices.end(), decrement_counts.end()),
129+
[in_degrees = raft::device_span<edge_t>(in_degrees.data(), in_degrees.size()),
130+
v_first = graph_view.local_vertex_partition_range_first()] __device__(auto pair) {
131+
auto v_offset = cuda::std::get<0>(pair) - v_first;
132+
auto decrement_count = cuda::std::get<1>(pair);
133+
in_degrees[v_offset] -= decrement_count;
134+
});
135+
136+
rmm::device_uvector<vertex_t> new_frontier_vertices(dst_vertices.size(), handle.get_stream());
137+
138+
new_frontier_vertices.resize(
139+
cuda::std::distance(
140+
new_frontier_vertices.begin(),
141+
thrust::copy_if(
142+
handle.get_thrust_policy(),
143+
dst_vertices.begin(),
144+
dst_vertices.end(),
145+
new_frontier_vertices.begin(),
146+
[in_degrees = raft::device_span<edge_t const>(in_degrees.data(), in_degrees.size()),
147+
v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
148+
auto v_offset = v - v_first;
149+
return in_degrees[v_offset] == 0;
150+
})),
151+
handle.get_stream());
152+
new_frontier_vertices.shrink_to_fit(handle.get_stream());
153+
154+
frontier_vertices = std::move(new_frontier_vertices);
155+
level++;
156+
157+
thrust::for_each(handle.get_thrust_policy(),
158+
frontier_vertices.begin(),
159+
frontier_vertices.end(),
160+
[topological_levels = raft::device_span<vertex_t>(topological_levels.data(),
161+
topological_levels.size()),
162+
v_first = graph_view.local_vertex_partition_range_first(),
163+
level = level] __device__(auto v) {
164+
auto v_offset = v - v_first;
165+
topological_levels[v_offset] = level;
166+
});
167+
}
168+
169+
CUGRAPH_EXPECTS(sum_aggregate_frontier_size == graph_view.number_of_vertices(),
170+
"Invalid input argument: graph may contain cycles");
171+
172+
return topological_levels;
173+
}
174+
175+
} // namespace cugraph
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#include "dag/topological_sort_impl.cuh"
7+
8+
namespace cugraph {
9+
10+
template rmm::device_uvector<int32_t> topological_sort(
11+
raft::handle_t const& handle,
12+
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
13+
bool do_expensive_check);
14+
15+
} // namespace cugraph
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#include "dag/topological_sort_impl.cuh"
7+
8+
namespace cugraph {
9+
10+
template rmm::device_uvector<int64_t> topological_sort(
11+
raft::handle_t const& handle,
12+
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
13+
bool do_expensive_check);
14+
15+
} // namespace cugraph
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#include "dag/topological_sort_impl.cuh"
7+
8+
namespace cugraph {
9+
10+
template rmm::device_uvector<int32_t> topological_sort(
11+
raft::handle_t const& handle,
12+
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
13+
bool do_expensive_check);
14+
15+
} // namespace cugraph
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#include "dag/topological_sort_impl.cuh"
7+
8+
namespace cugraph {
9+
10+
template rmm::device_uvector<int64_t> topological_sort(
11+
raft::handle_t const& handle,
12+
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
13+
bool do_expensive_check);
14+
15+
} // namespace cugraph

cpp/tests/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,10 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo
473473
# - STRONGLY CONNECTED COMPONENTS tests -------------------------------------------------------------
474474
ConfigureTest(STRONGLY_CONNECTED_COMPONENTS_TEST components/strongly_connected_components_test.cpp)
475475

476+
###################################################################################################
477+
# - TOPOLOGICAL SORT tests ------------------------------------------------------------------------
478+
ConfigureTest(TOPOLOGICAL_SORT_TEST dag/topological_sort_test.cpp dag/dag_test_utilities_sg.cu)
479+
476480
###################################################################################################
477481
# - MIS tests -------------------------------------------------------------------------------------
478482
ConfigureTest(MIS_TEST components/mis_test.cu)
@@ -690,6 +694,11 @@ if(BUILD_CUGRAPH_MG_TESTS)
690694
ConfigureTestMG(MG_STRONGLY_CONNECTED_COMPONENTS_TEST
691695
components/mg_strongly_connected_components_test.cpp)
692696

697+
###############################################################################################
698+
# - MG TOPOLOGICAL SORT tests -----------------------------------------------------------------
699+
ConfigureTestMG(MG_TOPOLOGICAL_SORT_TEST dag/mg_topological_sort_test.cpp
700+
dag/dag_test_utilities_mg.cu)
701+
693702
###############################################################################################
694703
# - MG EDGE SOURCE DESTINATION LOOKUP tests ---------------------------------------------------
695704
ConfigureTestMG(MG_LOOKUP_SRC_DST_TEST lookup/mg_lookup_src_dst_test.cpp)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#pragma once
7+
8+
#include <cugraph/edge_property.hpp>
9+
#include <cugraph/graph_view.hpp>
10+
11+
#include <raft/core/handle.hpp>
12+
13+
namespace cugraph {
14+
namespace test {
15+
16+
// Build an edge mask that drops every edge whose source or destination lies in a non-trivial
17+
// strongly connected component, plus every self-loop. Intended for DAG algorithm tests (e.g.
18+
// topological_sort) so a cyclic test dataset can be masked down to a DAG before the algorithm
19+
// is invoked.
20+
21+
template <typename vertex_t, typename edge_t, bool multi_gpu>
22+
cugraph::edge_property_t<edge_t, bool> build_acyclic_edge_mask(
23+
raft::handle_t const& handle,
24+
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view);
25+
26+
} // namespace test
27+
} // namespace cugraph

0 commit comments

Comments
 (0)