rapidsai
diff --git a/‎cpp/CMakeLists.txt‎
Lines changed: 4 additions & 0 deletions b/‎cpp/CMakeLists.txt‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎cpp/include/cugraph/algorithms.hpp‎
Lines changed: 29 additions & 0 deletions b/‎cpp/include/cugraph/algorithms.hpp‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎cpp/src/dag/topological_sort_impl.cuh‎
Lines changed: 175 additions & 0 deletions b/‎cpp/src/dag/topological_sort_impl.cuh‎
Lines changed: 175 additions & 0 deletions
diff --git a/‎cpp/src/dag/topological_sort_mg_v32_e32.cu‎
Lines changed: 15 additions & 0 deletions b/‎cpp/src/dag/topological_sort_mg_v32_e32.cu‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎cpp/src/dag/topological_sort_mg_v64_e64.cu‎
Lines changed: 15 additions & 0 deletions b/‎cpp/src/dag/topological_sort_mg_v64_e64.cu‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎cpp/src/dag/topological_sort_sg_v32_e32.cu‎
Lines changed: 15 additions & 0 deletions b/‎cpp/src/dag/topological_sort_sg_v32_e32.cu‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎cpp/src/dag/topological_sort_sg_v64_e64.cu‎
Lines changed: 15 additions & 0 deletions b/‎cpp/src/dag/topological_sort_sg_v64_e64.cu‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎cpp/tests/CMakeLists.txt‎
Lines changed: 9 additions & 0 deletions b/‎cpp/tests/CMakeLists.txt‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎cpp/tests/dag/dag_test_utilities.hpp‎
Lines changed: 27 additions & 0 deletions b/‎cpp/tests/dag/dag_test_utilities.hpp‎
Lines changed: 27 additions & 0 deletions
@@ -292,6 +292,8 @@ set(CUGRAPH_SG_SOURCES
     src/traversal/sssp_sg_v32_e32.cu
     src/traversal/od_shortest_distances_sg_v64_e64.cu
     src/traversal/od_shortest_distances_sg_v32_e32.cu
+    src/dag/topological_sort_sg_v64_e64.cu
+    src/dag/topological_sort_sg_v32_e32.cu
     src/link_analysis/hits_sg_v64_e64.cu
     src/link_analysis/hits_sg_v32_e32.cu
     src/link_analysis/pagerank_sg_v64_e64.cu
@@ -426,6 +428,8 @@ set(CUGRAPH_MG_SOURCES
     src/traversal/bfs_mg_v32_e32.cu
     src/traversal/sssp_mg_v64_e64.cu
     src/traversal/sssp_mg_v32_e32.cu
+    src/dag/topological_sort_mg_v64_e64.cu
+    src/dag/topological_sort_mg_v32_e32.cu
     src/link_analysis/hits_mg_v64_e64.cu
     src/link_analysis/hits_mg_v32_e32.cu
     src/link_analysis/pagerank_mg_v64_e64.cu
 
@@ -31,6 +31,9 @@
 /** @defgroup community_cpp C++ community Algorithms
  */
 
+/** @defgroup dag_cpp C++ DAG Algorithms
+ */
+
 /** @defgroup sampling_cpp C++ sampling algorithms
  */
 
@@ -1092,6 +1095,32 @@ void bfs(raft::handle_t const& handle,
          vertex_t depth_limit      = std::numeric_limits<vertex_t>::max(),
          bool do_expensive_check   = false);
 
+/**
+ * @ingroup dag_cpp
+ * @brief Compute topological levels for the vertices of a directed acyclic graph (DAG).
+ *
+ * Vertices without incoming edges are assigned level 0. Every other vertex is assigned the
+ * iteration at which its last remaining incoming edge is removed, so for every directed edge
+ * (u, v) the level of u is strictly smaller than the level of v. Ordering vertices by
+ * non-decreasing level therefore yields a valid topological ordering.
+ *
+ * @throws cugraph::logic_error on erroneous input arguments, if the graph contains a cycle or
+ * if the graph is symmetric (undirected).
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return Device vector of topological levels, one entry per local vertex (indexed by local vertex
+ * partition offset). Vertices with no incoming edges, isolated vertices included, are assigned
+ * level 0.
+ */
+template <typename vertex_t, typename edge_t, bool multi_gpu>
+rmm::device_uvector<vertex_t> topological_sort(
+  raft::handle_t const& handle,
+  graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+  bool do_expensive_check = false);
+
 /**
  * @ingroup traversal_cpp
  * @brief Extract paths from breadth-first search output
 
@@ -0,0 +1,175 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <cugraph/algorithms.hpp>
+#include <cugraph/arithmetic_variant_types.hpp>
+#include <cugraph/edge_src_dst_property.hpp>
+#include <cugraph/graph_view.hpp>
+#include <cugraph/prims/reduce_op.cuh>
+#include <cugraph/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh>
+#include <cugraph/prims/vertex_frontier.cuh>
+#include <cugraph/shuffle_functions.hpp>
+#include <cugraph/utilities/error.hpp>
+#include <cugraph/utilities/host_scalar_comm.hpp>
+
+#include <raft/core/handle.hpp>
+
+#include <rmm/device_uvector.hpp>
+
+#include <cuda/functional>
+#include <cuda/std/iterator>
+#include <cuda/std/tuple>
+#include <thrust/copy.h>
+#include <thrust/fill.h>
+#include <thrust/for_each.h>
+#include <thrust/iterator/constant_iterator.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/reduce.h>
+#include <thrust/sort.h>
+#include <thrust/unique.h>
+
+#include <cstddef>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+namespace cugraph {
+
+/**
+ * @brief Assign a topological level to every local vertex of a directed acyclic graph.
+ *
+ * Level-synchronous peeling: vertices whose in-degree is 0 form the first frontier (level 0).
+ * Each iteration counts, per destination vertex, the edges arriving from the current frontier,
+ * subtracts that count from the destination's remaining in-degree, and promotes destinations
+ * whose remaining in-degree reaches 0 to the next frontier with the next level.
+ *
+ * @throws cugraph::logic_error if the graph is symmetric, if (with @p do_expensive_check) it has
+ * self-loops or a strongly connected component with more than one vertex, or if the peeling
+ * terminates before every vertex has been visited (which indicates a cycle).
+ *
+ * @param handle RAFT handle providing the CUDA stream, the thrust policy and (multi-GPU) comms.
+ * @param graph_view Graph view to process.
+ * @param do_expensive_check If true, run the self-loop and strongly-connected-component checks
+ * before the traversal.
+ * @return Device vector with one level per local vertex, indexed by the vertex's offset from
+ * `graph_view.local_vertex_partition_range_first()`.
+ */
+template <typename vertex_t, typename edge_t, bool multi_gpu>
+rmm::device_uvector<vertex_t> topological_sort(
+  raft::handle_t const& handle,
+  graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+  bool do_expensive_check)
+{
+  // Topological sort exists only if graph is directed and does not contain any loops
+  CUGRAPH_EXPECTS(!graph_view.is_symmetric(),
+                  "Invalid input argument: topological sort requires graph to be directed");
+
+  if (do_expensive_check) {
+    auto num_self_loops = graph_view.count_self_loops(handle);
+    CUGRAPH_EXPECTS(num_self_loops == 0,
+                    "Invalid input argument: topological sort requires graph without self loops");
+
+    auto components = strongly_connected_components(handle, graph_view, true);
+
+    // NOTE(review): presumably the shuffle routes equal component IDs to the same rank so that
+    // repeated IDs become detectable locally -- confirm against shuffle_ext_vertices.
+    if constexpr (multi_gpu) {
+      std::tie(components, std::ignore) = shuffle_ext_vertices(
+        handle, std::move(components), std::vector<arithmetic_device_uvector_t>{});
+    }
+
+    // A component ID that occurs more than once means at least two vertices share a strongly
+    // connected component, i.e. the graph has a cycle.
+    thrust::sort(handle.get_thrust_policy(), components.begin(), components.end());
+    auto num_repeated_components =
+      components.size() -
+      static_cast<size_t>(
+        thrust::unique_count(handle.get_thrust_policy(), components.begin(), components.end()));
+
+    // Aggregate before testing so every rank evaluates the same condition; a throw on only a
+    // subset of ranks would leave the remaining ranks waiting in a later collective call.
+    if constexpr (multi_gpu) {
+      num_repeated_components = host_scalar_allreduce(
+        handle.get_comms(), num_repeated_components, raft::comms::op_t::SUM, handle.get_stream());
+    }
+    CUGRAPH_EXPECTS(num_repeated_components == 0,
+                    "Invalid input argument: topological sort requires graph without cycles");
+  }
+
+  rmm::device_uvector<vertex_t> frontier_vertices(graph_view.local_vertex_partition_range_size(),
+                                                  handle.get_stream());
+  auto in_degrees = graph_view.compute_in_degrees(handle);
+
+  // Initial frontier: every local vertex without incoming edges.
+  frontier_vertices.resize(
+    cuda::std::distance(
+      frontier_vertices.begin(),
+      thrust::copy_if(
+        handle.get_thrust_policy(),
+        thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()),
+        thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()),
+        frontier_vertices.begin(),
+        cuda::proclaim_return_type<bool>(
+          [in_degrees = raft::device_span<edge_t const>(in_degrees.data(), in_degrees.size()),
+           v_first    = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
+            auto v_offset = v - v_first;
+            return in_degrees[v_offset] == 0;
+          }))),
+    handle.get_stream());
+
+  // Level 0 is the default; only vertices reached in later iterations are overwritten.
+  rmm::device_uvector<vertex_t> topological_levels(graph_view.local_vertex_partition_range_size(),
+                                                   handle.get_stream());
+  thrust::fill(
+    handle.get_thrust_policy(), topological_levels.begin(), topological_levels.end(), vertex_t{0});
+
+  // Explicit types: `auto x = 0` deduces int, which can overflow when size_t frontier sizes are
+  // accumulated for graphs using 64-bit vertex identifiers.
+  vertex_t level{0};
+  size_t sum_aggregate_frontier_size{0};
+
+  while (true) {
+    auto aggregate_frontier_size = frontier_vertices.size();
+    if constexpr (multi_gpu) {
+      aggregate_frontier_size = host_scalar_allreduce(
+        handle.get_comms(), aggregate_frontier_size, raft::comms::op_t::SUM, handle.get_stream());
+    }
+    if (aggregate_frontier_size == 0) { break; }
+
+    sum_aggregate_frontier_size += aggregate_frontier_size;
+
+    key_bucket_view_t<vertex_t, void, multi_gpu, true> frontier(
+      handle,
+      raft::device_span<vertex_t const>(frontier_vertices.data(), frontier_vertices.size()));
+
+    // For each destination, count the edges that arrive from the current frontier.
+    auto [dst_vertices, decrement_counts] = cugraph::transform_reduce_v_frontier_outgoing_e_by_dst(
+      handle,
+      graph_view,
+      frontier,
+      edge_src_dummy_property_t{}.view(),
+      edge_dst_dummy_property_t{}.view(),
+      edge_dummy_property_t{}.view(),
+      cuda::proclaim_return_type<edge_t>(
+        [] __device__(auto, auto, auto, auto, auto) { return edge_t{1}; }),
+      reduce_op::plus<edge_t>());
+
+    // Remove the edges coming from the frontier from the remaining in-degrees.
+    thrust::for_each(
+      handle.get_thrust_policy(),
+      thrust::make_zip_iterator(dst_vertices.begin(), decrement_counts.begin()),
+      thrust::make_zip_iterator(dst_vertices.end(), decrement_counts.end()),
+      [in_degrees = raft::device_span<edge_t>(in_degrees.data(), in_degrees.size()),
+       v_first    = graph_view.local_vertex_partition_range_first()] __device__(auto pair) {
+        auto v_offset        = cuda::std::get<0>(pair) - v_first;
+        auto decrement_count = cuda::std::get<1>(pair);
+        in_degrees[v_offset] -= decrement_count;
+      });
+
+    rmm::device_uvector<vertex_t> new_frontier_vertices(dst_vertices.size(), handle.get_stream());
+
+    // Destinations whose remaining in-degree dropped to 0 form the next frontier.
+    new_frontier_vertices.resize(
+      cuda::std::distance(
+        new_frontier_vertices.begin(),
+        thrust::copy_if(
+          handle.get_thrust_policy(),
+          dst_vertices.begin(),
+          dst_vertices.end(),
+          new_frontier_vertices.begin(),
+          cuda::proclaim_return_type<bool>(
+            [in_degrees = raft::device_span<edge_t const>(in_degrees.data(), in_degrees.size()),
+             v_first    = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
+              auto v_offset = v - v_first;
+              return in_degrees[v_offset] == 0;
+            }))),
+      handle.get_stream());
+    new_frontier_vertices.shrink_to_fit(handle.get_stream());
+
+    frontier_vertices = std::move(new_frontier_vertices);
+    ++level;
+
+    // Record the level of every vertex that has just entered the frontier.
+    thrust::for_each(handle.get_thrust_policy(),
+                     frontier_vertices.begin(),
+                     frontier_vertices.end(),
+                     [topological_levels = raft::device_span<vertex_t>(topological_levels.data(),
+                                                                       topological_levels.size()),
+                      v_first            = graph_view.local_vertex_partition_range_first(),
+                      level              = level] __device__(auto v) {
+                       auto v_offset                = v - v_first;
+                       topological_levels[v_offset] = level;
+                     });
+  }
+
+  // Vertices on a cycle never reach in-degree 0, so they are never counted in any frontier.
+  CUGRAPH_EXPECTS(
+    sum_aggregate_frontier_size == static_cast<size_t>(graph_view.number_of_vertices()),
+    "Invalid input argument: graph may contain cycles");
+
+  return topological_levels;
+}
+
+}  // namespace cugraph
@@ -0,0 +1,15 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dag/topological_sort_impl.cuh"
+
+namespace cugraph {
+
+// Explicit instantiation: multi-GPU, 32-bit vertex and edge identifiers.
+template rmm::device_uvector<int32_t> topological_sort<int32_t, int32_t, true>(
+  raft::handle_t const&, graph_view_t<int32_t, int32_t, false, true> const&, bool);
+
+}  // namespace cugraph
@@ -0,0 +1,15 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dag/topological_sort_impl.cuh"
+
+namespace cugraph {
+
+// Explicit instantiation: multi-GPU, 64-bit vertex and edge identifiers.
+template rmm::device_uvector<int64_t> topological_sort<int64_t, int64_t, true>(
+  raft::handle_t const&, graph_view_t<int64_t, int64_t, false, true> const&, bool);
+
+}  // namespace cugraph
@@ -0,0 +1,15 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dag/topological_sort_impl.cuh"
+
+namespace cugraph {
+
+// Explicit instantiation: single-GPU, 32-bit vertex and edge identifiers.
+template rmm::device_uvector<int32_t> topological_sort<int32_t, int32_t, false>(
+  raft::handle_t const&, graph_view_t<int32_t, int32_t, false, false> const&, bool);
+
+}  // namespace cugraph
@@ -0,0 +1,15 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dag/topological_sort_impl.cuh"
+
+namespace cugraph {
+
+// Explicit instantiation: single-GPU, 64-bit vertex and edge identifiers.
+template rmm::device_uvector<int64_t> topological_sort<int64_t, int64_t, false>(
+  raft::handle_t const&, graph_view_t<int64_t, int64_t, false, false> const&, bool);
+
+}  // namespace cugraph
@@ -473,6 +473,10 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo
 # - STRONGLY CONNECTED COMPONENTS tests -------------------------------------------------------------
 ConfigureTest(STRONGLY_CONNECTED_COMPONENTS_TEST components/strongly_connected_components_test.cpp)
 
+###################################################################################################
+# - TOPOLOGICAL SORT tests ------------------------------------------------------------------------
+ConfigureTest(TOPOLOGICAL_SORT_TEST dag/topological_sort_test.cpp dag/dag_test_utilities_sg.cu)
+
 ###################################################################################################
 # - MIS tests -------------------------------------------------------------------------------------
 ConfigureTest(MIS_TEST components/mis_test.cu)
@@ -690,6 +694,11 @@ if(BUILD_CUGRAPH_MG_TESTS)
     ConfigureTestMG(MG_STRONGLY_CONNECTED_COMPONENTS_TEST
                     components/mg_strongly_connected_components_test.cpp)
 
+    ###############################################################################################
+    # - MG TOPOLOGICAL SORT tests -----------------------------------------------------------------
+    ConfigureTestMG(MG_TOPOLOGICAL_SORT_TEST dag/mg_topological_sort_test.cpp
+                    dag/dag_test_utilities_mg.cu)
+
     ###############################################################################################
     # - MG EDGE SOURCE DESTINATION LOOKUP tests ---------------------------------------------------
     ConfigureTestMG(MG_LOOKUP_SRC_DST_TEST lookup/mg_lookup_src_dst_test.cpp)
 
@@ -0,0 +1,27 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <cugraph/edge_property.hpp>
+#include <cugraph/graph_view.hpp>
+
+#include <raft/core/handle.hpp>
+
+namespace cugraph {
+namespace test {
+
+// Build an edge mask that drops every edge whose source or destination lies in a non-trivial
+// strongly connected component, plus every self-loop. Intended for DAG algorithm tests (e.g.
+// topological_sort) so a cyclic test dataset can be masked down to a DAG before the algorithm
+// is invoked.
+//
+// handle:     RAFT handle passed through to the implementation.
+// graph_view: view of the graph the mask is built for.
+// Returns one bool per edge as an edge_property_t<edge_t, bool>.
+// NOTE(review): presumably `true` marks an edge that is kept -- confirm against the definition,
+// which is not part of this header.
+
+template <typename vertex_t, typename edge_t, bool multi_gpu>
+cugraph::edge_property_t<edge_t, bool> build_acyclic_edge_mask(
+  raft::handle_t const& handle,
+  cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view);
+
+}  // namespace test
+}  // namespace cugraph