From 08e00c9bf0802f8c8638f6545f2553a812fd02da Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Wed, 6 May 2026 13:22:57 -0700
Subject: [PATCH 01/12] Correctly handle blocks with "block byte size" fields
 in the Avro reader (#22387)

When the number of elements in the Avro block is stored as a negative number, the block also includes its size in bytes. This PR allows the reader to correctly parse such files.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/22387
---
 cpp/src/io/avro/avro.cpp                      |  14 ++++++--
 .../cudf/cudf/tests/data/avro/hang_input.avro | Bin 0 -> 101 bytes
 .../cudf/cudf/tests/input_output/test_avro.py |  34 +++++++++++++++++-
 3 files changed, 44 insertions(+), 4 deletions(-)
 create mode 100644 python/cudf/cudf/tests/data/avro/hang_input.avro

diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp
index bf7d983d481..4639ea6ba23 100644
--- a/cpp/src/io/avro/avro.cpp
+++ b/cpp/src/io/avro/avro.cpp
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -64,8 +64,16 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row)
   sig4 |= get_raw<uint8_t>() << 24;
   if (sig4 != avro_magic) { return false; }
   for (;;) {
-    auto num_md_items = static_cast<uint32_t>(get_encoded<int64_t>());
-    if (num_md_items == 0) { break; }
+    auto md_items_signed = get_encoded<int64_t>();
+    if (md_items_signed == 0) { break; }
+    if (md_items_signed < 0) {
+      // A negative count means a block's byte size follows. Read it and discard it.
+      [[maybe_unused]] auto const md_block_size = get_encoded<int64_t>();
+      md_items_signed                           = -md_items_signed;
+    }
+    // Check that the claimed item count can fit in the remaining input
+    if (md_items_signed > (m_end - m_cur) / 2) { return false; }
+    auto const num_md_items = static_cast<uint32_t>(md_items_signed);
     for (uint32_t i = 0; i < num_md_items; i++) {
       auto const key   = get_encoded<std::string>();
       auto const value = get_encoded<std::string>();
diff --git a/python/cudf/cudf/tests/data/avro/hang_input.avro b/python/cudf/cudf/tests/data/avro/hang_input.avro
new file mode 100644
index 0000000000000000000000000000000000000000..b26cb797fe8e1343a7560135f4d79e31902ca1d8
GIT binary patch
literal 101
zcmeZI%3@>^ODrqO*DFrWNX<>`VyspwsVqoUvQjEaP0lY$QPNS$OUwoHfy}hb)SQ%J
pC9CLam}psIPH8Gorlis(G_Aa2CKFT0s@-SqzWdK0sALQg0sxX>BQF2|

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/input_output/test_avro.py b/python/cudf/cudf/tests/input_output/test_avro.py
index 8664851f2e3..d0f31828b3d 100644
--- a/python/cudf/cudf/tests/input_output/test_avro.py
+++ b/python/cudf/cudf/tests/input_output/test_avro.py
@@ -1,10 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
 import datetime
 import io
 import pathlib
+import subprocess
+import sys
+import textwrap
 
 import fastavro
 import numpy as np
@@ -644,3 +647,32 @@ def test_avro_reader_multiblock(
     actual_df = cudf.read_avro(buffer, skiprows=skip_rows, num_rows=num_rows)
 
     assert_eq(expected_df, actual_df)
+
+
+def test_avro_reader_no_hang_on_truncated_schema(datadir):
+    path = datadir / "avro" / "hang_input.avro"
+    assert path.is_file(), path
+
+    script = textwrap.dedent(
+        f"""
+        import cudf
+        try:
+            cudf.read_avro({str(path)!r})
+        except Exception:
+            pass
+        """
+    )
+
+    timeout_s = 10
+    try:
+        subprocess.run(
+            [sys.executable, "-c", script],
+            timeout=timeout_s,
+            check=False,
+            capture_output=True,
+        )
+    except subprocess.TimeoutExpired:
+        pytest.fail(
+            f"cudf.read_avro hung on malformed input {path.name!r} "
+            f"(no completion within {timeout_s}s)"
+        )

From 112830f01f3cf492a1d25c25999b4dedc44acd32 Mon Sep 17 00:00:00 2001
From: Paul Taylor <178183+trxcllnt@users.noreply.github.com>
Date: Wed, 6 May 2026 13:29:07 -0700
Subject: [PATCH 02/12] Use `token.rapids.nvidia.com` when issuing S3 bucket
 creds in devcontainers (#22338)

Set AWS_IDP_URL and update AWS_ROLE_ARN to use `token.rapids.nvidia.com`

Authors:
  - Paul Taylor (https://github.com/trxcllnt)

Approvers:
  - Gil Forsyth (https://github.com/gforsyth)

URL: https://github.com/rapidsai/cudf/pull/22338
---
 .devcontainer/Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 4be36d4402c..b4b2ecb69e0 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -37,7 +37,8 @@ ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAG
 ###
 # sccache configuration
 ###
-ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
+ENV AWS_IDP_URL="https://token.rapids.nvidia.com"
+ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/rapids-token-sccache-devs"
 ENV SCCACHE_REGION="us-east-2"
 ENV SCCACHE_BUCKET="rapids-sccache-devs"
 ENV SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true

From d24f7703fa7a086bd75a6a7886bd5044768e1c0a Mon Sep 17 00:00:00 2001
From: Kyle Edwards <kyedwards@nvidia.com>
Date: Wed, 6 May 2026 16:41:36 -0400
Subject: [PATCH 03/12] Use static cudart by default (#22397)

Issue: https://github.com/rapidsai/build-planning/issues/235

Authors:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - MithunR (https://github.com/mythrocks)

URL: https://github.com/rapidsai/cudf/pull/22397
---
 .agents/skills/build-test-cudf-java/SKILL.md    |  2 +-
 conda/recipes/cudf/recipe.yaml                  |  1 -
 cpp/CMakeLists.txt                              | 16 ++++++----------
 cpp/cmake/Modules/JitifyPreprocessKernels.cmake |  2 +-
 cpp/tests/CMakeLists.txt                        |  2 +-
 java/README.md                                  |  7 -------
 java/ci/build-in-docker.sh                      |  6 +-----
 java/pom.xml                                    |  2 --
 java/src/main/native/CMakeLists.txt             | 10 +---------
 python/libcudf/CMakeLists.txt                   |  4 +---
 10 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/.agents/skills/build-test-cudf-java/SKILL.md b/.agents/skills/build-test-cudf-java/SKILL.md
index 6284a5e4230..ca9eb575c37 100644
--- a/.agents/skills/build-test-cudf-java/SKILL.md
+++ b/.agents/skills/build-test-cudf-java/SKILL.md
@@ -51,7 +51,7 @@ export MAVEN_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.
 Export `MVN_COMMON_OPTS` to match the CI build configuration in `java/ci/build-in-docker.sh`. For example:
 
 ```bash
-export MVN_COMMON_OPTS="-DCUDF_CPP_BUILD_DIR=$CUDF_CPP_BUILD_DIR -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON -DCUDA_STATIC_CUFILE=ON -DCUDA_STATIC_RUNTIME=ON -DCUDF_JNI_LIBCUDF_STATIC=ON"
+export MVN_COMMON_OPTS="-DCUDF_CPP_BUILD_DIR=$CUDF_CPP_BUILD_DIR -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON -DCUDA_STATIC_CUFILE=ON -DCUDF_JNI_LIBCUDF_STATIC=ON"
 ```
 
 ## Building cudf-java
diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml
index 4d8a9f2f241..d4c8b5edb9d 100644
--- a/conda/recipes/cudf/recipe.yaml
+++ b/conda/recipes/cudf/recipe.yaml
@@ -98,7 +98,6 @@ requirements:
     - pylibcudf =${{ version }}
     - ${{ pin_compatible("rmm", upper_bound="x.x") }}
     - fsspec >=0.6.0
-    - cuda-cudart
     - if: cuda_major == "12"
       then: cuda-python >=12.9.2,<13.0
       else: cuda-python >=13.0.1,<14.0
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c2485171c71..6d684af8d99 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -58,9 +58,6 @@ option(CUDA_ENABLE_LINEINFO
 )
 option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
 
-# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
-option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
-
 set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
 
 if(NOT BUILD_SHARED_LIBS)
@@ -97,7 +94,6 @@ message(
   VERBOSE
   "CUDF: Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler): ${CUDA_ENABLE_LINEINFO}"
 )
-message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}")
 message(VERBOSE
         "CUDF: Build with remote IO (e.g. AWS S3) support through KvikIO: ${CUDF_KVIKIO_REMOTE_IO}"
 )
@@ -1012,7 +1008,7 @@ if(TARGET conda_env)
   target_link_libraries(cudf PRIVATE conda_env)
 endif()
 
-rapids_cuda_set_runtime(cudf USE_STATIC ${CUDA_STATIC_RUNTIME})
+rapids_cuda_set_runtime(cudf USE_STATIC ON)
 
 file(
   WRITE "${CUDF_BINARY_DIR}/fatbin.ld"
@@ -1059,7 +1055,7 @@ if(CUDF_BUILD_TESTUTIL)
     PUBLIC cudf
     PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
   )
-  rapids_cuda_set_runtime(cudftest_default_stream USE_STATIC ${CUDA_STATIC_RUNTIME})
+  rapids_cuda_set_runtime(cudftest_default_stream USE_STATIC ON)
 
   add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream)
 
@@ -1090,7 +1086,7 @@ if(CUDF_BUILD_TESTUTIL)
     cudftestutil INTERFACE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}>"
                            "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
   )
-  rapids_cuda_set_runtime(cudftestutil USE_STATIC ${CUDA_STATIC_RUNTIME})
+  rapids_cuda_set_runtime(cudftestutil USE_STATIC ON)
   add_library(cudf::cudftestutil ALIAS cudftestutil)
 
   add_library(cudftestutil_impl INTERFACE)
@@ -1151,7 +1147,7 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
     endif()
 
     set(sanitizer_relative_genex
-        "$<PATH:RELATIVE_PATH,$<TARGET_FILE_DIR:CUDA::sanitizer>,$<TARGET_FILE_DIR:CUDA::cudart>>"
+        "$<PATH:RELATIVE_PATH,$<TARGET_FILE_DIR:CUDA::sanitizer>,$<TARGET_FILE_DIR:CUDA::cudart_static>>"
     )
     set_target_properties(
       ${_tgt}
@@ -1166,9 +1162,9 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
       ${_tgt} PRIVATE "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
     )
     target_include_directories(${_tgt} PRIVATE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/include>")
-    target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm CUDA::sanitizer)
+    target_link_libraries(${_tgt} PUBLIC CUDA::cudart_static rmm::rmm CUDA::sanitizer)
 
-    rapids_cuda_set_runtime(${_tgt} USE_STATIC ${CUDA_STATIC_RUNTIME})
+    rapids_cuda_set_runtime(${_tgt} USE_STATIC ON)
     add_library(cudf::${_tgt} ALIAS ${_tgt})
 
     if("${_mode}" STREQUAL "testing")
diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
index 10ba33eb397..d035e1ea6ab 100644
--- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
+++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
@@ -9,7 +9,7 @@
 add_executable(jitify_preprocess "${JITIFY_INCLUDE_DIR}/jitify2_preprocess.cpp")
 
 target_compile_definitions(jitify_preprocess PRIVATE "_FILE_OFFSET_BITS=64")
-rapids_cuda_set_runtime(jitify_preprocess USE_STATIC ${CUDA_STATIC_RUNTIME})
+rapids_cuda_set_runtime(jitify_preprocess USE_STATIC ON)
 target_link_libraries(jitify_preprocess PUBLIC ${CMAKE_DL_LIBS})
 
 # Take a list of files to JIT-compile and run them through jitify_preprocess.
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 68cde65c57b..a45b7280127 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -61,7 +61,7 @@ function(ConfigureTest CMAKE_TEST_NAME)
     ${CMAKE_TEST_NAME} PRIVATE cudf::cudftestutil_objects nvtx3::nvtx3-cpp
                                $<TARGET_NAME_IF_EXISTS:conda_env> "${_CUDF_TEST_EXTRA_LIBS}"
   )
-  rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME})
+  rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ON)
   rapids_test_add(
     NAME ${CMAKE_TEST_NAME}
     COMMAND ${CMAKE_TEST_NAME}
diff --git a/java/README.md b/java/README.md
index 7b33f303cf3..e1552712587 100644
--- a/java/README.md
+++ b/java/README.md
@@ -79,13 +79,6 @@ If you decide to build without Docker and the build script, examining the cmake
 settings in the [Java CI build script](ci/build-in-docker.sh) can be helpful if you are
 encountering difficulties during the build.
 
-## Statically Linking the CUDA Runtime
-
-If you use the default cmake options libcudart will be dynamically linked to libcudf and libcudfjni.
-To build with a static CUDA runtime, build libcudf with the `-DCUDA_STATIC_RUNTIME=ON` as a cmake
-parameter, and similarly build with `-DCUDA_STATIC_RUNTIME=ON` when building the Java bindings
-with Maven.
-
 ### Building with a libcudf Archive
 
 When statically linking the CUDA runtime, it is recommended to build cuDF as an archive rather than
diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh
index 66140f387fd..e15536c8b6b 100755
--- a/java/ci/build-in-docker.sh
+++ b/java/ci/build-in-docker.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 #
-# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -10,7 +10,6 @@ gcc --version
 
 SKIP_JAVA_TESTS=${SKIP_JAVA_TESTS:-true}
 BUILD_CPP_TESTS=${BUILD_CPP_TESTS:-OFF}
-ENABLE_CUDA_STATIC_RUNTIME=${ENABLE_CUDA_STATIC_RUNTIME:-ON}
 ENABLE_PTDS=${ENABLE_PTDS:-ON}
 RMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL:-OFF}
 ENABLE_NVTX=${ENABLE_NVTX:-ON}
@@ -27,7 +26,6 @@ OUT_PATH="$WORKSPACE/$OUT"
 echo "SIGN_FILE: $SIGN_FILE,\
  SKIP_JAVA_TESTS: $SKIP_JAVA_TESTS,\
  BUILD_CPP_TESTS: $BUILD_CPP_TESTS,\
- ENABLE_CUDA_STATIC_RUNTIME: $ENABLE_CUDA_STATIC_RUNTIME,\
  ENABLED_PTDS: $ENABLE_PTDS,\
  ENABLE_NVTX: $ENABLE_NVTX,\
  ENABLE_GDS: $ENABLE_GDS,\
@@ -47,7 +45,6 @@ mkdir -p "$LIBCUDF_BUILD_PATH"
 cd "$LIBCUDF_BUILD_PATH"
 cmake .. -G"${CMAKE_GENERATOR}" \
          -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \
-         -DCUDA_STATIC_RUNTIME="$ENABLE_CUDA_STATIC_RUNTIME" \
          -DUSE_NVTX="$ENABLE_NVTX" \
          -DCUDF_LARGE_STRINGS_DISABLED=ON \
          -DCUDF_USE_ARROW_STATIC=ON \
@@ -70,7 +67,6 @@ BUILD_ARG=(
   "-Dmaven.repo.local=$WORKSPACE/.m2"
   "-DskipTests=$SKIP_JAVA_TESTS"
   "-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS"
-  "-DCUDA_STATIC_RUNTIME=$ENABLE_CUDA_STATIC_RUNTIME"
   "-DCUDF_JNI_LIBCUDF_STATIC=ON"
   "-DUSE_GDS=$ENABLE_GDS"
   "-Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest"
diff --git a/java/pom.xml b/java/pom.xml
index 5df61ec4352..12af51eba71 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -153,7 +153,6 @@
         <skipNativeCopy>false</skipNativeCopy>
         <cxx.flags/>
         <CMAKE_EXPORT_COMPILE_COMMANDS>OFF</CMAKE_EXPORT_COMPILE_COMMANDS>
-        <CUDA_STATIC_RUNTIME>OFF</CUDA_STATIC_RUNTIME>
         <CUDF_USE_PER_THREAD_DEFAULT_STREAM>OFF</CUDF_USE_PER_THREAD_DEFAULT_STREAM>
         <USE_GDS>OFF</USE_GDS>
         <CMAKE_CUDA_ARCHITECTURES>RAPIDS</CMAKE_CUDA_ARCHITECTURES>
@@ -484,7 +483,6 @@
                                     <env key="CUDF_CPP_BUILD_DIR" value="${CUDF_CPP_BUILD_DIR}"/>
                                     <arg value="${basedir}/src/main/native"/>
                                     <arg line="${cmake.ccache.opts}"/>
-                                    <arg value="-DCUDA_STATIC_RUNTIME=${CUDA_STATIC_RUNTIME}" />
                                     <arg value="-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM}" />
                                     <arg value="-DUSE_GDS=${USE_GDS}" />
                                     <arg value="-DCMAKE_CXX_FLAGS=${cxx.flags}"/>
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 1e7df3802b9..208bc4035c9 100644
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -29,7 +29,6 @@ option(USE_NVTX "Build with NVTX support" ON)
 option(BUILD_SHARED_LIBS "Build cuDF JNI shared libraries" ON)
 option(BUILD_TESTS "Configure CMake to build tests" ON)
 option(CUDF_USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF)
-option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
 option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF)
 option(CUDF_JNI_LIBCUDF_STATIC "Link with libcudf.a" OFF)
 option(CUDF_JNI_ENABLE_PROFILING "Build with profiling support" ON)
@@ -41,7 +40,6 @@ message(VERBOSE "CUDF_JNI: Configure CMake to build tests: ${BUILD_TESTS}")
 message(VERBOSE
         "CUDF_JNI: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}"
 )
-message(VERBOSE "CUDF_JNI: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}")
 message(VERBOSE "CUDF_JNI: Build with GPUDirect Storage support: ${USE_GDS}")
 message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}")
 
@@ -279,13 +277,7 @@ target_link_libraries(
 #   cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic
 #   linking
 
-if(CUDA_STATIC_RUNTIME)
-  # Tell CMake what CUDA language runtime to use
-  set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Static)
-else()
-  # Tell CMake what CUDA language runtime to use
-  set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Shared)
-endif()
+set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Static)
 
 # ##################################################################################################
 # * install shared libraries ----------------------------------------------------------------------
diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt
index 6feea8e8ba6..7f5176048ad 100644
--- a/python/libcudf/CMakeLists.txt
+++ b/python/libcudf/CMakeLists.txt
@@ -1,6 +1,6 @@
 # =============================================================================
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 # =============================================================================
@@ -63,8 +63,6 @@ if(NOT USE_NVCOMP_RUNTIME_WHEEL)
   endif()
 endif()
 
-set(CUDA_STATIC_RUNTIME ON)
-
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
 
 add_subdirectory(../../cpp cudf-cpp)

From 41be3965f343dc82df3fb49cb39fe9b9ac27f232 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 6 May 2026 17:33:21 -0500
Subject: [PATCH 04/12] Fix `to_array` to return non-corrupted data (#22342)

Fixes #22136

This PR guards the homogeneous numeric `DataFrame.to_cupy` fast path so it only uses `table_to_array` when `dtype` is `None` or exactly matches the source column `dtype`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/apps/pre-commit-ci

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/22342
---
 python/cudf/cudf/core/frame.py                     |  1 +
 .../cudf/tests/dataframe/methods/test_to_cupy.py   | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index fc634cadf43..05bd1be095f 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -826,6 +826,7 @@ def to_cupy(
             self._num_columns > 1
             and na_value is None
             and self._columns[0].dtype.kind in {"i", "u", "f", "b"}
+            and (dtype is None or dtype == self._columns[0].dtype)
             and all(
                 not col.nullable and col.dtype == self._columns[0].dtype
                 for col in self._columns
diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py
index 44ee7a4278d..3eb69e0e928 100644
--- a/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py
+++ b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 import cupy as cp
@@ -64,6 +64,18 @@ def test_dataframe_to_cupy():
         np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i])
 
 
+@pytest.mark.parametrize("in_dtype", ["int32", "int64", "float32", "float64"])
+@pytest.mark.parametrize("out_dtype", ["int32", "int64", "float32", "float64"])
+def test_dataframe_to_cupy_dtype(in_dtype, out_dtype):
+    data = np.arange(12, dtype=in_dtype).reshape(3, 4)
+    df = cudf.DataFrame(data)
+
+    result = df.to_cupy(dtype=out_dtype)
+
+    assert result.dtype == np.dtype(out_dtype)
+    np.testing.assert_allclose(result.get(), data.astype(out_dtype))
+
+
 @pytest.mark.parametrize("has_nulls", [False, True])
 @pytest.mark.parametrize("use_na_value", [False, True])
 def test_dataframe_to_cupy_single_column(has_nulls, use_na_value):

From 05ab76205f33e8e45ed88e9bfc15cb9d20cbb923 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 May 2026 16:21:33 -0700
Subject: [PATCH 05/12] Use cudaStream_t instead of cuda_stream_view in
 pylibcudf Cython (#22368)

Contributes to https://github.com/rapidsai/rmm/issues/2359

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/22368
---
 .../cudf_polars/utils/cuda_stream.py          |   5 +-
 python/pylibcudf/pylibcudf/binaryop.pxd       |   5 +-
 python/pylibcudf/pylibcudf/binaryop.pyi       |   6 +-
 python/pylibcudf/pylibcudf/binaryop.pyx       |  16 +-
 python/pylibcudf/pylibcudf/column.pxd         |  19 +-
 python/pylibcudf/pylibcudf/column.pyi         |  32 ++-
 python/pylibcudf/pylibcudf/column.pyx         | 118 ++++----
 .../pylibcudf/pylibcudf/column_factories.pxd  |  15 +-
 .../pylibcudf/pylibcudf/column_factories.pyi  |  16 +-
 .../pylibcudf/pylibcudf/column_factories.pyx  |  58 ++--
 python/pylibcudf/pylibcudf/concatenate.pxd    |   5 +-
 python/pylibcudf/pylibcudf/concatenate.pyi    |   6 +-
 python/pylibcudf/pylibcudf/concatenate.pyx    |  16 +-
 .../pylibcudf/pylibcudf/contiguous_split.pxd  |   8 +-
 .../pylibcudf/pylibcudf/contiguous_split.pyi  |  14 +-
 .../pylibcudf/pylibcudf/contiguous_split.pyx  |  46 ++--
 python/pylibcudf/pylibcudf/copying.pxd        |  27 +-
 python/pylibcudf/pylibcudf/copying.pyi        |  32 ++-
 python/pylibcudf/pylibcudf/copying.pyx        | 116 ++++----
 python/pylibcudf/pylibcudf/datetime.pxd       |  23 +-
 python/pylibcudf/pylibcudf/datetime.pyi       |  24 +-
 python/pylibcudf/pylibcudf/datetime.pyx       |  93 ++++---
 .../pylibcudf/experimental/_join_streams.pxd  |   5 +-
 .../pylibcudf/experimental/_join_streams.pyi  |   8 +-
 .../pylibcudf/experimental/_join_streams.pyx  |  20 +-
 python/pylibcudf/pylibcudf/filling.pxd        |  13 +-
 python/pylibcudf/pylibcudf/filling.pyi        |  17 +-
 python/pylibcudf/pylibcudf/filling.pyx        |  48 ++--
 python/pylibcudf/pylibcudf/groupby.pxd        |  15 +-
 python/pylibcudf/pylibcudf/groupby.pyi        |  14 +-
 python/pylibcudf/pylibcudf/groupby.pyx        |  68 +++--
 python/pylibcudf/pylibcudf/hashing.pxd        |  23 +-
 python/pylibcudf/pylibcudf/hashing.pyi        |  24 +-
 python/pylibcudf/pylibcudf/hashing.pyx        |  93 ++++---
 python/pylibcudf/pylibcudf/interop.pxd        |   7 +-
 python/pylibcudf/pylibcudf/interop.pyi        |  12 +-
 python/pylibcudf/pylibcudf/interop.pyx        |  17 +-
 python/pylibcudf/pylibcudf/io/avro.pxd        |   5 +-
 python/pylibcudf/pylibcudf/io/avro.pyi        |   6 +-
 python/pylibcudf/pylibcudf/io/avro.pyx        |   8 +-
 python/pylibcudf/pylibcudf/io/csv.pxd         |   7 +-
 python/pylibcudf/pylibcudf/io/csv.pyi         |   8 +-
 python/pylibcudf/pylibcudf/io/csv.pyx         |  13 +-
 .../pylibcudf/io/experimental/hybrid_scan.pxd |   2 +-
 .../pylibcudf/io/experimental/hybrid_scan.pyi |  20 +-
 .../pylibcudf/io/experimental/hybrid_scan.pyx |  66 ++---
 python/pylibcudf/pylibcudf/io/json.pxd        |  11 +-
 python/pylibcudf/pylibcudf/io/json.pyi        |  10 +-
 python/pylibcudf/pylibcudf/io/json.pyx        |  31 ++-
 python/pylibcudf/pylibcudf/io/orc.pxd         |   9 +-
 python/pylibcudf/pylibcudf/io/orc.pyi         |  10 +-
 python/pylibcudf/pylibcudf/io/orc.pyx         |  24 +-
 python/pylibcudf/pylibcudf/io/parquet.pxd     |   8 +-
 python/pylibcudf/pylibcudf/io/parquet.pyi     |  11 +-
 python/pylibcudf/pylibcudf/io/parquet.pyx     |  26 +-
 python/pylibcudf/pylibcudf/io/text.pxd        |   5 +-
 python/pylibcudf/pylibcudf/io/text.pyi        |   6 +-
 python/pylibcudf/pylibcudf/io/text.pyx        |  14 +-
 python/pylibcudf/pylibcudf/io/timezone.pxd    |   6 +-
 python/pylibcudf/pylibcudf/io/timezone.pyi    |   6 +-
 python/pylibcudf/pylibcudf/io/timezone.pyx    |  14 +-
 python/pylibcudf/pylibcudf/io/types.pxd       |   5 +-
 python/pylibcudf/pylibcudf/io/types.pyx       |   3 +-
 python/pylibcudf/pylibcudf/join.pxd           |  35 ++-
 python/pylibcudf/pylibcudf/join.pyi           |  40 +--
 python/pylibcudf/pylibcudf/join.pyx           | 211 ++++++++------
 python/pylibcudf/pylibcudf/json.pxd           |   5 +-
 python/pylibcudf/pylibcudf/json.pyi           |   6 +-
 python/pylibcudf/pylibcudf/json.pyx           |  12 +-
 python/pylibcudf/pylibcudf/labeling.pxd       |   5 +-
 python/pylibcudf/pylibcudf/labeling.pyi       |   6 +-
 python/pylibcudf/pylibcudf/labeling.pyx       |  12 +-
 .../pylibcudf/pylibcudf/libcudf/binaryop.pxd  |  10 +-
 .../pylibcudf/libcudf/column/column.pxd       |   6 +-
 .../libcudf/column/column_factories.pxd       |  28 +-
 .../pylibcudf/libcudf/concatenate.pxd         |   6 +-
 .../pylibcudf/libcudf/contiguous_split.pxd    |   8 +-
 .../pylibcudf/pylibcudf/libcudf/copying.pxd   |  40 +--
 .../pylibcudf/pylibcudf/libcudf/datetime.pxd  |  24 +-
 .../libcudf/detail/utilities/stream_pool.pxd  |  29 +-
 .../pylibcudf/libcudf/distinct_count.pxd      |   6 +-
 .../pylibcudf/pylibcudf/libcudf/filling.pxd   |  14 +-
 .../pylibcudf/pylibcudf/libcudf/groupby.pxd   |  12 +-
 python/pylibcudf/pylibcudf/libcudf/hash.pxd   |  22 +-
 .../pylibcudf/pylibcudf/libcudf/interop.pxd   |  24 +-
 .../pylibcudf/pylibcudf/libcudf/io/avro.pxd   |   4 +-
 python/pylibcudf/pylibcudf/libcudf/io/csv.pxd |   6 +-
 .../pylibcudf/libcudf/io/hybrid_scan.pxd      |  20 +-
 .../pylibcudf/pylibcudf/libcudf/io/json.pxd   |   6 +-
 python/pylibcudf/pylibcudf/libcudf/io/orc.pxd |   8 +-
 .../pylibcudf/libcudf/io/orc_metadata.pxd     |   6 +-
 .../pylibcudf/libcudf/io/parquet.pxd          |  12 +-
 .../pylibcudf/pylibcudf/libcudf/io/text.pxd   |   4 +-
 .../pylibcudf/libcudf/io/timezone.pxd         |   4 +-
 python/pylibcudf/pylibcudf/libcudf/join.pxd   |  52 ++--
 python/pylibcudf/pylibcudf/libcudf/json.pxd   |   4 +-
 .../pylibcudf/pylibcudf/libcudf/labeling.pxd  |   4 +-
 .../pylibcudf/libcudf/lists/combine.pxd       |   8 +-
 .../pylibcudf/libcudf/lists/contains.pxd      |  12 +-
 .../libcudf/lists/count_elements.pxd          |   4 +-
 .../pylibcudf/libcudf/lists/explode.pxd       |   4 +-
 .../pylibcudf/libcudf/lists/extract.pxd       |   6 +-
 .../pylibcudf/libcudf/lists/filling.pxd       |   6 +-
 .../pylibcudf/libcudf/lists/gather.pxd        |   4 +-
 .../libcudf/lists/lists_column_view.pxd       |   4 +-
 .../pylibcudf/libcudf/lists/reverse.pxd       |   4 +-
 .../libcudf/lists/set_operations.pxd          |  10 +-
 .../pylibcudf/libcudf/lists/sorting.pxd       |   6 +-
 .../libcudf/lists/stream_compaction.pxd       |   6 +-
 python/pylibcudf/pylibcudf/libcudf/merge.pxd  |   4 +-
 .../pylibcudf/pylibcudf/libcudf/null_mask.pxd |  16 +-
 .../libcudf/nvtext/byte_pair_encode.pxd       |   6 +-
 .../pylibcudf/libcudf/nvtext/deduplicate.pxd  |   8 +-
 .../libcudf/nvtext/edit_distance.pxd          |   6 +-
 .../libcudf/nvtext/generate_ngrams.pxd        |   8 +-
 .../pylibcudf/libcudf/nvtext/jaccard.pxd      |   4 +-
 .../pylibcudf/libcudf/nvtext/minhash.pxd      |  10 +-
 .../libcudf/nvtext/ngrams_tokenize.pxd        |   4 +-
 .../pylibcudf/libcudf/nvtext/normalize.pxd    |   8 +-
 .../pylibcudf/libcudf/nvtext/replace.pxd      |   6 +-
 .../pylibcudf/libcudf/nvtext/stemmer.pxd      |   8 +-
 .../pylibcudf/libcudf/nvtext/tokenize.pxd     |  18 +-
 .../libcudf/nvtext/wordpiece_tokenize.pxd     |   6 +-
 .../pylibcudf/libcudf/partitioning.pxd        |  10 +-
 .../pylibcudf/pylibcudf/libcudf/quantiles.pxd |   6 +-
 python/pylibcudf/pylibcudf/libcudf/reduce.pxd |   8 +-
 .../pylibcudf/pylibcudf/libcudf/replace.pxd   |  18 +-
 .../pylibcudf/pylibcudf/libcudf/reshape.pxd   |   8 +-
 .../pylibcudf/pylibcudf/libcudf/rolling.pxd   |  10 +-
 python/pylibcudf/pylibcudf/libcudf/round.pxd  |   6 +-
 .../pylibcudf/libcudf/scalar/scalar.pxd       |  18 +-
 .../libcudf/scalar/scalar_factories.pxd       |  18 +-
 python/pylibcudf/pylibcudf/libcudf/search.pxd |   8 +-
 .../pylibcudf/pylibcudf/libcudf/sorting.pxd   |  26 +-
 .../pylibcudf/libcudf/stream_compaction.pxd   |  18 +-
 .../pylibcudf/libcudf/strings/attributes.pxd  |   8 +-
 .../pylibcudf/libcudf/strings/capitalize.pxd  |   8 +-
 .../pylibcudf/libcudf/strings/case.pxd        |   8 +-
 .../pylibcudf/libcudf/strings/char_types.pxd  |   6 +-
 .../pylibcudf/libcudf/strings/combine.pxd     |  12 +-
 .../pylibcudf/libcudf/strings/contains.pxd    |  12 +-
 .../strings/convert/convert_booleans.pxd      |   6 +-
 .../strings/convert/convert_datetime.pxd      |   8 +-
 .../strings/convert/convert_durations.pxd     |   6 +-
 .../strings/convert/convert_fixed_point.pxd   |   8 +-
 .../strings/convert/convert_floats.pxd        |   8 +-
 .../strings/convert/convert_integers.pxd      |  16 +-
 .../libcudf/strings/convert/convert_ipv4.pxd  |   8 +-
 .../libcudf/strings/convert/convert_lists.pxd |   4 +-
 .../libcudf/strings/convert/convert_urls.pxd  |   6 +-
 .../pylibcudf/libcudf/strings/extract.pxd     |   8 +-
 .../pylibcudf/libcudf/strings/find.pxd        |  20 +-
 .../libcudf/strings/find_multiple.pxd         |   6 +-
 .../pylibcudf/libcudf/strings/findall.pxd     |   6 +-
 .../pylibcudf/libcudf/strings/padding.pxd     |   8 +-
 .../pylibcudf/libcudf/strings/repeat.pxd      |   6 +-
 .../pylibcudf/libcudf/strings/replace.pxd     |   8 +-
 .../pylibcudf/libcudf/strings/replace_re.pxd  |   8 +-
 .../pylibcudf/libcudf/strings/reverse.pxd     |   4 +-
 .../libcudf/strings/split/partition.pxd       |   6 +-
 .../pylibcudf/libcudf/strings/split/split.pxd |  20 +-
 .../libcudf/strings/strings_column_view.pxd   |   6 +-
 .../pylibcudf/libcudf/strings/strip.pxd       |   4 +-
 .../pylibcudf/libcudf/strings/substring.pxd   |   6 +-
 .../pylibcudf/libcudf/strings/translate.pxd   |   6 +-
 .../pylibcudf/libcudf/strings/wrap.pxd        |   4 +-
 .../libcudf/structs/structs_column_view.pxd   |   4 +-
 .../pylibcudf/libcudf/table/table.pxd         |   6 +-
 .../pylibcudf/pylibcudf/libcudf/transform.pxd |  20 +-
 .../pylibcudf/pylibcudf/libcudf/transpose.pxd |   4 +-
 python/pylibcudf/pylibcudf/libcudf/unary.pxd  |  14 +-
 .../pylibcudf/libcudf/unique_count.pxd        |   6 +-
 .../libcudf/utilities/default_stream.pxd      |   7 +-
 python/pylibcudf/pylibcudf/lists.pxd          |  39 ++-
 python/pylibcudf/pylibcudf/lists.pyi          |  40 +--
 python/pylibcudf/pylibcudf/lists.pyx          | 167 ++++++-----
 python/pylibcudf/pylibcudf/merge.pxd          |   5 +-
 python/pylibcudf/pylibcudf/merge.pyi          |   6 +-
 python/pylibcudf/pylibcudf/merge.pyx          |  12 +-
 python/pylibcudf/pylibcudf/null_mask.pxd      |  17 +-
 python/pylibcudf/pylibcudf/null_mask.pyi      |  16 +-
 python/pylibcudf/pylibcudf/null_mask.pyx      |  64 +++--
 .../pylibcudf/nvtext/byte_pair_encode.pxd     |   5 +-
 .../pylibcudf/nvtext/byte_pair_encode.pyi     |   8 +-
 .../pylibcudf/nvtext/byte_pair_encode.pyx     |  23 +-
 .../pylibcudf/nvtext/deduplicate.pxd          |   9 +-
 .../pylibcudf/nvtext/deduplicate.pyi          |  10 +-
 .../pylibcudf/nvtext/deduplicate.pyx          |  34 +--
 .../pylibcudf/nvtext/edit_distance.pxd        |   7 +-
 .../pylibcudf/nvtext/edit_distance.pyi        |   8 +-
 .../pylibcudf/nvtext/edit_distance.pyx        |  19 +-
 .../pylibcudf/nvtext/generate_ngrams.pxd      |   9 +-
 .../pylibcudf/nvtext/generate_ngrams.pyi      |  10 +-
 .../pylibcudf/nvtext/generate_ngrams.pyx      |  30 +-
 python/pylibcudf/pylibcudf/nvtext/jaccard.pxd |   5 +-
 python/pylibcudf/pylibcudf/nvtext/jaccard.pyi |   6 +-
 python/pylibcudf/pylibcudf/nvtext/jaccard.pyx |  12 +-
 python/pylibcudf/pylibcudf/nvtext/minhash.pxd |  11 +-
 python/pylibcudf/pylibcudf/nvtext/minhash.pyi |  12 +-
 python/pylibcudf/pylibcudf/nvtext/minhash.pyx |  39 +--
 .../pylibcudf/nvtext/ngrams_tokenize.pxd      |   5 +-
 .../pylibcudf/nvtext/ngrams_tokenize.pyi      |   6 +-
 .../pylibcudf/nvtext/ngrams_tokenize.pyx      |  12 +-
 .../pylibcudf/pylibcudf/nvtext/normalize.pxd  |   9 +-
 .../pylibcudf/pylibcudf/nvtext/normalize.pyi  |  10 +-
 .../pylibcudf/pylibcudf/nvtext/normalize.pyx  |  28 +-
 python/pylibcudf/pylibcudf/nvtext/replace.pxd |   7 +-
 python/pylibcudf/pylibcudf/nvtext/replace.pyi |   8 +-
 python/pylibcudf/pylibcudf/nvtext/replace.pyx |  27 +-
 python/pylibcudf/pylibcudf/nvtext/stemmer.pxd |   7 +-
 python/pylibcudf/pylibcudf/nvtext/stemmer.pyi |   8 +-
 python/pylibcudf/pylibcudf/nvtext/stemmer.pyx |  21 +-
 .../pylibcudf/pylibcudf/nvtext/tokenize.pxd   |  17 +-
 .../pylibcudf/pylibcudf/nvtext/tokenize.pyi   |  20 +-
 .../pylibcudf/pylibcudf/nvtext/tokenize.pyx   |  79 +++---
 .../pylibcudf/nvtext/wordpiece_tokenize.pxd   |   5 +-
 .../pylibcudf/nvtext/wordpiece_tokenize.pyi   |   8 +-
 .../pylibcudf/nvtext/wordpiece_tokenize.pyx   |  19 +-
 python/pylibcudf/pylibcudf/partitioning.pxd   |   7 +-
 python/pylibcudf/pylibcudf/partitioning.pyi   |   8 +-
 python/pylibcudf/pylibcudf/partitioning.pyx   |  30 +-
 python/pylibcudf/pylibcudf/quantiles.pxd      |   7 +-
 python/pylibcudf/pylibcudf/quantiles.pyi      |   8 +-
 python/pylibcudf/pylibcudf/quantiles.pyx      |  21 +-
 python/pylibcudf/pylibcudf/reduce.pxd         |  11 +-
 python/pylibcudf/pylibcudf/reduce.pyi         |  12 +-
 python/pylibcudf/pylibcudf/reduce.pyx         |  36 +--
 python/pylibcudf/pylibcudf/replace.pxd        |  11 +-
 python/pylibcudf/pylibcudf/replace.pyi        |  12 +-
 python/pylibcudf/pylibcudf/replace.pyx        |  51 ++--
 python/pylibcudf/pylibcudf/reshape.pxd        |   9 +-
 python/pylibcudf/pylibcudf/reshape.pyi        |  10 +-
 python/pylibcudf/pylibcudf/reshape.pyx        |  28 +-
 python/pylibcudf/pylibcudf/rolling.pxd        |   9 +-
 python/pylibcudf/pylibcudf/rolling.pyi        |  10 +-
 python/pylibcudf/pylibcudf/rolling.pyx        |  34 ++-
 python/pylibcudf/pylibcudf/round.pxd          |   5 +-
 python/pylibcudf/pylibcudf/round.pyi          |   6 +-
 python/pylibcudf/pylibcudf/round.pyx          |  19 +-
 python/pylibcudf/pylibcudf/scalar.pxd         |   7 +-
 python/pylibcudf/pylibcudf/scalar.pyi         |  19 +-
 python/pylibcudf/pylibcudf/scalar.pyx         | 259 +++++++++++-------
 python/pylibcudf/pylibcudf/search.pxd         |   9 +-
 python/pylibcudf/pylibcudf/search.pyi         |  10 +-
 python/pylibcudf/pylibcudf/search.pyx         |  30 +-
 python/pylibcudf/pylibcudf/sorting.pxd        |  27 +-
 python/pylibcudf/pylibcudf/sorting.pyi        |  28 +-
 python/pylibcudf/pylibcudf/sorting.pyx        | 109 ++++----
 .../pylibcudf/pylibcudf/stream_compaction.pxd |  17 +-
 .../pylibcudf/pylibcudf/stream_compaction.pyi |  18 +-
 .../pylibcudf/pylibcudf/stream_compaction.pyx |  73 ++---
 .../pylibcudf/strings/attributes.pxd          |   9 +-
 .../pylibcudf/strings/attributes.pyi          |  10 +-
 .../pylibcudf/strings/attributes.pyx          |  30 +-
 .../pylibcudf/strings/capitalize.pxd          |   9 +-
 .../pylibcudf/strings/capitalize.pyi          |  10 +-
 .../pylibcudf/strings/capitalize.pyx          |  32 ++-
 python/pylibcudf/pylibcudf/strings/case.pxd   |   9 +-
 python/pylibcudf/pylibcudf/strings/case.pyi   |  10 +-
 python/pylibcudf/pylibcudf/strings/case.pyx   |  30 +-
 .../pylibcudf/strings/char_types.pxd          |   7 +-
 .../pylibcudf/strings/char_types.pyi          |   8 +-
 .../pylibcudf/strings/char_types.pyx          |  21 +-
 .../pylibcudf/pylibcudf/strings/combine.pxd   |   9 +-
 .../pylibcudf/pylibcudf/strings/combine.pyi   |  10 +-
 .../pylibcudf/pylibcudf/strings/combine.pyx   |  38 +--
 .../pylibcudf/pylibcudf/strings/contains.pxd  |  11 +-
 .../pylibcudf/pylibcudf/strings/contains.pyi  |  12 +-
 .../pylibcudf/pylibcudf/strings/contains.pyx  |  43 +--
 .../strings/convert/convert_booleans.pxd      |   7 +-
 .../strings/convert/convert_booleans.pyi      |   8 +-
 .../strings/convert/convert_booleans.pyx      |  21 +-
 .../strings/convert/convert_datetime.pxd      |   9 +-
 .../strings/convert/convert_datetime.pyi      |  10 +-
 .../strings/convert/convert_datetime.pyx      |  30 +-
 .../strings/convert/convert_durations.pxd     |   7 +-
 .../strings/convert/convert_durations.pyi     |   8 +-
 .../strings/convert/convert_durations.pyx     |  21 +-
 .../strings/convert/convert_fixed_point.pxd   |   9 +-
 .../strings/convert/convert_fixed_point.pyi   |  10 +-
 .../strings/convert/convert_fixed_point.pyx   |  30 +-
 .../strings/convert/convert_floats.pxd        |   9 +-
 .../strings/convert/convert_floats.pyi        |  10 +-
 .../strings/convert/convert_floats.pyx        |  32 ++-
 .../strings/convert/convert_integers.pxd      |  15 +-
 .../strings/convert/convert_integers.pyi      |  16 +-
 .../strings/convert/convert_integers.pyx      |  59 ++--
 .../strings/convert/convert_ipv4.pxd          |   9 +-
 .../strings/convert/convert_ipv4.pyi          |  10 +-
 .../strings/convert/convert_ipv4.pyx          |  30 +-
 .../strings/convert/convert_lists.pxd         |   5 +-
 .../strings/convert/convert_lists.pyi         |   6 +-
 .../strings/convert/convert_lists.pyx         |  14 +-
 .../strings/convert/convert_urls.pxd          |   7 +-
 .../strings/convert/convert_urls.pyi          |   8 +-
 .../strings/convert/convert_urls.pyx          |  25 +-
 .../pylibcudf/pylibcudf/strings/extract.pxd   |   9 +-
 .../pylibcudf/pylibcudf/strings/extract.pyi   |  10 +-
 .../pylibcudf/pylibcudf/strings/extract.pyx   |  30 +-
 python/pylibcudf/pylibcudf/strings/find.pxd   |  13 +-
 python/pylibcudf/pylibcudf/strings/find.pyi   |  14 +-
 python/pylibcudf/pylibcudf/strings/find.pyx   |  56 ++--
 .../pylibcudf/strings/find_multiple.pxd       |   7 +-
 .../pylibcudf/strings/find_multiple.pyi       |   8 +-
 .../pylibcudf/strings/find_multiple.pyx       |  21 +-
 .../pylibcudf/pylibcudf/strings/findall.pxd   |   7 +-
 .../pylibcudf/pylibcudf/strings/findall.pyi   |   8 +-
 .../pylibcudf/pylibcudf/strings/findall.pyx   |  21 +-
 .../pylibcudf/pylibcudf/strings/padding.pxd   |   9 +-
 .../pylibcudf/pylibcudf/strings/padding.pyi   |  10 +-
 .../pylibcudf/pylibcudf/strings/padding.pyx   |  30 +-
 python/pylibcudf/pylibcudf/strings/repeat.pxd |   5 +-
 python/pylibcudf/pylibcudf/strings/repeat.pyi |   6 +-
 python/pylibcudf/pylibcudf/strings/repeat.pyx |  14 +-
 .../pylibcudf/pylibcudf/strings/replace.pxd   |   9 +-
 .../pylibcudf/pylibcudf/strings/replace.pyi   |  10 +-
 .../pylibcudf/pylibcudf/strings/replace.pyx   |  32 ++-
 .../pylibcudf/strings/replace_re.pxd          |   7 +-
 .../pylibcudf/strings/replace_re.pyi          |  10 +-
 .../pylibcudf/strings/replace_re.pyx          |  27 +-
 .../pylibcudf/pylibcudf/strings/reverse.pyi   |   6 +-
 .../pylibcudf/pylibcudf/strings/reverse.pyx   |  12 +-
 python/pylibcudf/pylibcudf/strings/slice.pxd  |   5 +-
 python/pylibcudf/pylibcudf/strings/slice.pyi  |   6 +-
 python/pylibcudf/pylibcudf/strings/slice.pyx  |  20 +-
 .../pylibcudf/strings/split/partition.pxd     |   7 +-
 .../pylibcudf/strings/split/partition.pyi     |   8 +-
 .../pylibcudf/strings/split/partition.pyx     |  25 +-
 .../pylibcudf/strings/split/split.pxd         |  19 +-
 .../pylibcudf/strings/split/split.pyi         |  20 +-
 .../pylibcudf/strings/split/split.pyx         |  82 +++---
 python/pylibcudf/pylibcudf/strings/strip.pxd  |   5 +-
 python/pylibcudf/pylibcudf/strings/strip.pyi  |   6 +-
 python/pylibcudf/pylibcudf/strings/strip.pyx  |  14 +-
 .../pylibcudf/pylibcudf/strings/translate.pxd |   7 +-
 .../pylibcudf/pylibcudf/strings/translate.pyi |   8 +-
 .../pylibcudf/pylibcudf/strings/translate.pyx |  21 +-
 python/pylibcudf/pylibcudf/strings/wrap.pxd   |   5 +-
 python/pylibcudf/pylibcudf/strings/wrap.pyi   |   6 +-
 python/pylibcudf/pylibcudf/strings/wrap.pyx   |  12 +-
 python/pylibcudf/pylibcudf/table.pxd          |   7 +-
 python/pylibcudf/pylibcudf/table.pyi          |  10 +-
 python/pylibcudf/pylibcudf/table.pyx          |  29 +-
 python/pylibcudf/pylibcudf/transform.pxd      |  19 +-
 python/pylibcudf/pylibcudf/transform.pyi      |  20 +-
 python/pylibcudf/pylibcudf/transform.pyx      |  92 ++++---
 python/pylibcudf/pylibcudf/transpose.pxd      |   5 +-
 python/pylibcudf/pylibcudf/transpose.pyi      |   6 +-
 python/pylibcudf/pylibcudf/transpose.pyx      |  12 +-
 python/pylibcudf/pylibcudf/unary.pxd          |  15 +-
 python/pylibcudf/pylibcudf/unary.pyi          |  16 +-
 python/pylibcudf/pylibcudf/unary.pyx          |  59 ++--
 python/pylibcudf/pylibcudf/utils.pxd          |   6 +-
 python/pylibcudf/pylibcudf/utils.pyi          |   9 +-
 python/pylibcudf/pylibcudf/utils.pyx          |   6 +-
 python/pylibcudf/tests/test_experimental.py   |  23 +-
 .../pylibcudf/tests/test_stream_protocol.py   |  74 +++++
 357 files changed, 3470 insertions(+), 2967 deletions(-)
 create mode 100644 python/pylibcudf/tests/test_stream_protocol.py

diff --git a/python/cudf_polars/cudf_polars/utils/cuda_stream.py b/python/cudf_polars/cudf_polars/utils/cuda_stream.py
index a42252157b4..c0708d3bea8 100644
--- a/python/cudf_polars/cudf_polars/utils/cuda_stream.py
+++ b/python/cudf_polars/cudf_polars/utils/cuda_stream.py
@@ -13,6 +13,7 @@
 if TYPE_CHECKING:
     from collections.abc import Callable, Sequence
 
+    from pylibcudf.utils import CudaStreamLike
     from rmm.pylibrmm.stream import Stream
 
 
@@ -27,7 +28,7 @@ def get_cuda_stream() -> Stream:
 
 
 def join_cuda_streams(
-    *, downstreams: Sequence[Stream], upstreams: Sequence[Stream]
+    *, downstreams: Sequence[CudaStreamLike], upstreams: Sequence[CudaStreamLike]
 ) -> None:
     """
     Join multiple CUDA streams.
@@ -46,7 +47,7 @@ def join_cuda_streams(
 
 
 def get_joined_cuda_stream(
-    get_cuda_stream: Callable[[], Stream], *, upstreams: Sequence[Stream]
+    get_cuda_stream: Callable[[], Stream], *, upstreams: Sequence[CudaStreamLike]
 ) -> Stream:
     """
     Return a CUDA stream that is joined to the given streams.
diff --git a/python/pylibcudf/pylibcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/binaryop.pxd
index 29c9f3d98ea..a34a02b2191 100644
--- a/python/pylibcudf/pylibcudf/binaryop.pxd
+++ b/python/pylibcudf/pylibcudf/binaryop.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
 from pylibcudf.libcudf.binaryop cimport binary_operator
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .scalar cimport Scalar
@@ -25,7 +24,7 @@ cpdef Column binary_operation(
     RightBinaryOperand rhs,
     binary_operator op,
     DataType output_type,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi
index 52263440db3..1f3c9a2cb64 100644
--- a/python/pylibcudf/pylibcudf/binaryop.pyi
+++ b/python/pylibcudf/pylibcudf/binaryop.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 class BinaryOperator(IntEnum):
     ADD = ...
@@ -52,7 +52,7 @@ def binary_operation(
     rhs: Column | Scalar,
     op: BinaryOperator,
     output_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_supported_operation(
diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx
index a46b6aaaa81..20a69d60727 100644
--- a/python/pylibcudf/pylibcudf/binaryop.pyx
+++ b/python/pylibcudf/pylibcudf/binaryop.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator import dereference
@@ -20,6 +20,7 @@ from .column cimport Column
 from .scalar cimport Scalar
 from .types cimport DataType
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["BinaryOperator", "binary_operation", "is_supported_operation"]
 
@@ -28,7 +29,7 @@ cpdef Column binary_operation(
     RightBinaryOperand rhs,
     binary_operator op,
     DataType output_type,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a binary operation between a column and another column or scalar.
@@ -61,7 +62,8 @@ cpdef Column binary_operation(
         The result of the binary operation
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if LeftBinaryOperand is Column and RightBinaryOperand is Column:
@@ -71,7 +73,7 @@ cpdef Column binary_operation(
                 rhs.view(),
                 op,
                 output_type.c_obj,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif LeftBinaryOperand is Column and RightBinaryOperand is Scalar:
@@ -81,7 +83,7 @@ cpdef Column binary_operation(
                 dereference(rhs.c_obj),
                 op,
                 output_type.c_obj,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif LeftBinaryOperand is Scalar and RightBinaryOperand is Column:
@@ -91,13 +93,13 @@ cpdef Column binary_operation(
                 rhs.view(),
                 op,
                 output_type.c_obj,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
         raise ValueError(f"Invalid arguments {lhs} and {rhs}")
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef bool is_supported_operation(
diff --git a/python/pylibcudf/pylibcudf/column.pxd b/python/pylibcudf/pylibcudf/column.pxd
index 7348d68f6de..429f85f39b0 100644
--- a/python/pylibcudf/pylibcudf/column.pxd
+++ b/python/pylibcudf/pylibcudf/column.pxd
@@ -6,7 +6,6 @@ from libcpp.vector cimport vector
 from libc.stdint cimport uint64_t
 
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport (
@@ -27,7 +26,7 @@ cdef class OwnerWithCAI:
     cdef dict cai
 
     @staticmethod
-    cdef create(column_view cv, object owner, Stream stream)
+    cdef create(column_view cv, object owner, object stream)
 
 
 cdef class OwnerMaskWithCAI:
@@ -38,7 +37,7 @@ cdef class OwnerMaskWithCAI:
     cdef create(column_view cv, object owner)
 
 
-cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=*)
+cdef gpumemoryview _copy_array_to_device(object buf, object stream=*)
 
 
 cdef class Column:
@@ -61,7 +60,7 @@ cdef class Column:
     @staticmethod
     cdef Column from_libcudf(
         unique_ptr[column] libcudf_col,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr
     )
 
@@ -72,7 +71,7 @@ cdef class Column:
     cdef Column from_column_view_of_arbitrary(
         const column_view& cv,
         object owner,
-        Stream stream,
+        object stream,
     )
 
     @staticmethod
@@ -81,10 +80,10 @@ cdef class Column:
         tuple shape,
         DataType dtype,
         Column base=*,
-        Stream stream=*,
+        object stream=*,
     )
 
-    cpdef Scalar to_scalar(self, Stream stream=*, DeviceMemoryResource mr=*)
+    cpdef Scalar to_scalar(self, object stream=*, DeviceMemoryResource mr=*)
     cpdef DataType type(self)
     cpdef Column child(self, size_type index)
     cpdef size_type num_children(self)
@@ -95,7 +94,7 @@ cdef class Column:
     cpdef object data(self)
     cpdef object null_mask(self)
     cpdef list children(self)
-    cpdef Column copy(self, Stream stream=*, DeviceMemoryResource mr=*)
+    cpdef Column copy(self, object stream=*, DeviceMemoryResource mr=*)
     cpdef uint64_t device_buffer_size(self)
     cpdef Column with_mask(self, object, size_type, bint validate=*)
 
@@ -108,10 +107,10 @@ cdef class ListsColumnView:
     cpdef child(self)
     cpdef offsets(self)
     cdef lists_column_view view(self) nogil
-    cpdef Column get_sliced_child(self, Stream stream=*)
+    cpdef Column get_sliced_child(self, object stream=*)
 
 
 cdef class StructsColumnView:
     cdef Column _column
     cdef structs_column_view view(self) nogil
-    cpdef Column get_sliced_child(self, int index, Stream stream=*)
+    cpdef Column get_sliced_child(self, int index, object stream=*)
diff --git a/python/pylibcudf/pylibcudf/column.pyi b/python/pylibcudf/pylibcudf/column.pyi
index 3ac4641ac13..3ff7f53f356 100644
--- a/python/pylibcudf/pylibcudf/column.pyi
+++ b/python/pylibcudf/pylibcudf/column.pyi
@@ -6,12 +6,12 @@ from typing import Any, Protocol, TypedDict
 
 from rmm.pylibrmm.device_buffer import DeviceBuffer
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf._interop_helpers import ArrowLike, ColumnMetadata
 from pylibcudf.scalar import Scalar
 from pylibcudf.span import Span
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 class ArrayInterfaceBase(TypedDict):
     shape: tuple[int, ...]
@@ -64,7 +64,7 @@ class Column:
     def num_children(self) -> int: ...
     def copy(
         self,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Column: ...
     def device_buffer_size(self) -> int: ...
@@ -77,19 +77,19 @@ class Column:
     def from_scalar(
         scalar: Scalar,
         size: int,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Column: ...
     def to_scalar(
         self,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Scalar: ...
     @staticmethod
     def all_null_like(
         like: Column,
         size: int,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Column: ...
     @staticmethod
@@ -99,32 +99,34 @@ class Column:
     def to_arrow(
         self,
         metadata: ColumnMetadata | str | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> ArrowLike: ...
     # Private methods below are included because polars is currently using them,
     # but we want to remove stubs for these private methods eventually
     def _to_schema(self, metadata: Any = None) -> Any: ...
-    def _to_host_array(self, stream: Stream) -> Any: ...
+    def _to_host_array(self, stream: CudaStreamLike) -> Any: ...
     @staticmethod
     def from_arrow(
         obj: ArrowLike,
         dtype: DataType | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Column: ...
     @classmethod
     def from_cuda_array_interface(
-        cls, obj: SupportsCudaArrayInterface, stream: Stream | None = None
+        cls,
+        obj: SupportsCudaArrayInterface,
+        stream: CudaStreamLike | None = None,
     ) -> Column: ...
     @classmethod
     def from_array_interface(
-        cls, obj: SupportsArrayInterface, stream: Stream | None = None
+        cls, obj: SupportsArrayInterface, stream: CudaStreamLike | None = None
     ) -> Column: ...
     @classmethod
     def from_array(
         cls,
         obj: SupportsCudaArrayInterface | SupportsArrayInterface,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> Column: ...
     @staticmethod
     def struct_from_children(children: Sequence[Column]) -> Column: ...
@@ -132,21 +134,23 @@ class Column:
     def from_iterable_of_py(
         obj: Iterable,
         dtype: DataType | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> Column: ...
 
 class ListsColumnView:
     def __init__(self, column: Column): ...
     def child(self) -> Column: ...
     def offsets(self) -> Column: ...
-    def get_sliced_child(self, stream: Stream | None = None) -> Column: ...
+    def get_sliced_child(
+        self, stream: CudaStreamLike | None = None
+    ) -> Column: ...
 
 class StructsColumnView:
     def __init__(self, column: Column): ...
     def child(self) -> Column: ...
     def offsets(self) -> Column: ...
     def get_sliced_child(
-        self, index: int, stream: Stream | None = None
+        self, index: int, stream: CudaStreamLike | None = None
     ) -> Column: ...
 
 def is_c_contiguous(
diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx
index 96137f96256..fc8745dae26 100644
--- a/python/pylibcudf/pylibcudf/column.pyx
+++ b/python/pylibcudf/pylibcudf/column.pyx
@@ -67,6 +67,7 @@ from itertools import accumulate
 import functools
 import operator
 from typing import Iterable
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 try:
     import pyarrow as pa
@@ -96,7 +97,7 @@ cdef class _ArrowColumnHolder:
 cdef class OwnerWithCAI:
     """An interface for column view's data with gpumemoryview via CAI."""
     @staticmethod
-    cdef create(column_view cv, object owner, Stream stream):
+    cdef create(column_view cv, object owner, object stream):
         obj = OwnerWithCAI()
         obj.owner = owner
         # The default size of 0 will be applied for any type that stores data in the
@@ -108,7 +109,7 @@ cdef class OwnerWithCAI:
             # Cast to Python integers before multiplying to avoid overflow.
             size = int(cv.size()) * int(cpp_size_of(cv.type()))
         elif cv.type().id() == type_id.STRING:
-            size = strings_column_view(cv).chars_size(stream.view())
+            size = strings_column_view(cv).chars_size((<Stream>stream).view().value())
 
         obj.cai = {
             "shape": (size,),
@@ -156,7 +157,7 @@ class ArrayInterfaceWrapper:
         self.__array_interface__ = iface
 
 
-cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=None):
+cdef gpumemoryview _copy_array_to_device(object buf, object stream=None):
     """
     Copy a host-side array.array buffer to device memory.
 
@@ -175,11 +176,11 @@ cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=None):
     cdef memoryview mv = memoryview(buf)
     cdef uintptr_t ptr = <uintptr_t>mv.obj.buffer_info()[0]
     cdef size_t nbytes = len(mv) * mv.itemsize
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
 
     return gpumemoryview(DeviceBuffer.to_device(
         <const unsigned char[:nbytes:1]><const unsigned char*>ptr,
-        stream
+        _stream
     ))
 
 
@@ -401,7 +402,7 @@ cdef class Column:
     def from_arrow(
         obj: ArrowLike,
         dtype: DataType | None = None,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ) -> ArrowLike:
         """
@@ -453,7 +454,8 @@ cdef class Column:
         cdef _ArrowColumnHolder result
         cdef unique_ptr[arrow_column] c_result
 
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
 
         if hasattr(obj, "__arrow_c_device_array__"):
@@ -469,7 +471,7 @@ cdef class Column:
                 c_result = make_unique[arrow_column](
                     move(dereference(c_schema)),
                     move(dereference(c_device_array)),
-                    stream.view(),
+                    _cs,
                     result.mr.get_mr(),
                 )
             result.col.swap(c_result)
@@ -477,7 +479,7 @@ cdef class Column:
             return Column.from_column_view_of_arbitrary(
                 result.col.get().view(),
                 result,
-                stream,
+                _stream,
             )
         elif hasattr(obj, "__arrow_c_array__"):
             schema, h_array = obj.__arrow_c_array__()
@@ -490,7 +492,7 @@ cdef class Column:
                 c_result = make_unique[arrow_column](
                     move(dereference(c_schema)),
                     move(dereference(c_array)),
-                    stream.view(),
+                    _cs,
                     result.mr.get_mr(),
                 )
             result.col.swap(c_result)
@@ -498,7 +500,7 @@ cdef class Column:
             return Column.from_column_view_of_arbitrary(
                 result.col.get().view(),
                 result,
-                stream,
+                _stream,
             )
         elif hasattr(obj, "__arrow_c_stream__"):
             arrow_stream = obj.__arrow_c_stream__()
@@ -514,7 +516,7 @@ cdef class Column:
             with nogil:
                 c_result = make_unique[arrow_column](
                     move(dereference(c_arrow_stream)),
-                    stream.view(),
+                    _cs,
                     result.mr.get_mr(),
                 )
             result.col.swap(c_result)
@@ -522,7 +524,7 @@ cdef class Column:
             return Column.from_column_view_of_arbitrary(
                 result.col.get().view(),
                 result,
-                stream,
+                _stream,
             )
         elif hasattr(obj, "__arrow_c_device_stream__"):
             # TODO: When we add support for this case, it should be moved above
@@ -656,7 +658,7 @@ cdef class Column:
     @staticmethod
     cdef Column from_libcudf(
         unique_ptr[column] libcudf_col,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr
     ):
         """Create a Column from a libcudf column.
@@ -667,6 +669,7 @@ cdef class Column:
         """
         assert stream is not None, "stream cannot be None"
         assert mr is not None, "mr cannot be None"
+        cdef Stream _stream = <Stream>stream
         cdef DataType dtype = DataType.from_libcudf(libcudf_col.get().type())
         cdef size_type size = libcudf_col.get().size()
 
@@ -677,13 +680,13 @@ cdef class Column:
         # Note that when converting to cudf Column objects we'll need to pull
         # out the base object.
         cdef gpumemoryview data = gpumemoryview(
-            DeviceBuffer.c_from_unique_ptr(move(contents.data), stream, mr)
+            DeviceBuffer.c_from_unique_ptr(move(contents.data), _stream, mr)
         )
 
         cdef gpumemoryview mask = None
         if null_count > 0:
             mask = gpumemoryview(
-                DeviceBuffer.c_from_unique_ptr(move(contents.null_mask), stream, mr)
+                DeviceBuffer.c_from_unique_ptr(move(contents.null_mask), _stream, mr)
             )
 
         children = []
@@ -772,7 +775,7 @@ cdef class Column:
     cdef Column from_column_view_of_arbitrary(
         const column_view& cv,
         object owner,
-        Stream stream,
+        object stream,
     ):
         """Create a Column from a libcudf column_view into an arbitrary owner.
 
@@ -818,7 +821,7 @@ cdef class Column:
     def from_scalar(
         Scalar slr,
         size_type size,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None,
     ):
         """Create a Column from a Scalar.
@@ -839,18 +842,19 @@ cdef class Column:
         """
         cdef const scalar* c_scalar = slr.get()
         cdef unique_ptr[column] c_result
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
             c_result = make_column_from_scalar(
                 dereference(c_scalar),
                 size,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-        return Column.from_libcudf(move(c_result), stream, mr)
+        return Column.from_libcudf(move(c_result), _stream, mr)
 
-    cpdef Scalar to_scalar(self, Stream stream=None, DeviceMemoryResource mr=None):
+    cpdef Scalar to_scalar(self, object stream=None, DeviceMemoryResource mr=None):
         """
         Return the first value of 1-element column as a Scalar.
 
@@ -873,11 +877,12 @@ cdef class Column:
 
         cdef column_view cv = self.view()
         cdef unique_ptr[scalar] result
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
 
         with nogil:
-            result = get_element(cv, 0, stream.view(), mr.get_mr())
+            result = get_element(cv, 0, _cs, mr.get_mr())
 
         return Scalar.from_libcudf(move(result))
 
@@ -885,7 +890,7 @@ cdef class Column:
     def all_null_like(
         Column like,
         size_type size,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None,
     ):
         """Create an all null column from a template.
@@ -904,18 +909,19 @@ cdef class Column:
         Column
             An all-null column of `size` rows and type matching `like`.
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
-        cdef Scalar slr = Scalar.empty_like(like, stream, mr)
+        cdef Scalar slr = Scalar.empty_like(like, _stream, mr)
         cdef unique_ptr[column] c_result
         with nogil:
             c_result = make_column_from_scalar(
                 dereference(slr.get()),
                 size,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-        return Column.from_libcudf(move(c_result), stream, mr)
+        return Column.from_libcudf(move(c_result), _stream, mr)
 
     @staticmethod
     cdef Column _wrap_nested_list_column(
@@ -923,7 +929,7 @@ cdef class Column:
         tuple shape,
         DataType dtype,
         Column base=None,
-        Stream stream=None,
+        object stream=None,
     ):
         """
         Construct a list Column from a gpumemoryview and array
@@ -937,7 +943,7 @@ cdef class Column:
         """
         ndim = len(shape)
         flat_size = functools.reduce(operator.mul, shape)
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
 
         if base is None:
             base = Column(
@@ -958,9 +964,9 @@ cdef class Column:
 
             offsets_col = sequence(
                 outer_len + 1,
-                Scalar.from_py(0, int32_dtype, stream=stream),
-                Scalar.from_py(shape[i], int32_dtype, stream=stream),
-                stream,
+                Scalar.from_py(0, int32_dtype, stream=_stream),
+                Scalar.from_py(shape[i], int32_dtype, stream=_stream),
+                _stream,
             )
 
             nested = Column(
@@ -976,7 +982,7 @@ cdef class Column:
         return nested
 
     @classmethod
-    def from_array_interface(cls, obj, Stream stream=None):
+    def from_array_interface(cls, obj, object stream=None):
         """
         Create a Column from an object implementing the NumPy Array Interface.
 
@@ -1016,21 +1022,21 @@ cdef class Column:
 
         cdef const unsigned char* ptr
         cdef const unsigned char[:] view
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
 
         if nbytes > 0:
             ptr = <const unsigned char*><uintptr_t>data_ptr
             view = (<const unsigned char[:nbytes]> ptr)[:nbytes]
-            dbuf = DeviceBuffer.to_device(view, stream)
+            dbuf = DeviceBuffer.to_device(view, _stream)
         else:
-            dbuf = DeviceBuffer(size=0, stream=stream)
+            dbuf = DeviceBuffer(size=0, stream=_stream)
 
         return Column._wrap_nested_list_column(
-            gpumemoryview(dbuf), shape, dtype, None, stream
+            gpumemoryview(dbuf), shape, dtype, None, _stream
         )
 
     @classmethod
-    def from_cuda_array_interface(cls, obj, Stream stream=None):
+    def from_cuda_array_interface(cls, obj, object stream=None):
         """
         Create a Column from an object implementing the CUDA Array Interface.
 
@@ -1069,7 +1075,7 @@ cdef class Column:
         )
 
     @classmethod
-    def from_array(cls, obj, Stream stream=None):
+    def from_array(cls, obj, object stream=None):
         """
         Create a Column from any object which supports the NumPy
         or CUDA array interface.
@@ -1115,7 +1121,7 @@ cdef class Column:
     def from_iterable_of_py(
         obj: Iterable,
         dtype: DataType | None = None,
-        Stream stream=None
+        object stream=None
     ) -> Column:
         """
         Create a Column from a Python iterable of scalar values or nested iterables.
@@ -1364,14 +1370,15 @@ cdef class Column:
         """The children of the column."""
         return self._children
 
-    cpdef Column copy(self, Stream stream=None, DeviceMemoryResource mr=None):
+    cpdef Column copy(self, object stream=None, DeviceMemoryResource mr=None):
         """Create a copy of the column."""
         cdef unique_ptr[column] c_result
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
-            c_result = make_unique[column](self.view(), stream.view(), mr.get_mr())
-        return Column.from_libcudf(move(c_result), stream, mr)
+            c_result = make_unique[column](self.view(), _cs, mr.get_mr())
+        return Column.from_libcudf(move(c_result), _stream, mr)
 
     cpdef uint64_t device_buffer_size(self):
         """
@@ -1419,10 +1426,12 @@ cdef class Column:
 
         return PyCapsule_New(<void*>raw_schema_ptr, 'arrow_schema', _release_schema)
 
-    def _to_host_array(self, Stream stream):
+    def _to_host_array(self, object stream):
         cdef ArrowArray* raw_host_array_ptr
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         with nogil:
-            raw_host_array_ptr = to_arrow_host_raw(self.view(), stream.view())
+            raw_host_array_ptr = to_arrow_host_raw(self.view(), _cs)
 
         return PyCapsule_New(<void*>raw_host_array_ptr, "arrow_array", _release_array)
 
@@ -1484,7 +1493,7 @@ cdef class ListsColumnView:
         """
         return lists_column_view(self._column.view())
 
-    cpdef Column get_sliced_child(self, Stream stream=None):
+    cpdef Column get_sliced_child(self, object stream=None):
         """
         Get the list elements child properly sliced to match parent's view.
 
@@ -1498,9 +1507,9 @@ cdef class ListsColumnView:
         Column
             The sliced elements column
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
 
-        cdef column_view c_child = self.view().get_sliced_child(stream.view())
+        cdef column_view c_child = self.view().get_sliced_child(_stream.view().value())
         return Column.from_column_view(c_child, self._column.child(1))
 
 
@@ -1522,7 +1531,7 @@ cdef class StructsColumnView:
         """
         return structs_column_view(self._column.view())
 
-    cpdef Column get_sliced_child(self, int index, Stream stream=None):
+    cpdef Column get_sliced_child(self, int index, object stream=None):
         """
         Get the struct elements child properly sliced to match parent's view.
 
@@ -1538,9 +1547,10 @@ cdef class StructsColumnView:
         Column
             The sliced elements column
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
 
-        cdef column_view c_child = self.view().get_sliced_child(index, stream.view())
+        cdef cudaStream_t _cs = _stream.view().value()
+        cdef column_view c_child = self.view().get_sliced_child(index, _cs)
         return Column.from_column_view(c_child, self._column.child(index))
 
 
diff --git a/python/pylibcudf/pylibcudf/column_factories.pxd b/python/pylibcudf/pylibcudf/column_factories.pxd
index d26b3396e30..3f9841c045d 100644
--- a/python/pylibcudf/pylibcudf/column_factories.pxd
+++ b/python/pylibcudf/pylibcudf/column_factories.pxd
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 from pylibcudf.libcudf.types cimport mask_state
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .types cimport DataType, size_type, type_id
@@ -20,7 +19,7 @@ cpdef Column make_numeric_column(
     DataType type_,
     size_type size,
     MaskArg mask,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -28,7 +27,7 @@ cpdef Column make_fixed_point_column(
     DataType type_,
     size_type size,
     MaskArg mask,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -36,7 +35,7 @@ cpdef Column make_timestamp_column(
     DataType type_,
     size_type size,
     MaskArg mask,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -44,7 +43,7 @@ cpdef Column make_duration_column(
     DataType type_,
     size_type size,
     MaskArg mask,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -52,18 +51,18 @@ cpdef Column make_fixed_width_column(
     DataType type_,
     size_type size,
     MaskArg mask,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column make_empty_column(
     MakeEmptyColumnOperand type_or_id,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column make_empty_lists_column(
     DataType child_type,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/column_factories.pyi b/python/pylibcudf/pylibcudf/column_factories.pyi
index 66d46d88949..a9e92c5f823 100644
--- a/python/pylibcudf/pylibcudf/column_factories.pyi
+++ b/python/pylibcudf/pylibcudf/column_factories.pyi
@@ -1,53 +1,53 @@
 # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType, MaskState, TypeId
+from pylibcudf.utils import CudaStreamLike
 
 def make_numeric_column(
     type_: DataType,
     size: int,
     mstate: MaskState,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def make_fixed_point_column(
     type_: DataType,
     size: int,
     mstate: MaskState,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def make_timestamp_column(
     type_: DataType,
     size: int,
     mstate: MaskState,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def make_duration_column(
     type_: DataType,
     size: int,
     mstate: MaskState,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def make_fixed_width_column(
     type_: DataType,
     size: int,
     mstate: MaskState,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def make_empty_column(
     type_or_id: DataType | TypeId,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def make_empty_lists_column(
     child_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx
index 0848f1aff03..45d590f4106 100644
--- a/python/pylibcudf/pylibcudf/column_factories.pyx
+++ b/python/pylibcudf/pylibcudf/column_factories.pyx
@@ -20,6 +20,7 @@ from .types cimport DataType, type_id
 
 from .types import MaskState, TypeId
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = [
@@ -34,7 +35,7 @@ __all__ = [
 
 cpdef Column make_empty_column(
     MakeEmptyColumnOperand type_or_id,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Creates an empty column of the specified type.
@@ -53,7 +54,7 @@ cpdef Column make_empty_column(
     """
     cdef unique_ptr[column] result
     cdef type_id id
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
     mr = _get_memory_resource(mr)
 
     if MakeEmptyColumnOperand is object:
@@ -75,14 +76,14 @@ cpdef Column make_empty_column(
         raise TypeError(
             "Must pass a TypeId or DataType"
         )
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column make_numeric_column(
     DataType type_,
     size_type size,
     MaskArg mstate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Creates an empty numeric column.
@@ -102,7 +103,8 @@ cpdef Column make_numeric_column(
         state = mstate
     else:
         raise TypeError("Invalid mask argument")
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -110,17 +112,17 @@ cpdef Column make_numeric_column(
             type_.c_obj,
             size,
             state,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column make_fixed_point_column(
     DataType type_,
     size_type size,
     MaskArg mstate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
 
@@ -136,7 +138,8 @@ cpdef Column make_fixed_point_column(
         state = mstate
     else:
         raise TypeError("Invalid mask argument")
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -144,18 +147,18 @@ cpdef Column make_fixed_point_column(
             type_.c_obj,
             size,
             state,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column make_timestamp_column(
     DataType type_,
     size_type size,
     MaskArg mstate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
 
@@ -171,7 +174,8 @@ cpdef Column make_timestamp_column(
         state = mstate
     else:
         raise TypeError("Invalid mask argument")
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -179,18 +183,18 @@ cpdef Column make_timestamp_column(
             type_.c_obj,
             size,
             state,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column make_duration_column(
     DataType type_,
     size_type size,
     MaskArg mstate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
 
@@ -206,7 +210,8 @@ cpdef Column make_duration_column(
         state = mstate
     else:
         raise TypeError("Invalid mask argument")
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -214,18 +219,18 @@ cpdef Column make_duration_column(
             type_.c_obj,
             size,
             state,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column make_fixed_width_column(
     DataType type_,
     size_type size,
     MaskArg mstate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
 
@@ -241,7 +246,8 @@ cpdef Column make_fixed_width_column(
         state = mstate
     else:
         raise TypeError("Invalid mask argument")
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -249,16 +255,16 @@ cpdef Column make_fixed_width_column(
             type_.c_obj,
             size,
             state,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column make_empty_lists_column(
     DataType child_type,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Creates an empty column of the specified type.
@@ -276,10 +282,10 @@ cpdef Column make_empty_lists_column(
         An empty Column
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
     mr = _get_memory_resource(mr)
 
     with nogil:
         result = cpp_make_empty_lists_column(child_type.c_obj)
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/concatenate.pxd
index 60adf27c9a3..60189ba4406 100644
--- a/python/pylibcudf/pylibcudf/concatenate.pxd
+++ b/python/pylibcudf/pylibcudf/concatenate.pxd
@@ -1,9 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from .table cimport Table
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 
@@ -11,4 +10,4 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 # unify the column and table paths without using runtime dispatch instead. In this case
 # we choose to prioritize API consistency over performance, so we use the same function
 # with a bit of runtime dispatch overhead.
-cpdef concatenate(list objects, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef concatenate(list objects, object stream=*, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/concatenate.pyi b/python/pylibcudf/pylibcudf/concatenate.pyi
index 18e8bff2e2f..59379e01c46 100644
--- a/python/pylibcudf/pylibcudf/concatenate.pyi
+++ b/python/pylibcudf/pylibcudf/concatenate.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def concatenate[ColumnOrTable: (Column, Table)](
     objects: list[ColumnOrTable],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> ColumnOrTable: ...
diff --git a/python/pylibcudf/pylibcudf/concatenate.pyx b/python/pylibcudf/pylibcudf/concatenate.pyx
index 36fa0984a68..9921d5b1a39 100644
--- a/python/pylibcudf/pylibcudf/concatenate.pyx
+++ b/python/pylibcudf/pylibcudf/concatenate.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -16,10 +16,11 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["concatenate"]
 
-cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef concatenate(list objects, object stream=None, DeviceMemoryResource mr=None):
     """Concatenate columns or tables.
 
     Parameters
@@ -41,7 +42,8 @@ cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None
 
     cdef vector[column_view] c_columns
     cdef vector[table_view] c_tables
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     cdef unique_ptr[column] c_col_result
@@ -53,17 +55,17 @@ cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None
 
         with nogil:
             c_tbl_result = cpp_concatenate.concatenate(
-                c_tables, stream.view(), mr.get_mr()
+                c_tables, _cs, mr.get_mr()
             )
-        return Table.from_libcudf(move(c_tbl_result), stream, mr)
+        return Table.from_libcudf(move(c_tbl_result), _stream, mr)
     elif isinstance(objects[0], Column):
         for column in objects:
             c_columns.push_back((<Column?>column).view())
 
         with nogil:
             c_col_result = cpp_concatenate.concatenate(
-                c_columns, stream.view(), mr.get_mr()
+                c_columns, _cs, mr.get_mr()
             )
-        return Column.from_libcudf(move(c_col_result), stream, mr)
+        return Column.from_libcudf(move(c_col_result), _stream, mr)
     else:
         raise ValueError("input must be a list of Columns or Tables")
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/contiguous_split.pxd
index a294e70a4a6..95259723dfa 100644
--- a/python/pylibcudf/pylibcudf/contiguous_split.pxd
+++ b/python/pylibcudf/pylibcudf/contiguous_split.pxd
@@ -32,13 +32,13 @@ cdef class HostBuffer:
 
 cdef class PackedColumns:
     cdef unique_ptr[packed_columns] c_obj
-    cdef Stream stream
+    cdef object stream
     cdef DeviceMemoryResource mr
 
     @staticmethod
     cdef PackedColumns from_libcudf(
         unique_ptr[packed_columns] data,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr
     )
     cpdef tuple release(self)
@@ -58,10 +58,10 @@ cdef class ChunkedPack:
 
 cpdef PackedColumns pack(Table input)
 
-cpdef Table unpack(PackedColumns input, Stream stream=*)
+cpdef Table unpack(PackedColumns input, object stream = *)
 
 cpdef Table unpack_from_memoryviews(
     memoryview metadata,
     object gpu_data,
-    Stream stream=*,
+    object stream = *,
 )
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyi b/python/pylibcudf/pylibcudf/contiguous_split.pyi
index df241c079ae..6e0e653b5bb 100644
--- a/python/pylibcudf/pylibcudf/contiguous_split.pyi
+++ b/python/pylibcudf/pylibcudf/contiguous_split.pyi
@@ -2,28 +2,30 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.mr import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.gpumemoryview import gpumemoryview
 from pylibcudf.span import Span
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 class PackedColumns:
     def __init__(self): ...
     def release(
-        self, stream: Stream | None = None
+        self, stream: CudaStreamLike | None = None
     ) -> tuple[memoryview[bytes], gpumemoryview]: ...
 
 def pack(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> PackedColumns: ...
-def unpack(input: PackedColumns, stream: Stream | None = None) -> Table: ...
+def unpack(
+    input: PackedColumns, stream: CudaStreamLike | None = None
+) -> Table: ...
 def unpack_from_memoryviews(
     metadata: memoryview[bytes],
     gpu_data: Span,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> Table: ...
 
 class ChunkedPack:
@@ -32,7 +34,7 @@ class ChunkedPack:
     def create(
         input: Table,
         user_buffer_size: int,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         temp_mr: DeviceMemoryResource | None = None,
     ) -> ChunkedPack: ...
     def has_next(self) -> bool: ...
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx
index 6b24def5dc8..239d89d6470 100644
--- a/python/pylibcudf/pylibcudf/contiguous_split.pyx
+++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx
@@ -15,6 +15,8 @@ from cuda.bindings.cyruntime cimport (
     cudaError_t,
     cudaMemcpyAsync,
     cudaMemcpyKind,
+    cudaStream_t,
+    cudaStreamSynchronize,
 )
 
 from pylibcudf.libcudf.contiguous_split cimport (
@@ -27,7 +29,6 @@ from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.utilities.span cimport device_span
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
@@ -36,6 +37,7 @@ from .gpumemoryview cimport gpumemoryview
 from .table cimport Table
 from .span import is_span
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = [
@@ -105,7 +107,7 @@ cdef class PackedColumns:
     @staticmethod
     cdef PackedColumns from_libcudf(
         unique_ptr[packed_columns] data,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr
     ):
         """Create a Python PackedColumns from a libcudf packed_columns."""
@@ -163,7 +165,7 @@ cdef class ChunkedPack:
     def create(
         Table input,
         size_t user_buffer_size,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource temp_mr=None,
     ):
         """
@@ -184,16 +186,16 @@ cdef class ChunkedPack:
         -------
         New ChunkedPack object.
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         temp_mr = _get_memory_resource(temp_mr)
         cdef unique_ptr[chunked_pack] obj = chunked_pack.create(
-            input.view(), user_buffer_size, stream.view(), temp_mr.get_mr()
+            input.view(), user_buffer_size, _stream.view().value(), temp_mr.get_mr()
         )
 
         cdef ChunkedPack out = ChunkedPack.__new__(ChunkedPack)
         out.table = input
         out.mr = temp_mr
-        out.stream = stream
+        out.stream = _stream
         out.c_obj = move(obj)
         return out
 
@@ -292,7 +294,8 @@ cdef class ChunkedPack:
                 dereference(self.c_obj).get_total_contiguous_size()
             )
         )
-        cdef cuda_stream_view stream = self.stream.view()
+        cdef Stream py_stream = self.stream
+        cdef cudaStream_t stream = py_stream.view().value()
         with nogil:
             while dereference(self.c_obj).has_next():
                 size = dereference(self.c_obj).next(d_span)
@@ -301,22 +304,22 @@ cdef class ChunkedPack:
                     d_span.data(),
                     size,
                     cudaMemcpyKind.cudaMemcpyDeviceToHost,
-                    stream.value(),
+                    stream,
                 )
                 offset += size
                 if err != cudaError.cudaSuccess:
-                    stream.synchronize()
+                    cudaStreamSynchronize(stream)
                     raise RuntimeError(
                         f"Memcpy in pack_to_host failed error: {err}"
                     )
-        stream.synchronize()
+        py_stream.synchronize()
         return (
             self.build_metadata(),
             memoryview(HostBuffer.from_unique_ptr(move(h_buf))),
         )
 
 
-cpdef PackedColumns pack(Table input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef PackedColumns pack(Table input, object stream=None, DeviceMemoryResource mr=None):
     """Deep-copy a table into a serialized contiguous memory format.
 
     Later use `unpack` or `unpack_from_memoryviews` to unpack the serialized
@@ -346,16 +349,17 @@ cpdef PackedColumns pack(Table input, Stream stream=None, DeviceMemoryResource m
     For details, see :cpp:func:`pack`.
     """
     cdef unique_ptr[packed_columns] pack
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
         pack = move(make_unique[packed_columns](
-            cpp_pack(input.view(), stream.view(), mr.get_mr())
+            cpp_pack(input.view(), _cs, mr.get_mr())
         ))
-    return PackedColumns.from_libcudf(move(pack), stream, mr)
+    return PackedColumns.from_libcudf(move(pack), _stream, mr)
 
 
-cpdef Table unpack(PackedColumns input, Stream stream=None):
+cpdef Table unpack(PackedColumns input, object stream=None):
     """Deserialize the result of `pack`.
 
     Copies the result of a serialized table into a table.
@@ -375,16 +379,16 @@ cpdef Table unpack(PackedColumns input, Stream stream=None):
         Copy of the packed columns.
     """
     cdef table_view v
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
     with nogil:
         v = cpp_unpack(dereference(input.c_obj))
-    return Table.from_table_view_of_arbitrary(v, input, stream)
+    return Table.from_table_view_of_arbitrary(v, input, _stream)
 
 
 cpdef Table unpack_from_memoryviews(
     memoryview metadata,
     object gpu_data,
-    Stream stream=None,
+    object stream=None,
 ):
     """Deserialize the result of `pack`.
 
@@ -406,7 +410,7 @@ cpdef Table unpack_from_memoryviews(
     Table
         Copy of the packed columns.
     """
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
     cdef device_span[uint8_t] d_span = _get_device_span(gpu_data)
 
     if metadata.nbytes == 0:
@@ -416,7 +420,7 @@ cpdef Table unpack_from_memoryviews(
         # used for any operations.
         return Table.from_libcudf(
             make_unique[table](table_view()),
-            stream,
+            _stream,
             _get_memory_resource(),
         )
 
@@ -428,4 +432,4 @@ cpdef Table unpack_from_memoryviews(
     cdef table_view v
     with nogil:
         v = cpp_unpack(metadata_ptr, gpu_data_ptr)
-    return Table.from_table_view_of_arbitrary(v, gpu_data, stream)
+    return Table.from_table_view_of_arbitrary(v, gpu_data, _stream)
diff --git a/python/pylibcudf/pylibcudf/copying.pxd b/python/pylibcudf/pylibcudf/copying.pxd
index caaa590de15..4143e846994 100644
--- a/python/pylibcudf/pylibcudf/copying.pxd
+++ b/python/pylibcudf/pylibcudf/copying.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool as cbool
@@ -9,7 +9,6 @@ from pylibcudf.libcudf.copying cimport (
 from pylibcudf.libcudf.types cimport size_type
 
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .scalar cimport Scalar
@@ -40,7 +39,7 @@ cpdef Table gather(
     Table source_table,
     Column gather_map,
     out_of_bounds_policy bounds_policy,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -48,19 +47,19 @@ cpdef Table scatter(
     TableOrListOfScalars source,
     Column scatter_map,
     Table target_table,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef ColumnOrTable empty_like(
-    ColumnOrTable input, Stream stream=*, DeviceMemoryResource mr=*
+    ColumnOrTable input, object stream=*, DeviceMemoryResource mr=*
 )
 
 cpdef Column allocate_like(
     Column input_column,
     mask_allocation_policy policy,
     size=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -70,7 +69,7 @@ cpdef Column copy_range_in_place(
     size_type input_begin,
     size_type input_end,
     size_type target_begin,
-    Stream stream=*,
+    object stream = *,
 )
 
 cpdef Column copy_range(
@@ -79,7 +78,7 @@ cpdef Column copy_range(
     size_type input_begin,
     size_type input_end,
     size_type target_begin,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -87,19 +86,19 @@ cpdef Column shift(
     Column input,
     size_type offset,
     Scalar fill_value,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
-cpdef list slice(ColumnOrTable input, list indices, Stream stream=*)
+cpdef list slice(ColumnOrTable input, list indices, object stream = *)
 
-cpdef list split(ColumnOrTable input, list splits, Stream stream=*)
+cpdef list split(ColumnOrTable input, list splits, object stream = *)
 
 cpdef Column copy_if_else(
     LeftCopyIfElseOperand lhs,
     RightCopyIfElseOperand rhs,
     Column boolean_mask,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -107,13 +106,13 @@ cpdef Table boolean_mask_scatter(
     TableOrListOfScalars input,
     Table target,
     Column boolean_mask,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Scalar get_element(
     Column input_column,
     size_type index,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi
index 04acecc2f1b..bdff6cddad5 100644
--- a/python/pylibcudf/pylibcudf/copying.pyi
+++ b/python/pylibcudf/pylibcudf/copying.pyi
@@ -1,15 +1,15 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 from typing import TypeVar
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 class MaskAllocationPolicy(IntEnum):
     NEVER = ...
@@ -26,26 +26,26 @@ def gather(
     source_table: Table,
     gather_map: Column,
     bounds_policy: OutOfBoundsPolicy,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def scatter(
     source: Table | list[Scalar],
     scatter_map: Column,
     target_table: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def empty_like(
     input: ColumnOrTable,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> ColumnOrTable: ...
 def allocate_like(
     input_column: Column,
     policy: MaskAllocationPolicy,
     size: int | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def copy_range_in_place(
@@ -54,7 +54,7 @@ def copy_range_in_place(
     input_begin: int,
     input_end: int,
     target_begin: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> Column: ...
 def copy_range(
     input_column: Column,
@@ -62,39 +62,43 @@ def copy_range(
     input_begin: int,
     input_end: int,
     target_begin: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def shift(
     input: Column,
     offset: int,
     fill_value: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def slice(
-    input: ColumnOrTable, indices: list[int], stream: Stream | None = None
+    input: ColumnOrTable,
+    indices: list[int],
+    stream: CudaStreamLike | None = None,
 ) -> list[ColumnOrTable]: ...
 def split(
-    input: ColumnOrTable, splits: list[int], stream: Stream | None = None
+    input: ColumnOrTable,
+    splits: list[int],
+    stream: CudaStreamLike | None = None,
 ) -> list[ColumnOrTable]: ...
 def copy_if_else(
     lhs: Column | Scalar,
     rhs: Column | Scalar,
     boolean_mask: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def boolean_mask_scatter(
     input: Table | list[Scalar],
     target: Table,
     boolean_mask: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def get_element(
     input_column: Column,
     index: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Scalar: ...
diff --git a/python/pylibcudf/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx
index f8f44e03938..30be1ea7d0a 100644
--- a/python/pylibcudf/pylibcudf/copying.pyx
+++ b/python/pylibcudf/pylibcudf/copying.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator import dereference
@@ -40,6 +40,7 @@ from .column cimport Column
 from .scalar cimport Scalar
 from .table cimport Table
 from .utils cimport _as_vector, _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = [
@@ -64,7 +65,7 @@ cpdef Table gather(
     Table source_table,
     Column gather_map,
     out_of_bounds_policy bounds_policy,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Select rows from source_table according to the provided gather_map.
@@ -94,7 +95,8 @@ cpdef Table gather(
         If the gather_map contains nulls.
     """
     cdef unique_ptr[table] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -102,18 +104,18 @@ cpdef Table gather(
             source_table.view(),
             gather_map.view(),
             bounds_policy,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table scatter(
     TableOrListOfScalars source,
     Column scatter_map,
     Table target_table,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Scatter from source into target_table according to scatter_map.
@@ -155,7 +157,8 @@ cpdef Table scatter(
     """
     cdef unique_ptr[table] c_result
     cdef vector[reference_wrapper[const scalar]] source_scalars
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if TableOrListOfScalars is Table:
@@ -164,7 +167,7 @@ cpdef Table scatter(
                 source.view(),
                 scatter_map.view(),
                 target_table.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
@@ -174,14 +177,14 @@ cpdef Table scatter(
                 source_scalars,
                 scatter_map.view(),
                 target_table.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef ColumnOrTable empty_like(
-    ColumnOrTable input, Stream stream=None, DeviceMemoryResource mr=None
+    ColumnOrTable input, object stream=None, DeviceMemoryResource mr=None
 ):
     """Create an empty column or table with the same type as ``input``.
 
@@ -201,23 +204,23 @@ cpdef ColumnOrTable empty_like(
     """
     cdef unique_ptr[table] c_tbl_result
     cdef unique_ptr[column] c_col_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
     mr = _get_memory_resource(mr)
     if ColumnOrTable is Column:
         with nogil:
             c_col_result = cpp_copying.empty_like(input.view())
-        return Column.from_libcudf(move(c_col_result), stream, mr)
+        return Column.from_libcudf(move(c_col_result), _stream, mr)
     else:
         with nogil:
             c_tbl_result = cpp_copying.empty_like(input.view())
-        return Table.from_libcudf(move(c_tbl_result), stream, mr)
+        return Table.from_libcudf(move(c_tbl_result), _stream, mr)
 
 
 cpdef Column allocate_like(
     Column input_column,
     mask_allocation_policy policy,
     size=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Allocate a column with the same type as input_column.
@@ -244,7 +247,8 @@ cpdef Column allocate_like(
 
     cdef unique_ptr[column] c_result
     cdef size_type c_size = size if size is not None else input_column.size()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -252,11 +256,11 @@ cpdef Column allocate_like(
                 input_column.view(),
                 c_size,
                 policy,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column copy_range_in_place(
@@ -265,7 +269,7 @@ cpdef Column copy_range_in_place(
     size_type input_begin,
     size_type input_end,
     size_type target_begin,
-    Stream stream=None
+    object stream=None
 ):
     """Copy a range of elements from input_column to target_column.
 
@@ -301,7 +305,8 @@ cpdef Column copy_range_in_place(
     """
 
     cdef mutable_column_view target_view = target_column.mutable_view()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
 
     with nogil:
         cpp_copying.copy_range_in_place(
@@ -310,7 +315,7 @@ cpdef Column copy_range_in_place(
             input_begin,
             input_end,
             target_begin,
-            stream.view()
+            _cs
         )
     target_column.set_null_count(target_view.null_count())
 
@@ -321,7 +326,7 @@ cpdef Column copy_range(
     size_type input_begin,
     size_type input_end,
     size_type target_begin,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Copy a range of elements from input_column to target_column.
@@ -357,7 +362,8 @@ cpdef Column copy_range(
         If target and source have different types.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -367,18 +373,18 @@ cpdef Column copy_range(
             input_begin,
             input_end,
             target_begin,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column shift(
     Column input,
     size_type offset,
     Scalar fill_value,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Shift the elements of input by offset.
@@ -409,7 +415,8 @@ cpdef Column shift(
         of fixed width or string type.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -417,13 +424,13 @@ cpdef Column shift(
                 input.view(),
                 offset,
                 dereference(fill_value.c_obj),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
-cpdef list slice(ColumnOrTable input, list indices, Stream stream=None):
+cpdef list slice(ColumnOrTable input, list indices, object stream=None):
     """Slice input according to indices.
 
     For details on the implementation, see :cpp:func:`slice`.
@@ -454,11 +461,12 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None):
     cdef vector[column_view] c_col_result
     cdef vector[table_view] c_tbl_result
     cdef int i
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
 
     if ColumnOrTable is Column:
         with nogil:
-            c_col_result = cpp_copying.slice(input.view(), c_indices, stream.view())
+            c_col_result = cpp_copying.slice(input.view(), c_indices, _cs)
 
         return [
             Column.from_column_view(c_col_result[i], input)
@@ -466,7 +474,7 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None):
         ]
     else:
         with nogil:
-            c_tbl_result = cpp_copying.slice(input.view(), c_indices, stream.view())
+            c_tbl_result = cpp_copying.slice(input.view(), c_indices, _cs)
 
         return [
             Table.from_table_view(c_tbl_result[i], input)
@@ -474,7 +482,7 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None):
         ]
 
 
-cpdef list split(ColumnOrTable input, list splits, Stream stream=None):
+cpdef list split(ColumnOrTable input, list splits, object stream=None):
     """Split input into multiple.
 
     For details on the implementation, see :cpp:func:`split`.
@@ -497,11 +505,12 @@ cpdef list split(ColumnOrTable input, list splits, Stream stream=None):
     cdef vector[column_view] c_col_result
     cdef vector[table_view] c_tbl_result
     cdef int i
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
 
     if ColumnOrTable is Column:
         with nogil:
-            c_col_result = cpp_copying.split(input.view(), c_splits, stream.view())
+            c_col_result = cpp_copying.split(input.view(), c_splits, _cs)
 
         return [
             Column.from_column_view(c_col_result[i], input)
@@ -509,7 +518,7 @@ cpdef list split(ColumnOrTable input, list splits, Stream stream=None):
         ]
     else:
         with nogil:
-            c_tbl_result = cpp_copying.split(input.view(), c_splits, stream.view())
+            c_tbl_result = cpp_copying.split(input.view(), c_splits, _cs)
 
         return [
             Table.from_table_view(c_tbl_result[i], input)
@@ -521,7 +530,7 @@ cpdef Column copy_if_else(
     LeftCopyIfElseOperand lhs,
     RightCopyIfElseOperand rhs,
     Column boolean_mask,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Copy elements from lhs or rhs into a new column according to boolean_mask.
@@ -556,7 +565,8 @@ cpdef Column copy_if_else(
         columns), or if lhs and rhs are not of the same length (if both are columns).
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Column:
@@ -565,7 +575,7 @@ cpdef Column copy_if_else(
                 lhs.view(),
                 rhs.view(),
                 boolean_mask.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Scalar:
@@ -574,7 +584,7 @@ cpdef Column copy_if_else(
                 lhs.view(),
                 dereference(rhs.c_obj),
                 boolean_mask.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif LeftCopyIfElseOperand is Scalar and RightCopyIfElseOperand is Column:
@@ -583,7 +593,7 @@ cpdef Column copy_if_else(
                 dereference(lhs.c_obj),
                 rhs.view(),
                 boolean_mask.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
@@ -592,18 +602,18 @@ cpdef Column copy_if_else(
                 dereference(lhs.c_obj),
                 dereference(rhs.c_obj),
                 boolean_mask.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Table boolean_mask_scatter(
     TableOrListOfScalars input,
     Table target,
     Column boolean_mask,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Scatter rows from input into target according to boolean_mask.
@@ -641,7 +651,8 @@ cpdef Table boolean_mask_scatter(
     """
     cdef unique_ptr[table] result
     cdef vector[reference_wrapper[const scalar]] source_scalars
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if TableOrListOfScalars is Table:
@@ -650,7 +661,7 @@ cpdef Table boolean_mask_scatter(
                 input.view(),
                 target.view(),
                 boolean_mask.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
@@ -660,17 +671,17 @@ cpdef Table boolean_mask_scatter(
                 source_scalars,
                 target.view(),
                 boolean_mask.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
 
-    return Table.from_libcudf(move(result), stream, mr)
+    return Table.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Scalar get_element(
     Column input_column,
     size_type index,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Get the element at index from input_column.
@@ -697,12 +708,13 @@ cpdef Scalar get_element(
         If index is out of bounds.
     """
     cdef unique_ptr[scalar] c_output
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_output = cpp_copying.get_element(
-            input_column.view(), index, stream.view(), mr.get_mr()
+            input_column.view(), index, _cs, mr.get_mr()
         )
 
     return Scalar.from_libcudf(move(c_output))
diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd
index 1a93ee62c43..d7d15f0c19f 100644
--- a/python/pylibcudf/pylibcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/datetime.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused ColumnOrScalar:
     Column
@@ -14,54 +13,54 @@ ctypedef fused ColumnOrScalar:
 cpdef Column extract_datetime_component(
     Column input,
     datetime_component component,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column ceil_datetimes(
     Column input,
     rounding_frequency freq,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column floor_datetimes(
     Column input,
     rounding_frequency freq,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column round_datetimes(
     Column input,
     rounding_frequency freq,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column add_calendrical_months(
     Column timestamps,
     ColumnOrScalar months,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column day_of_year(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column is_leap_year(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column last_day_of_month(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column extract_quarter(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column days_in_month(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi
index abcc608daa4..e671d2d18cf 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyi
+++ b/python/pylibcudf/pylibcudf/datetime.pyi
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 class DatetimeComponent(IntEnum):
     YEAR = ...
@@ -33,55 +33,55 @@ class RoundingFrequency(IntEnum):
 def extract_datetime_component(
     input: Column,
     component: DatetimeComponent,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def ceil_datetimes(
     input: Column,
     freq: RoundingFrequency,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def floor_datetimes(
     input: Column,
     freq: RoundingFrequency,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def round_datetimes(
     input: Column,
     freq: RoundingFrequency,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def add_calendrical_months(
     input: Column,
     months: Column | Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def day_of_year(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_leap_year(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def last_day_of_month(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def extract_quarter(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def days_in_month(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx
index 2a837c5b749..1e5270bad92 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyx
+++ b/python/pylibcudf/pylibcudf/datetime.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -30,6 +30,7 @@ from rmm.pylibrmm.stream cimport Stream
 from .column cimport Column
 from .scalar cimport Scalar
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "DatetimeComponent",
@@ -49,7 +50,7 @@ __all__ = [
 cpdef Column extract_datetime_component(
     Column input,
     datetime_component component,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -73,19 +74,20 @@ cpdef Column extract_datetime_component(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         result = cpp_extract_datetime_component(
-            input.view(), component, stream.view(), mr.get_mr()
+            input.view(), component, _cs, mr.get_mr()
         )
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column ceil_datetimes(
     Column input,
     rounding_frequency freq,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -109,17 +111,18 @@ cpdef Column ceil_datetimes(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_ceil_datetimes(input.view(), freq, stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_ceil_datetimes(input.view(), freq, _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column floor_datetimes(
     Column input,
     rounding_frequency freq,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -143,17 +146,18 @@ cpdef Column floor_datetimes(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_floor_datetimes(input.view(), freq, stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_floor_datetimes(input.view(), freq, _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column round_datetimes(
     Column input,
     rounding_frequency freq,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -177,17 +181,18 @@ cpdef Column round_datetimes(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_round_datetimes(input.view(), freq, stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_round_datetimes(input.view(), freq, _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column add_calendrical_months(
     Column input,
     ColumnOrScalar months,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -216,7 +221,8 @@ cpdef Column add_calendrical_months(
 
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -224,13 +230,13 @@ cpdef Column add_calendrical_months(
             input.view(),
             months.view() if ColumnOrScalar is Column else
             dereference(months.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column day_of_year(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Computes the day number since the start of
@@ -253,15 +259,16 @@ cpdef Column day_of_year(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_day_of_year(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_day_of_year(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column is_leap_year(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Check if the year of the given date is a leap year.
@@ -283,15 +290,16 @@ cpdef Column is_leap_year(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_is_leap_year(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_is_leap_year(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column last_day_of_month(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Computes the last day of the month.
@@ -313,15 +321,16 @@ cpdef Column last_day_of_month(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_last_day_of_month(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_last_day_of_month(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column extract_quarter(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns the quarter (ie. a value from {1, 2, 3, 4})
@@ -343,15 +352,16 @@ cpdef Column extract_quarter(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_extract_quarter(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_extract_quarter(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column days_in_month(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Extract the number of days in the month.
@@ -372,12 +382,13 @@ cpdef Column days_in_month(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_days_in_month(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(result), stream, mr)
+        result = cpp_days_in_month(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(result), _stream, mr)
 
 DatetimeComponent.__str__ = DatetimeComponent.__repr__
 RoundingFrequency.__str__ = RoundingFrequency.__repr__
diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd b/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd
index db9ca865197..832d572b467 100644
--- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd
+++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd
@@ -1,6 +1,5 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
-from rmm.pylibrmm.stream cimport Stream
 
-cpdef void join_streams(list streams, Stream stream)
+cpdef void join_streams(list streams, object stream)
diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi
index 522239c6a80..c9c2ba79e36 100644
--- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi
+++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi
@@ -1,6 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
-from rmm.pylibrmm.stream import Stream
+from pylibcudf.utils import CudaStreamLike
 
-def join_streams(streams: list[Stream], stream: Stream) -> None: ...
+def join_streams(
+    streams: list[CudaStreamLike], stream: CudaStreamLike
+) -> None: ...
diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx
index 7f3d2f228fb..d9efcb19ed9 100644
--- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx
+++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx
@@ -1,21 +1,22 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
+from cuda.bindings.cyruntime cimport cudaStream_t
 from libcpp.vector cimport vector
 
 from pylibcudf.libcudf.detail.utilities cimport stream_pool as cpp_stream_pool
+from pylibcudf.libcudf.detail.utilities.stream_pool cimport const_cudaStream_t
 from pylibcudf.libcudf.utilities.span cimport host_span
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
 from rmm.pylibrmm.stream cimport Stream
 
-ctypedef const cuda_stream_view const_cuda_stream_view
+from ..utils cimport _get_stream
 
 
 __all__ = ["join_streams"]
 
 
-cpdef void join_streams(list streams, Stream stream):
+cpdef void join_streams(list streams, object stream):
     """Synchronize a stream to an event on a set of streams.
 
     This function synchronizes the joined stream with the waited-on streams
@@ -42,15 +43,16 @@ cpdef void join_streams(list streams, Stream stream):
     >>> plc.experimental.join_streams([stream1, stream2], join_stream)
     >>> # ... continue work on join_stream ...
     """
-    cdef Stream c_stream = <Stream?>stream
-    cdef vector[cuda_stream_view] c_streams
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
+    cdef vector[cudaStream_t] c_streams
 
     c_streams.reserve(len(streams))
     for s in streams:
-        c_streams.push_back((<Stream?>s).view())
+        c_streams.push_back((<Stream>_get_stream(s)).view().value())
 
     with nogil:
         cpp_stream_pool.join_streams(
-            host_span[const_cuda_stream_view](c_streams.data(), c_streams.size()),
-            c_stream.view()
+            host_span[const_cudaStream_t](c_streams.data(), c_streams.size()),
+            _cs
         )
diff --git a/python/pylibcudf/pylibcudf/filling.pxd b/python/pylibcudf/pylibcudf/filling.pxd
index b90d567b2c2..acb92e0212a 100644
--- a/python/pylibcudf/pylibcudf/filling.pxd
+++ b/python/pylibcudf/pylibcudf/filling.pxd
@@ -1,7 +1,6 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from pylibcudf.libcudf.types cimport size_type
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
@@ -17,7 +16,7 @@ cpdef Column fill(
     size_type begin,
     size_type end,
     Scalar value,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -26,21 +25,21 @@ cpdef void fill_in_place(
     size_type c_begin,
     size_type c_end,
     Scalar value,
-    Stream stream = *,
+    object stream = *,
 )
 
 cpdef Column sequence(
     size_type size,
     Scalar init,
     Scalar step,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Table repeat(
     Table input_table,
     ColumnOrSize count,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -48,6 +47,6 @@ cpdef Column calendrical_month_sequence(
     size_type n,
     Scalar init,
     size_type months,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi
index a1023f8016c..2789ecd5aca 100644
--- a/python/pylibcudf/pylibcudf/filling.pyi
+++ b/python/pylibcudf/pylibcudf/filling.pyi
@@ -1,32 +1,33 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
-from rmm.pylibrmm.stream import Stream
-
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def fill(
     destination: Column,
     begin: int,
     end: int,
     value: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> Column: ...
 def fill_in_place(
     destination: Column,
     begin: int,
     end: int,
     value: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> None: ...
 def sequence(
-    size: int, init: Scalar, step: Scalar, stream: Stream | None = None
+    size: int, init: Scalar, step: Scalar, stream: CudaStreamLike | None = None
 ) -> Column: ...
 def repeat(
-    input_table: Table, count: Column | int, stream: Stream | None = None
+    input_table: Table,
+    count: Column | int,
+    stream: CudaStreamLike | None = None,
 ) -> Table: ...
 def calendrical_month_sequence(
-    n: int, init: Scalar, months: int, stream: Stream | None = None
+    n: int, init: Scalar, months: int, stream: CudaStreamLike | None = None
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx
index 68e4862dfb8..ce6002eb24e 100644
--- a/python/pylibcudf/pylibcudf/filling.pyx
+++ b/python/pylibcudf/pylibcudf/filling.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -22,6 +22,7 @@ from .column cimport Column
 from .scalar cimport Scalar
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = [
@@ -37,7 +38,7 @@ cpdef Column fill(
     size_type begin,
     size_type end,
     Scalar value,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
 
@@ -68,7 +69,8 @@ cpdef Column fill(
 
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -77,17 +79,17 @@ cpdef Column fill(
             begin,
             end,
             dereference((<Scalar> value).c_obj),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef void fill_in_place(
     Column destination,
     size_type begin,
     size_type end,
     Scalar value,
-    Stream stream=None,
+    object stream=None,
 ):
 
     """Fill destination column in place from begin to end with value.
@@ -112,7 +114,8 @@ cpdef void fill_in_place(
     None
     """
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
 
     cdef mutable_column_view c_destination = destination.mutable_view()
     with nogil:
@@ -121,7 +124,7 @@ cpdef void fill_in_place(
             begin,
             end,
             dereference(value.c_obj),
-            stream.view()
+            _cs
         )
     destination.set_null_count(c_destination.null_count())
 
@@ -129,7 +132,7 @@ cpdef Column sequence(
     size_type size,
     Scalar init,
     Scalar step,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a sequence column of size ``size`` with initial value ``init`` and step
@@ -157,7 +160,8 @@ cpdef Column sequence(
     cdef unique_ptr[column] result
     cdef size_type c_size = size
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -165,16 +169,16 @@ cpdef Column sequence(
             c_size,
             dereference(init.c_obj),
             dereference(step.c_obj),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Table repeat(
     Table input_table,
     ColumnOrSize count,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Repeat rows of a Table.
@@ -203,7 +207,8 @@ cpdef Table repeat(
 
     cdef unique_ptr[table] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if ColumnOrSize is Column:
@@ -211,7 +216,7 @@ cpdef Table repeat(
             result = cpp_repeat(
                 input_table.view(),
                 count.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     if ColumnOrSize is size_type:
@@ -219,17 +224,17 @@ cpdef Table repeat(
             result = cpp_repeat(
                 input_table.view(),
                 count,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-    return Table.from_libcudf(move(result), stream, mr)
+    return Table.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column calendrical_month_sequence(
     size_type n,
     Scalar init,
     size_type months,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
 
@@ -256,7 +261,8 @@ cpdef Column calendrical_month_sequence(
 
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -264,7 +270,7 @@ cpdef Column calendrical_month_sequence(
             n,
             dereference(init.c_obj),
             months,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/groupby.pxd b/python/pylibcudf/pylibcudf/groupby.pxd
index b5654ff6df8..a46146a145a 100644
--- a/python/pylibcudf/pylibcudf/groupby.pxd
+++ b/python/pylibcudf/pylibcudf/groupby.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -19,7 +19,6 @@ from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.types cimport null_order, order
 
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .table cimport Table
@@ -46,31 +45,31 @@ cdef class GroupBy:
     cdef unique_ptr[vector[null_order]] _null_precedence
 
     cpdef tuple aggregate(
-        self, list requests, Stream stream=*, DeviceMemoryResource mr=*
+        self, list requests, object stream = *, DeviceMemoryResource mr=*
     )
-    cpdef tuple scan(self, list requests, Stream stream=*, DeviceMemoryResource mr=*)
+    cpdef tuple scan(self, list requests, object stream = *, DeviceMemoryResource mr=*)
     cpdef tuple shift(
         self,
         Table values,
         list offset,
         list fill_values,
-        Stream stream=*,
+        object stream = *,
         DeviceMemoryResource mr=*,
     )
     cpdef tuple replace_nulls(
         self,
         Table values,
         list replace_policies,
-        Stream stream=*,
+        object stream = *,
         DeviceMemoryResource mr=*,
     )
     cpdef tuple get_groups(
-        self, Table values=*, Stream stream=*, DeviceMemoryResource mr=*
+        self, Table values=*, object stream = *, DeviceMemoryResource mr=*
     )
 
     @staticmethod
     cdef tuple _parse_outputs(
         pair[unique_ptr[table], vector[aggregation_result]] c_res,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr,
     )
diff --git a/python/pylibcudf/pylibcudf/groupby.pyi b/python/pylibcudf/pylibcudf/groupby.pyi
index 75322706187..01c732175f4 100644
--- a/python/pylibcudf/pylibcudf/groupby.pyi
+++ b/python/pylibcudf/pylibcudf/groupby.pyi
@@ -1,8 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.aggregation import Aggregation
 from pylibcudf.column import Column
@@ -10,6 +9,7 @@ from pylibcudf.replace import ReplacePolicy
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 from pylibcudf.types import NullOrder, NullPolicy, Order, Sorted
+from pylibcudf.utils import CudaStreamLike
 
 class GroupByRequest:
     def __init__(
@@ -28,13 +28,13 @@ class GroupBy:
     def aggregate(
         self,
         requests: list[GroupByRequest],
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> tuple[Table, list[Table]]: ...
     def scan(
         self,
         requests: list[GroupByRequest],
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> tuple[Table, list[Table]]: ...
     def shift(
@@ -42,19 +42,19 @@ class GroupBy:
         values: Table,
         offset: list[int],
         fill_values: list[Scalar],
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> tuple[Table, Table]: ...
     def replace_nulls(
         self,
         value: Table,
         replace_policies: list[ReplacePolicy],
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> tuple[Table, Table]: ...
     def get_groups(
         self,
         values: Table | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> tuple[list[int], Table, Table]: ...
diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx
index 94a292996a0..4b2f842a360 100644
--- a/python/pylibcudf/pylibcudf/groupby.pyx
+++ b/python/pylibcudf/pylibcudf/groupby.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -28,6 +28,7 @@ from .column cimport Column
 from .table cimport Table
 from .types cimport null_order, null_policy, order, sorted
 from .utils cimport _as_vector, _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = ["GroupBy", "GroupByRequest"]
@@ -141,12 +142,13 @@ cdef class GroupBy:
     @staticmethod
     cdef tuple _parse_outputs(
         pair[unique_ptr[table], vector[aggregation_result]] c_res,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr,
     ):
         # Convert libcudf aggregation/scan outputs into pylibcudf objects.
         # This function is for internal use only.
-        cdef Table group_keys = Table.from_libcudf(move(c_res.first), stream, mr)
+        cdef Stream _stream = <Stream>stream
+        cdef Table group_keys = Table.from_libcudf(move(c_res.first), _stream, mr)
 
         cdef int i, j
         cdef list results = []
@@ -155,13 +157,13 @@ cdef class GroupBy:
             inner_results = []
             for j in range(c_res.second[i].results.size()):
                 inner_results.append(
-                    Column.from_libcudf(move(c_res.second[i].results[j]), stream, mr)
+                    Column.from_libcudf(move(c_res.second[i].results[j]), _stream, mr)
                 )
             results.append(Table(inner_results))
         return group_keys, results
 
     cpdef tuple aggregate(
-        self, list requests, Stream stream=None, DeviceMemoryResource mr=None
+        self, list requests, object stream=None, DeviceMemoryResource mr=None
     ):
         """Compute aggregations on columns.
 
@@ -189,19 +191,20 @@ cdef class GroupBy:
             c_requests.push_back(move(request._to_libcudf_agg_request()))
 
         cdef pair[unique_ptr[table], vector[aggregation_result]] c_res
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         # TODO: Need to capture C++ exceptions indicating that an invalid type was used.
         # We rely on libcudf to tell us this rather than checking the types beforehand
         # ourselves.
         with nogil:
             c_res = dereference(self.c_obj).aggregate(
-                c_requests, stream.view(), mr.get_mr()
+                c_requests, _cs, mr.get_mr()
             )
-        return GroupBy._parse_outputs(move(c_res), stream, mr)
+        return GroupBy._parse_outputs(move(c_res), _stream, mr)
 
     cpdef tuple scan(
-        self, list requests, Stream stream=None, DeviceMemoryResource mr=None
+        self, list requests, object stream=None, DeviceMemoryResource mr=None
     ):
         """Compute scans on columns.
 
@@ -229,18 +232,23 @@ cdef class GroupBy:
             c_requests.push_back(move(request._to_libcudf_scan_request()))
 
         cdef pair[unique_ptr[table], vector[aggregation_result]] c_res
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
-            c_res = dereference(self.c_obj).scan(c_requests, stream.view(), mr.get_mr())
-        return GroupBy._parse_outputs(move(c_res), stream, mr)
+            c_res = dereference(self.c_obj).scan(
+                c_requests,
+                _cs,
+                mr.get_mr(),
+            )
+        return GroupBy._parse_outputs(move(c_res), _stream, mr)
 
     cpdef tuple shift(
         self,
         Table values,
         list offset,
         list fill_values,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None,
     ):
         """Compute shifts on columns.
@@ -269,26 +277,27 @@ cdef class GroupBy:
 
         cdef vector[size_type] c_offset = offset
         cdef pair[unique_ptr[table], unique_ptr[table]] c_res
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
             c_res = dereference(self.c_obj).shift(
                 values.view(),
                 c_offset,
                 c_fill_values,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         return (
-            Table.from_libcudf(move(c_res.first), stream, mr),
-            Table.from_libcudf(move(c_res.second), stream, mr),
+            Table.from_libcudf(move(c_res.first), _stream, mr),
+            Table.from_libcudf(move(c_res.second), _stream, mr),
         )
 
     cpdef tuple replace_nulls(
         self,
         Table value,
         list replace_policies,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None,
     ):
         """Replace nulls in columns.
@@ -312,22 +321,23 @@ cdef class GroupBy:
         """
         cdef pair[unique_ptr[table], unique_ptr[table]] c_res
         cdef vector[replace_policy] c_replace_policies = replace_policies
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
             c_res = dereference(self.c_obj).replace_nulls(
                 value.view(),
                 c_replace_policies,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         return (
-            Table.from_libcudf(move(c_res.first), stream, mr),
-            Table.from_libcudf(move(c_res.second), stream, mr),
+            Table.from_libcudf(move(c_res.first), _stream, mr),
+            Table.from_libcudf(move(c_res.second), _stream, mr),
         )
 
     cpdef tuple get_groups(
-        self, Table values=None, Stream stream=None, DeviceMemoryResource mr=None
+        self, Table values=None, object stream=None, DeviceMemoryResource mr=None
     ):
         """Get the grouped keys and values labels for each row.
 
@@ -352,24 +362,24 @@ cdef class GroupBy:
 
         cdef groups c_groups
         cdef table_view empty_view
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         mr = _get_memory_resource(mr)
         if values:
             c_groups = dereference(self.c_obj).get_groups(
-                values.view(), stream.view(), mr.get_mr()
+                values.view(), _stream.view().value(), mr.get_mr()
             )
             return (
                 c_groups.offsets,
-                Table.from_libcudf(move(c_groups.keys), stream, mr),
-                Table.from_libcudf(move(c_groups.values), stream, mr),
+                Table.from_libcudf(move(c_groups.keys), _stream, mr),
+                Table.from_libcudf(move(c_groups.values), _stream, mr),
             )
         else:
             # c_groups.values is nullptr - call get_groups with empty table view
             c_groups = dereference(self.c_obj).get_groups(
-                empty_view, stream.view(), mr.get_mr()
+                empty_view, _stream.view().value(), mr.get_mr()
             )
             return (
                 c_groups.offsets,
-                Table.from_libcudf(move(c_groups.keys), stream, mr),
+                Table.from_libcudf(move(c_groups.keys), _stream, mr),
                 None,
             )
diff --git a/python/pylibcudf/pylibcudf/hashing.pxd b/python/pylibcudf/pylibcudf/hashing.pxd
index 4febd6e4949..b824f2dbcb8 100644
--- a/python/pylibcudf/pylibcudf/hashing.pxd
+++ b/python/pylibcudf/pylibcudf/hashing.pxd
@@ -1,9 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stdint cimport uint32_t, uint64_t
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .table cimport Table
@@ -12,34 +11,34 @@ from .table cimport Table
 cpdef Column murmurhash3_x86_32(
     Table input,
     uint32_t seed=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
 cpdef Table murmurhash3_x64_128(
     Table input,
     uint64_t seed=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
 cpdef Column xxhash_32(
     Table input,
     uint32_t seed=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
 cpdef Column xxhash_64(
     Table input,
     uint64_t seed=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
-cpdef Column md5(Table input, Stream stream=*, DeviceMemoryResource mr=*)
-cpdef Column sha1(Table input, Stream stream=*, DeviceMemoryResource mr=*)
-cpdef Column sha224(Table input, Stream stream=*, DeviceMemoryResource mr=*)
-cpdef Column sha256(Table input, Stream stream=*, DeviceMemoryResource mr=*)
-cpdef Column sha384(Table input, Stream stream=*, DeviceMemoryResource mr=*)
-cpdef Column sha512(Table input, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef Column md5(Table input, object stream = *, DeviceMemoryResource mr=*)
+cpdef Column sha1(Table input, object stream = *, DeviceMemoryResource mr=*)
+cpdef Column sha224(Table input, object stream = *, DeviceMemoryResource mr=*)
+cpdef Column sha256(Table input, object stream = *, DeviceMemoryResource mr=*)
+cpdef Column sha384(Table input, object stream = *, DeviceMemoryResource mr=*)
+cpdef Column sha512(Table input, object stream = *, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi
index 1b8d055368a..dae03796b9c 100644
--- a/python/pylibcudf/pylibcudf/hashing.pyi
+++ b/python/pylibcudf/pylibcudf/hashing.pyi
@@ -1,67 +1,67 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from typing import Final
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 LIBCUDF_DEFAULT_HASH_SEED: Final[int]
 
 def murmurhash3_x86_32(
     input: Table,
     seed: int = ...,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def murmurhash3_x64_128(
     input: Table,
     seed: int = ...,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def xxhash_32(
     input: Table,
     seed: int = ...,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def xxhash_64(
     input: Table,
     seed: int = ...,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def md5(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def sha1(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def sha224(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def sha256(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def sha384(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def sha512(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx
index d9db52720bf..941393cf949 100644
--- a/python/pylibcudf/pylibcudf/hashing.pyx
+++ b/python/pylibcudf/pylibcudf/hashing.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libc.stdint cimport uint32_t, uint64_t
 from libcpp.memory cimport unique_ptr
@@ -24,6 +24,7 @@ from rmm.pylibrmm.stream cimport Stream
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "LIBCUDF_DEFAULT_HASH_SEED",
@@ -44,7 +45,7 @@ LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED
 cpdef Column murmurhash3_x86_32(
     Table input,
     uint32_t seed=DEFAULT_HASH_SEED,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the MurmurHash3 32-bit hash value of each row in the given table.
@@ -65,24 +66,25 @@ cpdef Column murmurhash3_x86_32(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_murmurhash3_x86_32(
             input.view(),
             seed,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table murmurhash3_x64_128(
     Table input,
     uint64_t seed=DEFAULT_HASH_SEED,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the MurmurHash3 64-bit hash value of each row in the given table.
@@ -103,24 +105,25 @@ cpdef Table murmurhash3_x64_128(
     """
     cdef unique_ptr[table] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_murmurhash3_x64_128(
             input.view(),
             seed,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column xxhash_32(
     Table input,
     uint32_t seed=DEFAULT_HASH_SEED,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the xxHash 32-bit hash value of each row in the given table.
@@ -142,24 +145,25 @@ cpdef Column xxhash_32(
 
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_xxhash_32(
             input.view(),
             seed,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column xxhash_64(
     Table input,
     uint64_t seed=DEFAULT_HASH_SEED,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the xxHash 64-bit hash value of each row in the given table.
@@ -181,23 +185,24 @@ cpdef Column xxhash_64(
 
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_xxhash_64(
             input.view(),
             seed,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column md5(
     Table input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the MD5 hash value of each row in the given table.
@@ -220,16 +225,17 @@ cpdef Column md5(
 
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_md5(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(c_result), stream, mr)
+        c_result = cpp_md5(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column sha1(
     Table input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the SHA-1 hash value of each row in the given table.
@@ -250,17 +256,18 @@ cpdef Column sha1(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_sha1(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(c_result), stream, mr)
+        c_result = cpp_sha1(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column sha224(
     Table input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the SHA-224 hash value of each row in the given table.
@@ -281,17 +288,18 @@ cpdef Column sha224(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_sha224(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(c_result), stream, mr)
+        c_result = cpp_sha224(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column sha256(
     Table input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the SHA-256 hash value of each row in the given table.
@@ -312,17 +320,18 @@ cpdef Column sha256(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_sha256(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(c_result), stream, mr)
+        c_result = cpp_sha256(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column sha384(
     Table input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the SHA-384 hash value of each row in the given table.
@@ -343,17 +352,18 @@ cpdef Column sha384(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_sha384(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(c_result), stream, mr)
+        c_result = cpp_sha384(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column sha512(
     Table input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the SHA-512 hash value of each row in the given table.
@@ -374,9 +384,10 @@ cpdef Column sha512(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_sha512(input.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(c_result), stream, mr)
+        c_result = cpp_sha512(input.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/interop.pxd b/python/pylibcudf/pylibcudf/interop.pxd
index dfa62233541..942b9e806bc 100644
--- a/python/pylibcudf/pylibcudf/interop.pxd
+++ b/python/pylibcudf/pylibcudf/interop.pxd
@@ -1,12 +1,11 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.table cimport Table
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 cpdef Table from_dlpack(
-    object managed_tensor, Stream stream=*, DeviceMemoryResource mr=*
+    object managed_tensor, object stream=*, DeviceMemoryResource mr=*
 )
 
-cpdef object to_dlpack(Table input, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef object to_dlpack(Table input, object stream=*, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi
index 0c10d71ec4f..34fe9394f7d 100644
--- a/python/pylibcudf/pylibcudf/interop.pyi
+++ b/python/pylibcudf/pylibcudf/interop.pyi
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from collections.abc import Iterable, Mapping
@@ -8,12 +8,12 @@ from typing import Any, overload
 import pyarrow as pa
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 @dataclass
 class ColumnMetadata:
@@ -33,14 +33,14 @@ def from_arrow(
     obj: pa.Array[Any],
     *,
     data_type: DataType | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 @overload
 def from_arrow(
     obj: pa.Table,
     *,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 @overload
@@ -67,11 +67,11 @@ def to_arrow(
 ) -> pa.Scalar[Any]: ...
 def from_dlpack(
     managed_tensor: Any,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def to_dlpack(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Any: ...
diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx
index ffc14415470..23c47bb090f 100644
--- a/python/pylibcudf/pylibcudf/interop.pyx
+++ b/python/pylibcudf/pylibcudf/interop.pyx
@@ -23,6 +23,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
 from ._interop_helpers import ColumnMetadata
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = [
@@ -35,7 +36,7 @@ __all__ = [
 
 
 cpdef Table from_dlpack(
-    object managed_tensor, Stream stream=None, DeviceMemoryResource mr=None
+    object managed_tensor, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Convert a DLPack DLTensor into a cudf table.
@@ -65,7 +66,8 @@ cpdef Table from_dlpack(
     if dlpack_tensor is NULL:
         raise ValueError("PyCapsule object contained a NULL pointer")
-    PyCapsule_SetName(managed_tensor, "used_dltensor")
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
+    PyCapsule_SetName(managed_tensor, "used_dltensor")  # rename only after arg checks
 
     # Note: A copy is always performed when converting the dlpack
@@ -74,14 +76,14 @@ cpdef Table from_dlpack(
     # TODO: https://github.com/rapidsai/cudf/issues/10874
     # TODO: https://github.com/rapidsai/cudf/issues/10849
     with nogil:
-        c_result = cpp_from_dlpack(dlpack_tensor, stream.view(), mr.get_mr())
+        c_result = cpp_from_dlpack(dlpack_tensor, _cs, mr.get_mr())
 
-    cdef Table result = Table.from_libcudf(move(c_result), stream, mr)
+    cdef Table result = Table.from_libcudf(move(c_result), _stream, mr)
     dlpack_tensor.deleter(dlpack_tensor)
     return result
 
 
-cpdef object to_dlpack(Table input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef object to_dlpack(Table input, object stream=None, DeviceMemoryResource mr=None):
     """
     Convert a cudf table into a DLPack DLTensor.
 
@@ -109,11 +111,12 @@ cpdef object to_dlpack(Table input, Stream stream=None, DeviceMemoryResource mr=
                 "Input is required to have null count as zero."
             )
     cdef DLManagedTensor *dlpack_tensor
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        dlpack_tensor = cpp_to_dlpack(input.view(), stream.view(), mr.get_mr())
+        dlpack_tensor = cpp_to_dlpack(input.view(), _cs, mr.get_mr())
 
     return PyCapsule_New(
         dlpack_tensor,
diff --git a/python/pylibcudf/pylibcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/io/avro.pxd
index d76f2c1e628..0e8cb7ee283 100644
--- a/python/pylibcudf/pylibcudf/io/avro.pxd
+++ b/python/pylibcudf/pylibcudf/io/avro.pxd
@@ -1,6 +1,5 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
@@ -29,5 +28,5 @@ cdef class AvroReaderOptionsBuilder:
     cpdef AvroReaderOptions build(self)
 
 cpdef TableWithMetadata read_avro(
-    AvroReaderOptions options, Stream stream = *, DeviceMemoryResource mr=*
+    AvroReaderOptions options, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/io/avro.pyi b/python/pylibcudf/pylibcudf/io/avro.pyi
index d7b6c87d388..7e41c39a2be 100644
--- a/python/pylibcudf/pylibcudf/io/avro.pyi
+++ b/python/pylibcudf/pylibcudf/io/avro.pyi
@@ -1,9 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.io.types import SourceInfo, TableWithMetadata
+from pylibcudf.utils import CudaStreamLike
 
 __all__ = ["AvroReaderOptions", "AvroReaderOptionsBuilder", "read_avro"]
 
@@ -21,6 +21,6 @@ class AvroReaderOptionsBuilder:
 
 def read_avro(
     options: AvroReaderOptions,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> TableWithMetadata: ...
diff --git a/python/pylibcudf/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx
index 9c5e2c05b11..f2bd021cdde 100644
--- a/python/pylibcudf/pylibcudf/io/avro.pyx
+++ b/python/pylibcudf/pylibcudf/io/avro.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.string cimport string
@@ -6,6 +6,7 @@ from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
@@ -152,7 +153,7 @@ cdef class AvroReaderOptionsBuilder:
 
 cpdef TableWithMetadata read_avro(
     AvroReaderOptions options,
-    Stream stream = None,
+    object stream = None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -173,8 +174,9 @@ cpdef TableWithMetadata read_avro(
         Device memory resource used to allocate the returned table's device memory.
     """
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = move(cpp_read_avro(options.c_obj, s.view(), mr.get_mr()))
+        c_result = move(cpp_read_avro(options.c_obj, _cs, mr.get_mr()))
 
     return TableWithMetadata.from_libcudf(c_result, s, mr)
diff --git a/python/pylibcudf/pylibcudf/io/csv.pxd b/python/pylibcudf/pylibcudf/io/csv.pxd
index 2f138e3aaa1..4293452311d 100644
--- a/python/pylibcudf/pylibcudf/io/csv.pxd
+++ b/python/pylibcudf/pylibcudf/io/csv.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp cimport bool
 from libcpp.string cimport string
 from libcpp.vector cimport vector
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport SinkInfo, SourceInfo, TableWithMetadata
@@ -74,7 +73,7 @@ cdef class CsvReaderOptionsBuilder:
     cpdef CsvReaderOptions build(self)
 
 cpdef TableWithMetadata read_csv(
-    CsvReaderOptions options, Stream stream = *, DeviceMemoryResource mr=*
+    CsvReaderOptions options, object stream = *, DeviceMemoryResource mr=*
 )
 
 cdef class CsvWriterOptions:
@@ -98,6 +97,6 @@ cdef class CsvWriterOptionsBuilder:
     cpdef CsvWriterOptions build(self)
 
 
-cpdef void write_csv(CsvWriterOptions options, Stream stream = *)
+cpdef void write_csv(CsvWriterOptions options, object stream = *)
 
 cpdef bool is_supported_write_csv(DataType type)
diff --git a/python/pylibcudf/pylibcudf/io/csv.pyi b/python/pylibcudf/pylibcudf/io/csv.pyi
index ade964da509..41465b3ba43 100644
--- a/python/pylibcudf/pylibcudf/io/csv.pyi
+++ b/python/pylibcudf/pylibcudf/io/csv.pyi
@@ -4,7 +4,6 @@
 from typing import Self
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.io.types import (
     CompressionType,
@@ -15,6 +14,7 @@ from pylibcudf.io.types import (
 )
 from pylibcudf.table import Table
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 class CsvReaderOptions:
     def __init__(self): ...
@@ -61,10 +61,12 @@ class CsvReaderOptionsBuilder:
 
 def read_csv(
     options: CsvReaderOptions,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> TableWithMetadata: ...
-def write_csv(options: CsvWriterOptions, stream: Stream | None = None): ...
+def write_csv(
+    options: CsvWriterOptions, stream: CudaStreamLike | None = None
+) -> None: ...
 
 class CsvWriterOptions:
     def __init__(self): ...
diff --git a/python/pylibcudf/pylibcudf/io/csv.pyx b/python/pylibcudf/pylibcudf/io/csv.pyx
index 749cd45fcb5..1c3ae9cb0bf 100644
--- a/python/pylibcudf/pylibcudf/io/csv.pyx
+++ b/python/pylibcudf/pylibcudf/io/csv.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -8,6 +8,7 @@ from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport SourceInfo, SinkInfo, TableWithMetadata
@@ -672,7 +673,7 @@ cdef class CsvReaderOptionsBuilder:
 
 cpdef TableWithMetadata read_csv(
     CsvReaderOptions options,
-    Stream stream = None,
+    object stream = None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -694,9 +695,10 @@ cpdef TableWithMetadata read_csv(
     """
     cdef table_with_metadata c_result
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = move(cpp_read_csv(options.c_obj, s.view(), mr.get_mr()))
+        c_result = move(cpp_read_csv(options.c_obj, _cs, mr.get_mr()))
 
     cdef TableWithMetadata tbl_meta = TableWithMetadata.from_libcudf(c_result, s, mr)
     return tbl_meta
@@ -882,7 +884,7 @@ cdef class CsvWriterOptionsBuilder:
 
 cpdef void write_csv(
     CsvWriterOptions options,
-    Stream stream = None,
+    object stream = None,
 ):
     """
     Write to CSV format.
@@ -900,8 +902,9 @@ cpdef void write_csv(
         CUDA stream used for device memory operations and kernel launches
     """
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     with nogil:
-        cpp_write_csv(move(options.c_obj), s.view())
+        cpp_write_csv(move(options.c_obj), _cs)
 
 
 cpdef bool is_supported_write_csv(DataType type):
diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd
index 298b36651c3..8c471831823 100644
--- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd
+++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd
@@ -32,5 +32,5 @@ cdef class FileMetaData:
 
 cdef class HybridScanReader:
     cdef unique_ptr[cpp_hybrid_scan_reader] c_obj
-    cdef Stream stream
+    cdef Stream _stream
     cdef DeviceMemoryResource mr
diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi
index 0f0429a66db..6f1fbc250d8 100644
--- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi
+++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi
@@ -4,13 +4,13 @@
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.io.parquet import ParquetReaderOptions
 from pylibcudf.io.text import ByteRangeInfo
 from pylibcudf.io.types import TableWithMetadata
 from pylibcudf.span import Span
+from pylibcudf.utils import CudaStreamLike
 
 class UseDataPageMask(IntEnum):
     YES: int
@@ -44,7 +44,7 @@ class HybridScanReader:
         self,
         row_group_indices: list[int],
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> list[int]: ...
     def secondary_filters_byte_ranges(
         self, row_group_indices: list[int], options: ParquetReaderOptions
@@ -54,20 +54,20 @@ class HybridScanReader:
         dictionary_page_data: list[Span],
         row_group_indices: list[int],
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> list[int]: ...
     def filter_row_groups_with_bloom_filters(
         self,
         bloom_filter_data: list[Span],
         row_group_indices: list[int],
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> list[int]: ...
     def build_row_mask_with_page_index_stats(
         self,
         row_group_indices: list[int],
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Column: ...
     def filter_column_chunks_byte_ranges(
@@ -80,7 +80,7 @@ class HybridScanReader:
         row_mask: Column,
         mask_data_pages: UseDataPageMask,
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> TableWithMetadata: ...
     def payload_column_chunks_byte_ranges(
@@ -93,7 +93,7 @@ class HybridScanReader:
         row_mask: Column,
         mask_data_pages: UseDataPageMask,
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> TableWithMetadata: ...
     def all_column_chunks_byte_ranges(
@@ -104,7 +104,7 @@ class HybridScanReader:
         row_group_indices: list[int],
         column_chunk_data: list[Span],
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> TableWithMetadata: ...
     def setup_chunking_for_filter_columns(
@@ -116,7 +116,7 @@ class HybridScanReader:
         mask_data_pages: UseDataPageMask,
         column_chunk_data: list[Span],
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> None: ...
     def materialize_filter_columns_chunk(
@@ -132,7 +132,7 @@ class HybridScanReader:
         mask_data_pages: UseDataPageMask,
         column_chunk_data: list[Span],
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> None: ...
     def materialize_payload_columns_chunk(
diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx
index beb28f6a1b0..4d25a05d362 100644
--- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx
+++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx
@@ -225,7 +225,7 @@ cdef class HybridScanReader:
         self,
         list row_group_indices,
         ParquetReaderOptions options,
-        Stream stream=None
+        object stream=None
     ):
         """Filter row groups using column chunk statistics.
 
@@ -243,7 +243,7 @@ cdef class HybridScanReader:
         list[int]
             Filtered row group indices
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         cdef vector[size_type] indices_vec = row_group_indices
         cdef vector[size_type] filtered = (
             self.c_obj.get()[0].filter_row_groups_with_stats(
@@ -251,7 +251,7 @@ cdef class HybridScanReader:
                     indices_vec.data(), indices_vec.size()
                 ),
                 options.c_obj,
-                stream.view()
+                _stream.view().value()
             )
         )
         return list(filtered)
@@ -295,7 +295,7 @@ cdef class HybridScanReader:
         list dictionary_page_data,
         list row_group_indices,
         ParquetReaderOptions options,
-        Stream stream=None
+        object stream=None
     ):
         """Filter row groups using column chunk dictionary pages.
 
@@ -316,7 +316,7 @@ cdef class HybridScanReader:
             Filtered row group indices
         """
         cdef vector[device_span[const_uint8_t]] spans_vec
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         for span in dictionary_page_data:
             spans_vec.push_back(_get_device_span(span))
 
@@ -329,7 +329,7 @@ cdef class HybridScanReader:
                 ),
                 host_span[const_size_type](indices_vec.data(), indices_vec.size()),
                 options.c_obj,
-                stream.view()
+                _stream.view().value()
             )
         return list(filtered)
 
@@ -338,7 +338,7 @@ cdef class HybridScanReader:
         list bloom_filter_data,
         list row_group_indices,
         ParquetReaderOptions options,
-        Stream stream=None
+        object stream=None
     ):
         """Filter row groups using column chunk bloom filters.
 
@@ -359,7 +359,7 @@ cdef class HybridScanReader:
             Filtered row group indices
         """
         cdef vector[device_span[const_uint8_t]] spans_vec
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         for span in bloom_filter_data:
             spans_vec.push_back(_get_device_span(span))
 
@@ -372,7 +372,7 @@ cdef class HybridScanReader:
                 ),
                 host_span[const_size_type](indices_vec.data(), indices_vec.size()),
                 options.c_obj,
-                stream.view()
+                _stream.view().value()
             )
         return list(filtered)
 
@@ -380,7 +380,7 @@ cdef class HybridScanReader:
         self,
         list row_group_indices,
         ParquetReaderOptions options,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         """Build a boolean column indicating surviving rows from page stats.
@@ -402,16 +402,16 @@ cdef class HybridScanReader:
             Boolean column indicating surviving rows
         """
         cdef vector[size_type] indices_vec = row_group_indices
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         mr = _get_memory_resource(mr)
         cdef unique_ptr[column] c_result = \
             self.c_obj.get()[0].build_row_mask_with_page_index_stats(
                 host_span[const_size_type](indices_vec.data(), indices_vec.size()),
                 options.c_obj,
-                stream.view(),
+                _stream.view().value(),
                 mr.get_mr()
             )
-        return Column.from_libcudf(move(c_result), stream, mr)
+        return Column.from_libcudf(move(c_result), _stream, mr)
 
     def filter_column_chunks_byte_ranges(
         self,
@@ -447,7 +447,7 @@ cdef class HybridScanReader:
         Column row_mask,
         cpp_use_data_page_mask mask_data_pages,
         ParquetReaderOptions options,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         """Materialize filter columns and update the row mask.
@@ -477,7 +477,7 @@ cdef class HybridScanReader:
         cdef vector[size_type] indices_vec = row_group_indices
 
         cdef vector[device_span[const_uint8_t]] spans_vec
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         mr = _get_memory_resource(mr)
         for span in column_chunk_data:
             spans_vec.push_back(_get_device_span(span))
@@ -492,10 +492,10 @@ cdef class HybridScanReader:
                 mask_view,
                 mask_data_pages,
                 options.c_obj,
-                stream.view(),
+                _stream.view().value(),
                 mr.get_mr()
             )
-        return TableWithMetadata.from_libcudf(c_result, stream, mr)
+        return TableWithMetadata.from_libcudf(c_result, _stream, mr)
 
     def payload_column_chunks_byte_ranges(
         self,
@@ -531,7 +531,7 @@ cdef class HybridScanReader:
         Column row_mask,
         cpp_use_data_page_mask mask_data_pages,
         ParquetReaderOptions options,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         """Materialize payload columns and apply the row mask.
@@ -561,7 +561,7 @@ cdef class HybridScanReader:
         cdef vector[size_type] indices_vec = row_group_indices
 
         cdef vector[device_span[const_uint8_t]] spans_vec
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         mr = _get_memory_resource(mr)
         for span in column_chunk_data:
             spans_vec.push_back(_get_device_span(span))
@@ -576,10 +576,10 @@ cdef class HybridScanReader:
                 mask_view,
                 mask_data_pages,
                 options.c_obj,
-                stream.view(),
+                _stream.view().value(),
                 mr.get_mr()
             )
-        return TableWithMetadata.from_libcudf(c_result, stream, mr)
+        return TableWithMetadata.from_libcudf(c_result, _stream, mr)
 
     def all_column_chunks_byte_ranges(
         self,
@@ -613,7 +613,7 @@ cdef class HybridScanReader:
         list row_group_indices,
         list column_chunk_data,
         ParquetReaderOptions options,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         """Materialize all columns.
@@ -639,7 +639,7 @@ cdef class HybridScanReader:
         cdef vector[size_type] indices_vec = row_group_indices
 
         cdef vector[device_span[const_uint8_t]] spans_vec
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         mr = _get_memory_resource(mr)
         for span in column_chunk_data:
             spans_vec.push_back(_get_device_span(span))
@@ -650,10 +650,10 @@ cdef class HybridScanReader:
                     <const_device_span_const_uint8_t*>spans_vec.data(), spans_vec.size()
                 ),
                 options.c_obj,
-                stream.view(),
+                _stream.view().value(),
                 mr.get_mr()
             )
-        return TableWithMetadata.from_libcudf(c_result, stream, mr)
+        return TableWithMetadata.from_libcudf(c_result, _stream, mr)
 
     def setup_chunking_for_filter_columns(
         self,
@@ -664,7 +664,7 @@ cdef class HybridScanReader:
         cpp_use_data_page_mask mask_data_pages,
         list column_chunk_data,
         ParquetReaderOptions options,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         """Setup chunking information for filter columns.
@@ -696,7 +696,7 @@ cdef class HybridScanReader:
         for span in column_chunk_data:
             spans_vec.push_back(_get_device_span(span))
 
-        self.stream = _get_stream(stream)
+        self._stream = _get_stream(stream)
         self.mr = _get_memory_resource(mr)
 
         cdef column_view mask_view = row_mask.view()
@@ -710,7 +710,7 @@ cdef class HybridScanReader:
                 <const_device_span_const_uint8_t*>spans_vec.data(), spans_vec.size()
             ),
             options.c_obj,
-            self.stream.view(),
+            self._stream.view().value(),
             self.mr.get_mr()
         )
 
@@ -735,7 +735,7 @@ cdef class HybridScanReader:
                 mask_view
             )
         return TableWithMetadata.from_libcudf(
-            c_result, self.stream, self.mr
+            c_result, self._stream, self.mr
         )
 
     def setup_chunking_for_payload_columns(
@@ -747,7 +747,7 @@ cdef class HybridScanReader:
         cpp_use_data_page_mask mask_data_pages,
         list column_chunk_data,
         ParquetReaderOptions options,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         """Setup chunking information for payload columns.
@@ -779,7 +779,7 @@ cdef class HybridScanReader:
         for span in column_chunk_data:
             spans_vec.push_back(_get_device_span(span))
 
-        self.stream = _get_stream(stream)
+        self._stream = _get_stream(stream)
         self.mr = _get_memory_resource(mr)
 
         cdef column_view mask_view = row_mask.view()
@@ -793,7 +793,7 @@ cdef class HybridScanReader:
                 <const_device_span_const_uint8_t*>spans_vec.data(), spans_vec.size()
             ),
             options.c_obj,
-            self.stream.view(),
+            self._stream.view().value(),
             self.mr.get_mr()
         )
 
@@ -818,7 +818,7 @@ cdef class HybridScanReader:
                 mask_view
             )
         return TableWithMetadata.from_libcudf(
-            c_result, self.stream, self.mr
+            c_result, self._stream, self.mr
         )
 
     def construct_row_group_passes(
diff --git a/python/pylibcudf/pylibcudf/io/json.pxd b/python/pylibcudf/pylibcudf/io/json.pxd
index 96bc102ef0b..e46942ea14b 100644
--- a/python/pylibcudf/pylibcudf/io/json.pxd
+++ b/python/pylibcudf/pylibcudf/io/json.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp cimport bool
 from libcpp.map cimport map
 from libcpp.vector cimport vector
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport (
@@ -83,7 +82,7 @@ cdef class JsonReaderOptionsBuilder:
     cpdef build(self)
 
 cpdef TableWithMetadata read_json(
-    JsonReaderOptions options, Stream stream = *, DeviceMemoryResource mr = *
+    JsonReaderOptions options, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef TableWithMetadata read_json_from_string_column(
@@ -93,7 +92,7 @@ cpdef TableWithMetadata read_json_from_string_column(
     list dtypes = *,
     compression_type compression = *,
     json_recovery_mode_t recovery_mode = *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *)
 
 cdef class JsonWriterOptions:
@@ -117,13 +116,13 @@ cdef class JsonWriterOptionsBuilder:
     cpdef JsonWriterOptionsBuilder utf8_escaped(self, bool val)
     cpdef JsonWriterOptions build(self)
 
-cpdef void write_json(JsonWriterOptions options, Stream stream = *)
+cpdef void write_json(JsonWriterOptions options, object stream = *)
 
 cpdef bool is_supported_write_json(DataType type)
 
 cpdef tuple chunked_read_json(
     JsonReaderOptions options,
     int chunk_size= *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi
index f19da874a0d..a03d8ef407c 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyi
+++ b/python/pylibcudf/pylibcudf/io/json.pyi
@@ -4,7 +4,6 @@ from collections.abc import Mapping
 from typing import Self, TypeAlias
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.io.types import (
@@ -17,6 +16,7 @@ from pylibcudf.io.types import (
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 ChildNameToTypeMap: TypeAlias = Mapping[str, ChildNameToTypeMap]
 
@@ -73,7 +73,7 @@ class JsonReaderOptionsBuilder:
 
 def read_json(
     options: JsonReaderOptions,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> TableWithMetadata: ...
 def read_json_from_string_column(
@@ -83,7 +83,7 @@ def read_json_from_string_column(
     dtypes: list | None = None,
     compression: CompressionType = CompressionType.NONE,
     recovery_mode: JSONRecoveryMode = JSONRecoveryMode.RECOVER_WITH_NULL,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> TableWithMetadata: ...
 
@@ -105,12 +105,12 @@ class JsonWriterOptionsBuilder:
     def build(self) -> JsonWriterOptions: ...
 
 def write_json(
-    options: JsonWriterOptions, stream: Stream | None = None
+    options: JsonWriterOptions, stream: CudaStreamLike | None = None
 ) -> None: ...
 def chunked_read_json(
     options: JsonReaderOptions,
     chunk_size: int = 100_000_000,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[list[Column], list[str], ChildNameToTypeMap]: ...
 def is_supported_write_json(type: DataType) -> bool: ...
diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx
index aa66c6fe5c2..1bce364fdd8 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyx
+++ b/python/pylibcudf/pylibcudf/io/json.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp cimport bool
 from libcpp.map cimport map
@@ -49,6 +49,7 @@ from pylibcudf.utils cimport _get_stream
 from cython.operator import dereference
 
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "chunked_read_json",
@@ -704,7 +705,7 @@ cdef class JsonReaderOptionsBuilder:
 cpdef tuple chunked_read_json(
     JsonReaderOptions options,
     int chunk_size=100_000_000,
-    Stream stream = None,
+    object stream = None,
     DeviceMemoryResource mr = None,
 ):
     """
@@ -735,6 +736,7 @@ cpdef tuple chunked_read_json(
     child_names = None
     i = 0
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     mr = _get_memory_resource(mr)
     while True:
         options.enable_lines(True)
@@ -743,7 +745,7 @@ cpdef tuple chunked_read_json(
 
         try:
             with nogil:
-                c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr()))
+                c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr()))
         except (ValueError, OverflowError):
             break
         if meta_names is None:
@@ -772,7 +774,7 @@ cpdef tuple chunked_read_json(
 
 cpdef TableWithMetadata read_json(
     JsonReaderOptions options,
-    Stream stream = None,
+    object stream = None,
     DeviceMemoryResource mr = None
 ):
     """
@@ -797,9 +799,10 @@ cpdef TableWithMetadata read_json(
     """
     cdef table_with_metadata c_result
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr()))
+        c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr()))
 
     return TableWithMetadata.from_libcudf(c_result, s, mr)
 
@@ -810,7 +813,7 @@ cpdef TableWithMetadata read_json_from_string_column(
     list dtypes = None,
     compression_type compression = compression_type.NONE,
     json_recovery_mode_t recovery_mode = json_recovery_mode_t.RECOVER_WITH_NULL,
-    Stream stream = None,
+    object stream = None,
     DeviceMemoryResource mr = None
 ):
     """
@@ -852,7 +855,8 @@ cpdef TableWithMetadata read_json_from_string_column(
     cdef unique_ptr[column] c_join_string_column
     cdef column_contents c_contents
     cdef table_with_metadata c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     # Join the string column into a single string
@@ -862,7 +866,7 @@ cpdef TableWithMetadata read_json_from_string_column(
                 input.view(),
                 dereference(c_separator),
                 dereference(c_narep),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
@@ -870,7 +874,7 @@ cpdef TableWithMetadata read_json_from_string_column(
 
     # Create a new source from the joined string data
     cdef SourceInfo joined_source = SourceInfo(
-            [DeviceBuffer.c_from_unique_ptr(move(c_contents.data), stream, mr)])
+            [DeviceBuffer.c_from_unique_ptr(move(c_contents.data), _stream, mr)])
 
     # Create new options using the joined string as source
     cdef JsonReaderOptions options = (
@@ -886,9 +890,9 @@ cpdef TableWithMetadata read_json_from_string_column(
 
     # Read JSON from the joined string
     with nogil:
-        c_result = move(cpp_read_json(options.c_obj, stream.view(), mr.get_mr()))
+        c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr()))
 
-    return TableWithMetadata.from_libcudf(c_result, stream, mr)
+    return TableWithMetadata.from_libcudf(c_result, _stream, mr)
 
 cdef class JsonWriterOptions:
     """
@@ -1090,7 +1094,7 @@ cdef class JsonWriterOptionsBuilder:
         return json_options
 
 
-cpdef void write_json(JsonWriterOptions options, Stream stream = None):
+cpdef void write_json(JsonWriterOptions options, object stream = None):
     """
     Writes a set of columns to JSON format.
 
@@ -1106,8 +1110,9 @@ cpdef void write_json(JsonWriterOptions options, Stream stream = None):
     None
     """
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     with nogil:
-        cpp_write_json(options.c_obj, s.view())
+        cpp_write_json(options.c_obj, _cs)
 
 cpdef bool is_supported_write_json(DataType type):
     """Check if the dtype is supported for JSON writing
diff --git a/python/pylibcudf/pylibcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/io/orc.pxd
index 24221163917..72ad5aac534 100644
--- a/python/pylibcudf/pylibcudf/io/orc.pxd
+++ b/python/pylibcudf/pylibcudf/io/orc.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libc.stdint cimport uint64_t, int64_t
 
@@ -9,7 +9,6 @@ from libcpp.optional cimport optional
 from libcpp.string cimport string
 from libcpp.vector cimport vector
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport (
@@ -65,7 +64,7 @@ cdef class OrcReaderOptionsBuilder:
     cpdef OrcReaderOptions build(self)
 
 cpdef TableWithMetadata read_orc(
-    OrcReaderOptions options, Stream stream = *, DeviceMemoryResource mr=*
+    OrcReaderOptions options, object stream = *, DeviceMemoryResource mr=*
 )
 
 cdef class OrcColumnStatistics:
@@ -89,7 +88,7 @@ cdef class ParsedOrcStatistics:
 
 cpdef ParsedOrcStatistics read_parsed_orc_statistics(
     SourceInfo source_info,
-    Stream stream=*
+    object stream = *
 )
 
 cdef class OrcWriterOptions:
@@ -110,7 +109,7 @@ cdef class OrcWriterOptionsBuilder:
     cpdef OrcWriterOptionsBuilder metadata(self, TableInputMetadata meta)
     cpdef OrcWriterOptions build(self)
 
-cpdef void write_orc(OrcWriterOptions options, Stream stream = *)
+cpdef void write_orc(OrcWriterOptions options, object stream = *)
 
 cdef class OrcChunkedWriter:
     cdef unique_ptr[orc_chunked_writer] c_obj
diff --git a/python/pylibcudf/pylibcudf/io/orc.pyi b/python/pylibcudf/pylibcudf/io/orc.pyi
index dcf2b731bac..3cb6daff240 100644
--- a/python/pylibcudf/pylibcudf/io/orc.pyi
+++ b/python/pylibcudf/pylibcudf/io/orc.pyi
@@ -4,7 +4,6 @@
 from typing import Any, Self
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.io.types import (
     CompressionType,
@@ -16,6 +15,7 @@ from pylibcudf.io.types import (
 )
 from pylibcudf.table import Table
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 class OrcReaderOptions:
     def set_num_rows(self, nrows: int) -> None: ...
@@ -34,7 +34,7 @@ class OrcReaderOptionsBuilder:
 
 def read_orc(
     options: OrcReaderOptions,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> TableWithMetadata: ...
 
@@ -59,7 +59,7 @@ class ParsedOrcStatistics:
 
 def read_parsed_orc_statistics(
     source_info: SourceInfo,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> ParsedOrcStatistics: ...
 
 class OrcWriterOptions:
@@ -79,7 +79,7 @@ class OrcWriterOptionsBuilder:
     def build(self) -> OrcWriterOptions: ...
 
 def write_orc(
-    options: OrcWriterOptions, stream: Stream | None = None
+    options: OrcWriterOptions, stream: CudaStreamLike | None = None
 ) -> None: ...
 def is_supported_read_orc(compression: CompressionType) -> bool: ...
 def is_supported_write_orc(compression: CompressionType) -> bool: ...
@@ -90,7 +90,7 @@ class OrcChunkedWriter:
     def write(self, table: Table) -> None: ...
     @staticmethod
     def from_options(
-        options: ChunkedOrcWriterOptions, stream: Stream | None = None
+        options: ChunkedOrcWriterOptions, stream: CudaStreamLike | None = None
     ) -> OrcChunkedWriter: ...
 
 class ChunkedOrcWriterOptions:
diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx
index 8c3687ec232..3a2fabc5683 100644
--- a/python/pylibcudf/pylibcudf/io/orc.pyx
+++ b/python/pylibcudf/pylibcudf/io/orc.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp cimport bool
 from libcpp.string cimport string
@@ -8,6 +8,7 @@ from libcpp.vector cimport vector
 import datetime
 
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport SourceInfo, TableWithMetadata, SinkInfo
@@ -444,7 +445,7 @@ cdef class OrcReaderOptionsBuilder:
 
 
 cpdef TableWithMetadata read_orc(
-    OrcReaderOptions options, Stream stream = None, DeviceMemoryResource mr=None
+    OrcReaderOptions options, object stream = None, DeviceMemoryResource mr=None
 ):
     """
     Read from ORC format.
@@ -465,17 +466,17 @@ cpdef TableWithMetadata read_orc(
     """
     cdef table_with_metadata c_result
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     mr = _get_memory_resource(mr)
-
     with nogil:
-        c_result = move(cpp_read_orc(options.c_obj, s.view(), mr.get_mr()))
+        c_result = move(cpp_read_orc(options.c_obj, _cs, mr.get_mr()))
 
     return TableWithMetadata.from_libcudf(c_result, s, mr)
 
 
 cpdef ParsedOrcStatistics read_parsed_orc_statistics(
     SourceInfo source_info,
-    Stream stream=None
+    object stream=None
 ):
     """
     Read ORC statistics from a source.
@@ -494,8 +495,9 @@ cpdef ParsedOrcStatistics read_parsed_orc_statistics(
     """
     cdef Stream s = _get_stream(stream)
     cdef parsed_orc_statistics parsed
+    cdef cudaStream_t _cs = s.view().value()
     with nogil:
-        parsed = cpp_read_parsed_orc_statistics(source_info.c_obj, s.view())
+        parsed = cpp_read_parsed_orc_statistics(source_info.c_obj, _cs)
     return ParsedOrcStatistics.from_libcudf(parsed)
 
 
@@ -667,7 +669,7 @@ cdef class OrcWriterOptionsBuilder:
         return orc_options
 
 
-cpdef void write_orc(OrcWriterOptions options, Stream stream = None):
+cpdef void write_orc(OrcWriterOptions options, object stream = None):
     """
     Write to ORC format.
 
@@ -688,8 +690,9 @@ cpdef void write_orc(OrcWriterOptions options, Stream stream = None):
     None
     """
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     with nogil:
-        cpp_write_orc(move(options.c_obj), s.view())
+        cpp_write_orc(move(options.c_obj), _cs)
 
 
 cdef class OrcChunkedWriter:
@@ -721,7 +724,7 @@ cdef class OrcChunkedWriter:
             self.c_obj.get()[0].write(table.view())
 
     @staticmethod
-    def from_options(ChunkedOrcWriterOptions options, Stream stream = None):
+    def from_options(ChunkedOrcWriterOptions options, object stream = None):
         """
         Creates a chunked ORC writer from options
 
@@ -740,7 +743,8 @@ cdef class OrcChunkedWriter:
             OrcChunkedWriter
         )
         cdef Stream s = _get_stream(stream)
-        orc_writer.c_obj.reset(new orc_chunked_writer(options.c_obj, s.view()))
+        cdef cudaStream_t _cs = s.view().value()
+        orc_writer.c_obj.reset(new orc_chunked_writer(options.c_obj, _cs))
         return orc_writer
 
 
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd
index d9350f77721..c98a90dd692 100644
--- a/python/pylibcudf/pylibcudf/io/parquet.pxd
+++ b/python/pylibcudf/pylibcudf/io/parquet.pxd
@@ -6,8 +6,8 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
+from rmm.pylibrmm.stream cimport Stream
 
 from pylibcudf.expressions cimport Expression
 
@@ -74,7 +74,7 @@ cdef class ParquetReaderOptionsBuilder:
 
 
 cdef class ChunkedParquetReader:
-    cdef readonly Stream stream
+    cdef Stream _stream
     cdef DeviceMemoryResource mr
     cdef unique_ptr[cpp_chunked_parquet_reader] reader
 
@@ -83,7 +83,7 @@ cdef class ChunkedParquetReader:
 
 
 cpdef read_parquet(
-    ParquetReaderOptions options, Stream stream = *, DeviceMemoryResource mr=*
+    ParquetReaderOptions options, object stream = *, DeviceMemoryResource mr=*
 )
 
 
@@ -180,7 +180,7 @@ cdef class ParquetWriterOptionsBuilder:
 
     cpdef ParquetWriterOptions build(self)
 
-cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = *)
+cpdef memoryview write_parquet(ParquetWriterOptions options, object stream = *)
 
 cpdef bool is_supported_read_parquet(compression_type compression)
 
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyi b/python/pylibcudf/pylibcudf/io/parquet.pyi
index c0c31e22007..f0a092f63e0 100644
--- a/python/pylibcudf/pylibcudf/io/parquet.pyi
+++ b/python/pylibcudf/pylibcudf/io/parquet.pyi
@@ -5,7 +5,6 @@ from collections.abc import Mapping, Sequence
 from typing import Self
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.expressions import Expression
 from pylibcudf.io.types import (
@@ -20,6 +19,7 @@ from pylibcudf.io.types import (
 )
 from pylibcudf.table import Table
 from pylibcudf.types import TypeId
+from pylibcudf.utils import CudaStreamLike
 
 class ParquetReaderOptions:
     def __init__(self): ...
@@ -53,7 +53,7 @@ class ChunkedParquetReader:
     def __init__(
         self,
         options: ParquetReaderOptions,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         chunk_read_limit: int = 0,
         pass_read_limit: int = 1024000000,
     ) -> None: ...
@@ -62,7 +62,7 @@ class ChunkedParquetReader:
 
 def read_parquet(
     options: ParquetReaderOptions,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> TableWithMetadata: ...
 
@@ -101,7 +101,7 @@ class ParquetWriterOptionsBuilder:
     def build(self) -> ParquetWriterOptions: ...
 
 def write_parquet(
-    options: ParquetWriterOptions, stream: Stream | None = None
+    options: ParquetWriterOptions, stream: CudaStreamLike | None = None
 ) -> memoryview: ...
 def is_supported_read_parquet(compression: CompressionType) -> bool: ...
 def is_supported_write_parquet(compression: CompressionType) -> bool: ...
@@ -112,7 +112,8 @@ class ChunkedParquetWriter:
     def write(self, table: Table, partitions_info: object = None) -> None: ...
     @staticmethod
     def from_options(
-        options: ChunkedParquetWriterOptions, stream: Stream | None = None
+        options: ChunkedParquetWriterOptions,
+        stream: CudaStreamLike | None = None,
     ) -> ChunkedParquetWriter: ...
 
 class ChunkedParquetWriterOptions:
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx
index c4bad082304..86904513cfa 100644
--- a/python/pylibcudf/pylibcudf/io/parquet.pyx
+++ b/python/pylibcudf/pylibcudf/io/parquet.pyx
@@ -46,6 +46,7 @@ from pylibcudf.libcudf.io.types cimport (
 from pylibcudf.libcudf.types cimport size_type, type_id
 from pylibcudf.table cimport Table
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "ChunkedParquetReader",
@@ -507,20 +508,21 @@ cdef class ChunkedParquetReader:
     def __init__(
         self,
         ParquetReaderOptions options,
-        Stream stream = None,
+        object stream = None,
         DeviceMemoryResource mr = None,
         size_t chunk_read_limit=0,
         size_t pass_read_limit=1024000000,
     ):
-        self.stream = _get_stream(stream)
+        self._stream = _get_stream(stream)
         self.mr = _get_memory_resource(mr)
+        cdef cudaStream_t stream_view = self._stream.view().value()
         with nogil:
             self.reader.reset(
                 new cpp_chunked_parquet_reader(
                     chunk_read_limit,
                     pass_read_limit,
                     options.c_obj,
-                    self.stream.view(),
+                    stream_view,
                     self.mr.get_mr()
                 )
             )
@@ -560,11 +562,11 @@ cdef class ChunkedParquetReader:
         with nogil:
             c_result = move(self.reader.get()[0].read_chunk())
 
-        return TableWithMetadata.from_libcudf(c_result, self.stream, mr)
+        return TableWithMetadata.from_libcudf(c_result, self._stream, mr)
 
 
 cpdef read_parquet(
-    ParquetReaderOptions options, Stream stream = None, DeviceMemoryResource mr=None
+    ParquetReaderOptions options, object stream = None, DeviceMemoryResource mr=None
 ):
     """
     Read from Parquet format.
@@ -584,9 +586,10 @@ cpdef read_parquet(
         Device memory resource used to allocate the returned table's device memory.
     """
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = move(cpp_read_parquet(options.c_obj, s.view(), mr.get_mr()))
+        c_result = move(cpp_read_parquet(options.c_obj, _cs, mr.get_mr()))
 
     return TableWithMetadata.from_libcudf(c_result, s, mr)
 
@@ -640,7 +643,7 @@ cdef class ChunkedParquetWriter:
             self.c_obj.get()[0].write(table.view(), partitions)
 
     @staticmethod
-    def from_options(ChunkedParquetWriterOptions options, Stream stream = None):
+    def from_options(ChunkedParquetWriterOptions options, object stream = None):
         """
         Creates a chunked Parquet writer from options
 
@@ -659,8 +662,9 @@ cdef class ChunkedParquetWriter:
             ChunkedParquetWriter
         )
         cdef Stream s = _get_stream(stream)
+        cdef cudaStream_t _cs = s.view().value()
         parquet_writer.c_obj.reset(
-            new cpp_chunked_parquet_writer(options.c_obj, s.view())
+            new cpp_chunked_parquet_writer(options.c_obj, _cs)
         )
         return parquet_writer
 
@@ -1235,7 +1239,7 @@ cdef class ParquetWriterOptionsBuilder:
         return parquet_options
 
 
-cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = None):
+cpdef memoryview write_parquet(ParquetWriterOptions options, object stream = None):
     """
     Writes a set of columns to parquet format.
 
@@ -1255,9 +1259,9 @@ cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = Non
     """
     cdef unique_ptr[vector[uint8_t]] c_result
     cdef Stream s = _get_stream(stream)
-
+    cdef cudaStream_t _cs = s.view().value()
     with nogil:
-        c_result = cpp_write_parquet(move(options.c_obj), s.view())
+        c_result = cpp_write_parquet(move(options.c_obj), _cs)
 
     return memoryview(HostBuffer.from_unique_ptr(move(c_result)))
 
diff --git a/python/pylibcudf/pylibcudf/io/text.pxd b/python/pylibcudf/pylibcudf/io/text.pxd
index 7623c8da26b..5276f9ffaba 100644
--- a/python/pylibcudf/pylibcudf/io/text.pxd
+++ b/python/pylibcudf/pylibcudf/io/text.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from pylibcudf.column cimport Column
-from pylibcudf.io.types cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from pylibcudf.libcudf.io.text cimport parse_options, data_chunk_source, byte_range_info
 
@@ -23,7 +22,7 @@ cpdef Column multibyte_split(
     DataChunkSource source,
     str delimiter,
     ParseOptions options=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
diff --git a/python/pylibcudf/pylibcudf/io/text.pyi b/python/pylibcudf/pylibcudf/io/text.pyi
index 66406c94dd2..581e45c3194 100644
--- a/python/pylibcudf/pylibcudf/io/text.pyi
+++ b/python/pylibcudf/pylibcudf/io/text.pyi
@@ -1,10 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 class ByteRangeInfo:
     def __init__(self, offset: int, size: int) -> None: ...
@@ -35,6 +35,6 @@ def multibyte_split(
     source: DataChunkSource,
     delimiter: str,
     options: ParseOptions | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/io/text.pyx b/python/pylibcudf/pylibcudf/io/text.pyx
index 9fb220b0a37..be15701a4d8 100644
--- a/python/pylibcudf/pylibcudf/io/text.pyx
+++ b/python/pylibcudf/pylibcudf/io/text.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -9,10 +9,11 @@ from libcpp.utility cimport move
 
 from pylibcudf.column cimport Column
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
-from pylibcudf.io.types cimport Stream
+from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.io cimport text as cpp_text
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "ByteRangeInfo",
@@ -193,7 +194,7 @@ cpdef Column multibyte_split(
     DataChunkSource source,
     str delimiter,
     ParseOptions options=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -224,7 +225,8 @@ cpdef Column multibyte_split(
     cdef unique_ptr[column] c_result
     cdef unique_ptr[data_chunk_source] c_source = move(source.c_source)
     cdef string c_delimiter = delimiter.encode()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if options is None:
@@ -237,8 +239,8 @@ cpdef Column multibyte_split(
             dereference(c_source),
             c_delimiter,
             c_options,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/io/timezone.pxd
index a2fa33d102d..9a12be928b2 100644
--- a/python/pylibcudf/pylibcudf/io/timezone.pxd
+++ b/python/pylibcudf/pylibcudf/io/timezone.pxd
@@ -1,11 +1,11 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from ..table cimport Table
-from .types cimport Stream
+
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 
 cpdef Table make_timezone_transition_table(
-    str tzif_dir, str timezone_name, Stream stream=*, DeviceMemoryResource mr=*
+    str tzif_dir, str timezone_name, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyi b/python/pylibcudf/pylibcudf/io/timezone.pyi
index d83f68424b4..f87dda70f70 100644
--- a/python/pylibcudf/pylibcudf/io/timezone.pyi
+++ b/python/pylibcudf/pylibcudf/io/timezone.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def make_timezone_transition_table(
     tzif_dir: str,
     timezone_name: str,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyx b/python/pylibcudf/pylibcudf/io/timezone.pyx
index 0416df1cf0b..033ed15a1ba 100644
--- a/python/pylibcudf/pylibcudf/io/timezone.pyx
+++ b/python/pylibcudf/pylibcudf/io/timezone.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -12,13 +12,14 @@ from pylibcudf.libcudf.table.table cimport table
 
 from ..utils cimport _get_stream, _get_memory_resource
 from ..table cimport Table
-from .types cimport Stream
+from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["make_timezone_transition_table"]
 
 cpdef Table make_timezone_transition_table(
-    str tzif_dir, str timezone_name, Stream stream=None, DeviceMemoryResource mr=None,
+    str tzif_dir, str timezone_name, object stream=None, DeviceMemoryResource mr=None,
 ):
     """
     Creates a transition table to convert ORC timestamps to UTC.
@@ -42,15 +43,16 @@ cpdef Table make_timezone_transition_table(
     cdef unique_ptr[table] c_result
     cdef string c_tzdir = tzif_dir.encode()
     cdef string c_tzname = timezone_name.encode()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_make_timezone_transition_table(
             make_optional[string](c_tzdir),
             c_tzname,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/io/types.pxd b/python/pylibcudf/pylibcudf/io/types.pxd
index db7e2ad95c5..1e52f4faa05 100644
--- a/python/pylibcudf/pylibcudf/io/types.pxd
+++ b/python/pylibcudf/pylibcudf/io/types.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libc.stdint cimport uint8_t, int32_t
 
@@ -29,7 +29,6 @@ from pylibcudf.libcudf.utilities.span cimport host_span
 
 from pylibcudf.table cimport Table
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 cdef class PartitionInfo:
@@ -86,7 +85,7 @@ cdef class TableWithMetadata:
 
     @staticmethod
     cdef TableWithMetadata from_libcudf(
-        table_with_metadata& tbl, Stream stream, DeviceMemoryResource mr
+        table_with_metadata& tbl, object stream, DeviceMemoryResource mr
     )
 
 cdef class SourceInfo:
diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
index 1c4a7f49268..27c3bb47caf 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -33,7 +33,6 @@ from pylibcudf.libcudf.utilities.span cimport device_span, host_span
 from pylibcudf.span import is_span
 
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 import codecs
 import errno
@@ -396,7 +395,7 @@ cdef class TableWithMetadata:
     @staticmethod
     cdef TableWithMetadata from_libcudf(
         table_with_metadata& tbl_with_meta,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr
     ):
         """Create a Python TableWithMetadata from a libcudf table_with_metadata"""
diff --git a/python/pylibcudf/pylibcudf/join.pxd b/python/pylibcudf/pylibcudf/join.pxd
index 31a998029e3..f0b69a42621 100644
--- a/python/pylibcudf/pylibcudf/join.pxd
+++ b/python/pylibcudf/pylibcudf/join.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
 from pylibcudf.libcudf cimport join as cpp_join
 from pylibcudf.libcudf.types cimport null_equality
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
@@ -16,7 +15,7 @@ cpdef tuple inner_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -24,7 +23,7 @@ cpdef tuple left_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -32,7 +31,7 @@ cpdef tuple full_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -40,7 +39,7 @@ cpdef Column left_semi_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -48,19 +47,19 @@ cpdef Column left_anti_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Table cross_join(
-    Table left, Table right, Stream stream=*, DeviceMemoryResource mr=*
+    Table left, Table right, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef tuple conditional_inner_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -68,7 +67,7 @@ cpdef tuple conditional_left_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -76,7 +75,7 @@ cpdef tuple conditional_full_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -84,7 +83,7 @@ cpdef Column conditional_left_semi_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -92,7 +91,7 @@ cpdef Column conditional_left_anti_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -103,7 +102,7 @@ cpdef tuple mixed_inner_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -114,7 +113,7 @@ cpdef tuple mixed_left_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -125,7 +124,7 @@ cpdef tuple mixed_full_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -136,7 +135,7 @@ cpdef Column mixed_left_semi_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -147,7 +146,7 @@ cpdef Column mixed_left_anti_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
diff --git a/python/pylibcudf/pylibcudf/join.pyi b/python/pylibcudf/pylibcudf/join.pyi
index 615eb914618..1cf86c7c704 100644
--- a/python/pylibcudf/pylibcudf/join.pyi
+++ b/python/pylibcudf/pylibcudf/join.pyi
@@ -4,12 +4,12 @@
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.expressions import Expression
 from pylibcudf.table import Table
 from pylibcudf.types import NullEquality
+from pylibcudf.utils import CudaStreamLike
 
 class SetAsBuildTable(IntEnum):
     LEFT = ...
@@ -19,76 +19,76 @@ def inner_join(
     left_keys: Table,
     right_keys: Table,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def left_join(
     left_keys: Table,
     right_keys: Table,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def full_join(
     left_keys: Table,
     right_keys: Table,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def left_semi_join(
     left_keys: Table,
     right_keys: Table,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def left_anti_join(
     left_keys: Table,
     right_keys: Table,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def cross_join(
     left: Table,
     right: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def conditional_inner_join(
     left: Table,
     right: Table,
     binary_predicate: Expression,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def conditional_left_join(
     left: Table,
     right: Table,
     binary_predicate: Expression,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def conditional_full_join(
     left: Table,
     right: Table,
     binary_predicate: Expression,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def conditional_left_semi_join(
     left: Table,
     right: Table,
     binary_predicate: Expression,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def conditional_left_anti_join(
     left: Table,
     right: Table,
     binary_predicate: Expression,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def mixed_inner_join(
@@ -98,7 +98,7 @@ def mixed_inner_join(
     right_conditional: Table,
     binary_predicate: Expression,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def mixed_left_join(
@@ -108,7 +108,7 @@ def mixed_left_join(
     right_conditional: Table,
     binary_predicate: Expression,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def mixed_full_join(
@@ -118,7 +118,7 @@ def mixed_full_join(
     right_conditional: Table,
     binary_predicate: Expression,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
 def mixed_left_semi_join(
@@ -128,7 +128,7 @@ def mixed_left_semi_join(
     right_conditional: Table,
     binary_predicate: Expression,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def mixed_left_anti_join(
@@ -138,7 +138,7 @@ def mixed_left_anti_join(
     right_conditional: Table,
     binary_predicate: Expression,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 
@@ -148,17 +148,17 @@ class FilteredJoin:
         build: Table,
         compare_nulls: NullEquality,
         load_factor: float = ...,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> None: ...
     def semi_join(
         self,
         probe: Table,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Column: ...
     def anti_join(
         self,
         probe: Table,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx
index 61a321b27a8..78a44554dff 100644
--- a/python/pylibcudf/pylibcudf/join.pyx
+++ b/python/pylibcudf/pylibcudf/join.pyx
@@ -22,6 +22,7 @@ from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
 
 from pylibcudf.libcudf.join import set_as_build_table as SetAsBuildTable  # no-cython-lint  # noqa: F401, deprecated
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "conditional_full_join",
@@ -45,9 +46,10 @@ __all__ = [
 ]
 
 cdef Column _column_from_gather_map(
-    cpp_join.gather_map_type gather_map, Stream stream, DeviceMemoryResource mr
+    cpp_join.gather_map_type gather_map, object stream, DeviceMemoryResource mr
 ):
     # helper to convert a gather map to a Column
+    cdef Stream _stream = _get_stream(stream)
     return Column.from_libcudf(
         move(
             make_unique[column](
@@ -55,9 +57,7 @@ cdef Column _column_from_gather_map(
                 device_buffer(),
                 0
             )
-        ),
-        stream,
-        mr
+        ), _stream, mr
     )
 
 
@@ -65,7 +65,7 @@ cpdef tuple inner_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform an inner join between two tables.
@@ -89,16 +89,21 @@ cpdef tuple inner_join(
     """
     cdef cpp_join.gather_map_pair_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_join.inner_join(
-            left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr()
+            left_keys.view(),
+            right_keys.view(),
+            nulls_equal,
+            _cs,
+            mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -106,7 +111,7 @@ cpdef tuple left_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a left join between two tables.
@@ -130,16 +135,21 @@ cpdef tuple left_join(
     """
     cdef cpp_join.gather_map_pair_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_join.left_join(
-            left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr()
+            left_keys.view(),
+            right_keys.view(),
+            nulls_equal,
+            _cs,
+            mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -147,7 +157,7 @@ cpdef tuple full_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a full join between two tables.
@@ -171,16 +181,21 @@ cpdef tuple full_join(
     """
     cdef cpp_join.gather_map_pair_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_join.full_join(
-            left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr()
+            left_keys.view(),
+            right_keys.view(),
+            nulls_equal,
+            _cs,
+            mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -188,7 +203,7 @@ cpdef Column left_semi_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a left semi join between two tables.
@@ -211,7 +226,8 @@ cpdef Column left_semi_join(
     """
     cdef cpp_join.gather_map_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     cdef unique_ptr[cpp_join.filtered_join] join_obj
@@ -221,22 +237,22 @@ cpdef Column left_semi_join(
             new cpp_join.filtered_join(
                 right_keys.view(),
                 nulls_equal,
-                stream.view()
+                _cs
             )
         )
         c_result = join_obj.get()[0].semi_join(
             left_keys.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return _column_from_gather_map(move(c_result), stream, mr)
+    return _column_from_gather_map(move(c_result), _stream, mr)
 
 
 cpdef Column left_anti_join(
     Table left_keys,
     Table right_keys,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a left anti join between two tables.
@@ -259,7 +275,8 @@ cpdef Column left_anti_join(
     """
     cdef cpp_join.gather_map_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     cdef unique_ptr[cpp_join.filtered_join] join_obj
@@ -269,19 +286,19 @@ cpdef Column left_anti_join(
             new cpp_join.filtered_join(
                 right_keys.view(),
                 nulls_equal,
-                stream.view()
+                _cs
             )
         )
         c_result = join_obj.get()[0].anti_join(
             left_keys.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return _column_from_gather_map(move(c_result), stream, mr)
+    return _column_from_gather_map(move(c_result), _stream, mr)
 
 
 cpdef Table cross_join(
-    Table left, Table right, Stream stream=None, DeviceMemoryResource mr=None
+    Table left, Table right, object stream=None, DeviceMemoryResource mr=None
 ):
     """Perform a cross join on two tables.
 
@@ -305,21 +322,22 @@ cpdef Table cross_join(
     """
     cdef unique_ptr[table] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         result = cpp_join.cross_join(
-            left.view(), right.view(), stream.view(), mr.get_mr()
+            left.view(), right.view(), _cs, mr.get_mr()
         )
-    return Table.from_libcudf(move(result), stream, mr)
+    return Table.from_libcudf(move(result), _stream, mr)
 
 
 cpdef tuple conditional_inner_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a conditional inner join between two tables.
@@ -344,7 +362,8 @@ cpdef tuple conditional_inner_join(
     cdef cpp_join.gather_map_pair_type c_result
     cdef optional[size_t] output_size
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -353,12 +372,12 @@ cpdef tuple conditional_inner_join(
             right.view(),
             dereference(binary_predicate.c_obj.get()),
             output_size,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -366,7 +385,7 @@ cpdef tuple conditional_left_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a conditional left join between two tables.
@@ -391,7 +410,8 @@ cpdef tuple conditional_left_join(
     cdef cpp_join.gather_map_pair_type c_result
     cdef optional[size_t] output_size
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -400,12 +420,12 @@ cpdef tuple conditional_left_join(
             right.view(),
             dereference(binary_predicate.c_obj.get()),
             output_size,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -413,7 +433,7 @@ cpdef tuple conditional_full_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a conditional full join between two tables.
@@ -437,7 +457,8 @@ cpdef tuple conditional_full_join(
     """
     cdef cpp_join.gather_map_pair_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -445,12 +466,12 @@ cpdef tuple conditional_full_join(
             left.view(),
             right.view(),
             dereference(binary_predicate.c_obj.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -458,7 +479,7 @@ cpdef Column conditional_left_semi_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a conditional left semi join between two tables.
@@ -482,7 +503,8 @@ cpdef Column conditional_left_semi_join(
     cdef cpp_join.gather_map_type c_result
     cdef optional[size_t] output_size
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -491,17 +513,17 @@ cpdef Column conditional_left_semi_join(
             right.view(),
             dereference(binary_predicate.c_obj.get()),
             output_size,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return _column_from_gather_map(move(c_result), stream, mr)
+    return _column_from_gather_map(move(c_result), _stream, mr)
 
 
 cpdef Column conditional_left_anti_join(
     Table left,
     Table right,
     Expression binary_predicate,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a conditional left anti join between two tables.
@@ -525,7 +547,8 @@ cpdef Column conditional_left_anti_join(
     cdef cpp_join.gather_map_type c_result
     cdef optional[size_t] output_size
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -534,10 +557,10 @@ cpdef Column conditional_left_anti_join(
             right.view(),
             dereference(binary_predicate.c_obj.get()),
             output_size,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return _column_from_gather_map(move(c_result), stream, mr)
+    return _column_from_gather_map(move(c_result), _stream, mr)
 
 
 cpdef tuple mixed_inner_join(
@@ -547,7 +570,7 @@ cpdef tuple mixed_inner_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a mixed inner join between two tables.
@@ -578,7 +601,8 @@ cpdef tuple mixed_inner_join(
     cdef cpp_join.gather_map_pair_type c_result
     cdef cpp_join.output_size_data_type empty_optional
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -590,12 +614,12 @@ cpdef tuple mixed_inner_join(
             dereference(binary_predicate.c_obj.get()),
             nulls_equal,
             empty_optional,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -606,7 +630,7 @@ cpdef tuple mixed_left_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a mixed left join between two tables.
@@ -637,7 +661,8 @@ cpdef tuple mixed_left_join(
     cdef cpp_join.gather_map_pair_type c_result
     cdef cpp_join.output_size_data_type empty_optional
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -649,12 +674,12 @@ cpdef tuple mixed_left_join(
             dereference(binary_predicate.c_obj.get()),
             nulls_equal,
             empty_optional,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -665,7 +690,7 @@ cpdef tuple mixed_full_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a mixed full join between two tables.
@@ -696,7 +721,8 @@ cpdef tuple mixed_full_join(
     cdef cpp_join.gather_map_pair_type c_result
     cdef cpp_join.output_size_data_type empty_optional
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -708,12 +734,12 @@ cpdef tuple mixed_full_join(
             dereference(binary_predicate.c_obj.get()),
             nulls_equal,
             empty_optional,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return (
-        _column_from_gather_map(move(c_result.first), stream, mr),
-        _column_from_gather_map(move(c_result.second), stream, mr),
+        _column_from_gather_map(move(c_result.first), _stream, mr),
+        _column_from_gather_map(move(c_result.second), _stream, mr),
     )
 
 
@@ -724,7 +750,7 @@ cpdef Column mixed_left_semi_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a mixed left semi join between two tables.
@@ -753,7 +779,8 @@ cpdef Column mixed_left_semi_join(
     """
     cdef cpp_join.gather_map_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -764,10 +791,10 @@ cpdef Column mixed_left_semi_join(
             right_conditional.view(),
             dereference(binary_predicate.c_obj.get()),
             nulls_equal,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return _column_from_gather_map(move(c_result), stream, mr)
+    return _column_from_gather_map(move(c_result), _stream, mr)
 
 
 cpdef Column mixed_left_anti_join(
@@ -777,7 +804,7 @@ cpdef Column mixed_left_anti_join(
     Table right_conditional,
     Expression binary_predicate,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a mixed left anti join between two tables.
@@ -806,7 +833,8 @@ cpdef Column mixed_left_anti_join(
     """
     cdef cpp_join.gather_map_type c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -817,10 +845,10 @@ cpdef Column mixed_left_anti_join(
             right_conditional.view(),
             dereference(binary_predicate.c_obj.get()),
             nulls_equal,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return _column_from_gather_map(move(c_result), stream, mr)
+    return _column_from_gather_map(move(c_result), _stream, mr)
 
 
 cdef class FilteredJoin:
@@ -841,7 +869,7 @@ cdef class FilteredJoin:
         Table build,
         null_equality compare_nulls,
         double load_factor=0.5,
-        Stream stream=None,
+        object stream=None,
     ):
         """
         Construct a filtered hash join object for subsequent probe calls.
@@ -858,7 +886,8 @@ cdef class FilteredJoin:
         stream : Stream, optional
             CUDA stream used for device memory operations and kernel launches.
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
 
         with nogil:
             self.c_obj.reset(
@@ -866,14 +895,14 @@ cdef class FilteredJoin:
                     build.view(),
                     compare_nulls,
                     load_factor,
-                    stream.view()
+                    _cs
                 )
             )
 
     def semi_join(
         self,
         Table probe,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None,
     ):
         """
@@ -898,21 +927,22 @@ cdef class FilteredJoin:
         """
         cdef cpp_join.gather_map_type c_result
 
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
 
         with nogil:
             c_result = self.c_obj.get()[0].semi_join(
                 probe.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-        return _column_from_gather_map(move(c_result), stream, mr)
+        return _column_from_gather_map(move(c_result), _stream, mr)
 
     def anti_join(
         self,
         Table probe,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None,
     ):
         """
@@ -937,13 +967,14 @@ cdef class FilteredJoin:
         """
         cdef cpp_join.gather_map_type c_result
 
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
 
         with nogil:
             c_result = self.c_obj.get()[0].anti_join(
                 probe.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-        return _column_from_gather_map(move(c_result), stream, mr)
+        return _column_from_gather_map(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/json.pxd b/python/pylibcudf/pylibcudf/json.pxd
index 5489fa26ee8..47cf3b37c63 100644
--- a/python/pylibcudf/pylibcudf/json.pxd
+++ b/python/pylibcudf/pylibcudf/json.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
@@ -6,7 +6,6 @@ from pylibcudf.libcudf.json cimport get_json_object_options
 from pylibcudf.scalar cimport Scalar
 
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cdef class GetJsonObjectOptions:
@@ -17,6 +16,6 @@ cpdef Column get_json_object(
     Column col,
     Scalar json_path,
     GetJsonObjectOptions options=*,
-    Stream stream=*,
+    object stream=*,  # CudaStreamLike or None
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi
index fa6bb08d510..a60bcb36f26 100644
--- a/python/pylibcudf/pylibcudf/json.pyi
+++ b/python/pylibcudf/pylibcudf/json.pyi
@@ -1,11 +1,11 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 class GetJsonObjectOptions:
     def __init__(
@@ -26,6 +26,6 @@ def get_json_object(
     col: Column,
     json_path: Scalar,
     options: GetJsonObjectOptions | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx
index b50bd4e7714..a470f6a1cb3 100644
--- a/python/pylibcudf/pylibcudf/json.pyx
+++ b/python/pylibcudf/pylibcudf/json.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["GetJsonObjectOptions", "get_json_object"]
 
@@ -120,7 +121,7 @@ cpdef Column get_json_object(
     Column col,
     Scalar json_path,
     GetJsonObjectOptions options=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -155,7 +156,8 @@ cpdef Column get_json_object(
         options = GetJsonObjectOptions()
 
     cdef cpp_json.get_json_object_options c_options = options.options
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -163,8 +165,8 @@ cpdef Column get_json_object(
             col.view(),
             dereference(c_json_path),
             c_options,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/labeling.pxd b/python/pylibcudf/pylibcudf/labeling.pxd
index fc93568ed7c..0d8f02d48ce 100644
--- a/python/pylibcudf/pylibcudf/labeling.pxd
+++ b/python/pylibcudf/pylibcudf/labeling.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp cimport bool
 from pylibcudf.libcudf.labeling cimport inclusive
 
 from .column cimport Column
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 
@@ -15,6 +14,6 @@ cpdef Column label_bins(
     inclusive left_inclusive,
     Column right_edges,
     inclusive right_inclusive,
-    Stream stream=*,
+    object stream=*,  # CudaStreamLike or None
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi
index e9ff5c97f0b..272edd43f5f 100644
--- a/python/pylibcudf/pylibcudf/labeling.pyi
+++ b/python/pylibcudf/pylibcudf/labeling.pyi
@@ -1,12 +1,12 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 class Inclusive(IntEnum):
     YES = ...
@@ -18,6 +18,6 @@ def label_bins(
     left_inclusive: Inclusive,
     right_edges: Column,
     right_inclusive: Inclusive,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx
index 878390543cb..e3a052f7cb8 100644
--- a/python/pylibcudf/pylibcudf/labeling.pyx
+++ b/python/pylibcudf/pylibcudf/labeling.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["Inclusive", "label_bins"]
 
@@ -23,7 +24,7 @@ cpdef Column label_bins(
     inclusive left_inclusive,
     Column right_edges,
     inclusive right_inclusive,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Labels elements based on membership in the specified bins.
@@ -54,7 +55,8 @@ cpdef Column label_bins(
         according to the specified bins.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -64,10 +66,10 @@ cpdef Column label_bins(
             left_inclusive,
             right_edges.view(),
             right_inclusive,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 Inclusive.__str__ = Inclusive.__repr__
diff --git a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd
index 7ec2c6fe31f..303b112f71e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd
@@ -10,7 +10,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.types cimport data_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -57,7 +57,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
         const column_view& rhs,
         binary_operator op,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -66,7 +66,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
         const scalar& rhs,
         binary_operator op,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -75,7 +75,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
         const column_view& rhs,
         binary_operator op,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -84,7 +84,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
         const column_view& rhs,
         const string& op,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd
index daefd24fb7b..b22eeb1dd40 100644
--- a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd
@@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport (
 from pylibcudf.libcudf.types cimport data_type, size_type
 
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -25,13 +25,13 @@ cdef extern from "cudf/column/column.hpp" namespace "cudf" nogil:
         column() except +libcudf_exception_handler
         column(
             const column& other,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
         column(
             column_view view,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
index 5e17d3b89bd..f8cf3b38ccb 100644
--- a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
@@ -13,7 +13,7 @@ from pylibcudf.libcudf.types cimport (
 )
 
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -22,7 +22,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         data_type type,
         size_type size,
         mask_state state,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -31,7 +31,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         size_type size,
         device_buffer mask,
         size_type null_count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -39,7 +39,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         data_type type,
         size_type size,
         mask_state state,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_fixed_point_column(
@@ -47,14 +47,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         size_type size,
         device_buffer mask,
         size_type null_count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_timestamp_column(
         data_type type,
         size_type size,
         mask_state state,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_timestamp_column(
@@ -62,14 +62,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         size_type size,
         device_buffer mask,
         size_type null_count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_duration_column(
         data_type type,
         size_type size,
         mask_state state,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_duration_column(
@@ -77,14 +77,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         size_type size,
         device_buffer mask,
         size_type null_count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_fixed_width_column(
         data_type type,
         size_type size,
         mask_state state,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_fixed_width_column(
@@ -92,27 +92,27 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         size_type size,
         device_buffer mask,
         size_type null_count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_column_from_scalar(
         const scalar& s,
         size_type size,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_dictionary_from_scalar(
         const scalar& s,
         size_type size,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_dictionary_column(
         unique_ptr[column] keys_column,
         unique_ptr[column] indices_column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] make_empty_column(
diff --git a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd
index 272f452a0a0..53cadee79c9 100644
--- a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd
@@ -8,7 +8,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view
 from pylibcudf.libcudf.utilities.span cimport host_span
 
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -24,11 +24,11 @@ cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil:
 
     cdef unique_ptr[column] concatenate(
         const vector[column_view] columns,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[table] concatenate(
         const vector[table_view] tables,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
index 9d839835465..dd439d0d01d 100644
--- a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
@@ -10,7 +10,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.libcudf.utilities.span cimport device_span
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -32,7 +32,7 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil:
         unique_ptr[chunked_pack] create(
             const table_view & input,
             size_t user_buffer_size,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref temp_mr,
         ) except +libcudf_exception_handler
 
@@ -43,13 +43,13 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil:
     cdef vector[contiguous_split_result] contiguous_split (
         table_view input_table,
         vector[size_type] splits,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef packed_columns pack (
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd
index 2c3741342e9..36c95fa777c 100644
--- a/python/pylibcudf/pylibcudf/libcudf/copying.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd
@@ -17,7 +17,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport size_type
 
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 ctypedef const scalar constscalar
@@ -31,7 +31,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const table_view& source_table,
         const column_view& gather_map,
         out_of_bounds_policy policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -39,7 +39,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const column_view& input,
         size_type offset,
         const scalar& fill_values,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -47,7 +47,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const table_view& source_table,
         const column_view& scatter_map,
         const table_view& target_table,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -55,7 +55,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const vector[reference_wrapper[constscalar]]& source_scalars,
         const column_view& indices,
         const table_view& target,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -71,7 +71,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
     cdef unique_ptr[column] allocate_like (
         const column_view& input_column,
         mask_allocation_policy policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -79,7 +79,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const column_view& input_column,
         size_type size,
         mask_allocation_policy policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -93,7 +93,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         size_type input_begin,
         size_type input_end,
         size_type target_begin,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] copy_range (
@@ -102,39 +102,39 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         size_type input_begin,
         size_type input_end,
         size_type target_begin,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef vector[column_view] slice (
         const column_view& input_column,
         vector[size_type] indices,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef vector[table_view] slice (
         const table_view& input_table,
         vector[size_type] indices,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef vector[column_view] split (
         const column_view& input_column,
         vector[size_type] splits,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef vector[table_view] split (
         const table_view& input_table,
         vector[size_type] splits,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] copy_if_else (
         const column_view& lhs,
         const column_view& rhs,
         const column_view& boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -142,7 +142,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const scalar& lhs,
         const column_view& rhs,
         const column_view& boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -150,7 +150,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const column_view& lhs,
         const scalar& rhs,
         const column_view boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -158,7 +158,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const scalar& lhs,
         const scalar& rhs,
         const column_view boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -166,7 +166,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const table_view& input,
         const table_view& target,
         const column_view& boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -174,14 +174,14 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         const vector[reference_wrapper[constscalar]]& input,
         const table_view& target,
         const column_view& boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[scalar] get_element (
         const column_view& input,
         size_type index,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
index a14932f8910..7db66dc1070 100644
--- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport scalar
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -27,7 +27,7 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
     cdef unique_ptr[column] extract_datetime_component(
         const column_view& column,
         datetime_component component,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -42,54 +42,54 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
 
     cdef unique_ptr[column] ceil_datetimes(
         const column_view& column, rounding_frequency freq,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] floor_datetimes(
         const column_view& column, rounding_frequency freq,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] round_datetimes(
         const column_view& column, rounding_frequency freq,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] add_calendrical_months(
         const column_view& timestamps,
         const column_view& months,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] add_calendrical_months(
         const column_view& timestamps,
         const scalar& months,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] day_of_year(
         const column_view& column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] is_leap_year(
         const column_view& column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] last_day_of_month(
         const column_view& column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] extract_quarter(
         const column_view& column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] days_in_month(
         const column_view& column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd b/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd
index 7aea4aafcd1..399a868db71 100644
--- a/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd
@@ -1,14 +1,31 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
+from cuda.bindings.cyruntime cimport cudaStream_t
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.utilities.span cimport host_span
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+ctypedef const cudaStream_t const_cudaStream_t  # host_span element type below
 
 
-cdef extern from "cudf/detail/utilities/stream_pool.hpp" namespace "cudf::detail" nogil:
-    cdef void join_streams(
-        host_span[const cuda_stream_view] streams,
-        cuda_stream_view stream
+cdef extern from * nogil:  # inline C++ shim, defined in the verbatim block below
+    """
+    #include <cudf/detail/utilities/stream_pool.hpp>
+    #include <cudf/utilities/span.hpp>
+    #include <rmm/cuda_stream_view.hpp>
+    #include <vector>
+
+    namespace {
+    void join_streams_wrapper(
+        cudf::host_span<cudaStream_t const> streams,
+        cudaStream_t stream
+    ) {
+        std::vector<rmm::cuda_stream_view> stream_views(streams.begin(), streams.end());
+        cudf::detail::join_streams(stream_views, stream);
+    }
+    }
+    """
+    cdef void join_streams "join_streams_wrapper"(  # Cython name -> shim cname
+        host_span[const_cudaStream_t] streams,
+        cudaStream_t stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd b/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd
index 5707f34f578..2cbf79c0c17 100644
--- a/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd
@@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport (
     null_policy,
     size_type,
 )
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 cdef extern from "cudf/reduction/distinct_count.hpp" namespace "cudf" nogil:
@@ -17,9 +17,9 @@ cdef extern from "cudf/reduction/distinct_count.hpp" namespace "cudf" nogil:
         column_view column,
         null_policy null_handling,
         nan_policy nan_handling,
-        cuda_stream_view stream) except +libcudf_exception_handler
+        cudaStream_t stream) except +libcudf_exception_handler
 
     cdef size_type distinct_count(
         table_view source_table,
         null_equality nulls_equal,
-        cuda_stream_view stream) except +libcudf_exception_handler
+        cudaStream_t stream) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/filling.pxd
index ac969cb8822..e9470a828a7 100644
--- a/python/pylibcudf/pylibcudf/libcudf/filling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/filling.pxd
@@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -22,7 +22,7 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil:
         size_type begin,
         size_type end,
         const scalar & value,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -31,20 +31,20 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil:
         size_type begin,
         size_type end,
         const scalar & value,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[table] repeat(
         const table_view & input,
         const column_view & count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[table] repeat(
         const table_view & input,
         size_type count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -52,7 +52,7 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil:
         size_type size,
         const scalar & init,
         const scalar & step,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -60,6 +60,6 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil:
         size_type n,
         const scalar& init,
         size_type months,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd
index 5ba69a12290..b5ba1031813 100644
--- a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd
@@ -24,7 +24,7 @@ from pylibcudf.libcudf.types cimport (
     sorted,
 )
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 # workaround for https://github.com/cython/cython/issues/3885
@@ -67,7 +67,7 @@ cdef extern from "cudf/groupby.hpp" \
             vector[aggregation_result]
         ] aggregate(
             const vector[aggregation_request]& requests,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
@@ -76,7 +76,7 @@ cdef extern from "cudf/groupby.hpp" \
             vector[aggregation_result]
         ] scan(
             const vector[scan_request]& requests,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
@@ -87,19 +87,19 @@ cdef extern from "cudf/groupby.hpp" \
             const table_view values,
             const vector[size_type] offset,
             const vector[reference_wrapper[constscalar]] fill_values,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
         groups get_groups(
             table_view values,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
         pair[unique_ptr[table], unique_ptr[table]] replace_nulls(
             const table_view& values,
             const vector[replace_policy] replace_policy,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd
index 380afc96c58..9610fa2a09f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/hash.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,64 +15,64 @@ cdef extern from "cudf/hashing.hpp" namespace "cudf::hashing" nogil:
     cdef unique_ptr[column] murmurhash3_x86_32(
         const table_view& input,
         const uint32_t seed,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[table] murmurhash3_x64_128(
         const table_view& input,
         const uint64_t seed,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] md5(
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] sha1(
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] sha224(
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] sha256(
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] sha384(
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] sha512(
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] xxhash_32(
         const table_view& input,
         const uint32_t seed,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] xxhash_64(
         const table_view& input,
         const uint64_t seed,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd
index b09524a257b..78fc455dd35 100644
--- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd
@@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -40,13 +40,13 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
         nogil:
     cdef unique_ptr[table] from_dlpack(
         const DLManagedTensor* managed_tensor,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     DLManagedTensor* to_dlpack(
         const table_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -65,18 +65,18 @@ cdef extern from "cudf/interop.hpp" namespace "cudf::interop" \
         arrow_column(
             ArrowSchema&& schema,
             ArrowArray&& array,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         arrow_column(
             ArrowSchema&& schema,
             ArrowDeviceArray&& array,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         arrow_column(
             ArrowArrayStream&& stream,
-            cuda_stream_view cuda_stream,
+            cudaStream_t cuda_stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         column_view view() except +libcudf_exception_handler
@@ -84,13 +84,13 @@ cdef extern from "cudf/interop.hpp" namespace "cudf::interop" \
     cdef cppclass arrow_table:
         arrow_table(
             ArrowArrayStream&& stream,
-            cuda_stream_view cuda_stream,
+            cudaStream_t cuda_stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         arrow_table(
             ArrowSchema&& schema,
             ArrowDeviceArray&& array,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         table_view view() except +libcudf_exception_handler
@@ -135,7 +135,7 @@ cdef extern from *:
     template <typename ViewType>
     ArrowArray* to_arrow_host_raw(
       ViewType const& obj,
-      rmm::cuda_stream_view stream,
+      cudaStream_t stream,
       rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) {
       ArrowArray *arr = new ArrowArray();
       auto device_arr = cudf::to_arrow_host(obj, stream, mr);
@@ -175,7 +175,7 @@ cdef extern from *:
     ArrowDeviceArray* to_arrow_device_raw(
       ViewType const& obj,
       PyObject* owner,
-      rmm::cuda_stream_view stream       = cudf::get_default_stream(),
+      cudaStream_t stream       = cudf::get_default_stream(),
       rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) {
       auto tmp = cudf::to_arrow_device(obj, stream, mr);
 
@@ -222,11 +222,11 @@ cdef extern from *:
     ) except +libcudf_exception_handler nogil
     cdef ArrowArray* to_arrow_host_raw(
         const table_view& tbl,
-        cuda_stream_view stream,
+        cudaStream_t stream,
     ) except +libcudf_exception_handler nogil
     cdef ArrowArray* to_arrow_host_raw(
         const column_view& tbl,
-        cuda_stream_view stream,
+        cudaStream_t stream,
     ) except +libcudf_exception_handler nogil
     cdef void release_arrow_array_raw(
         ArrowArray *
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd
index ff84ad922fc..521147218bf 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd
@@ -5,7 +5,7 @@ from libcpp.string cimport string
 from libcpp.vector cimport vector
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -49,6 +49,6 @@ cdef extern from "cudf/io/avro.hpp" namespace "cudf::io" nogil:
 
     cdef cudf_io_types.table_with_metadata read_avro(
         avro_reader_options &options,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd
index 31f626b7d9d..45987fbedcd 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd
@@ -10,7 +10,7 @@ from libcpp.string cimport string
 from libcpp.vector cimport vector
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.types cimport data_type, size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 cdef extern from "cudf/io/csv.hpp" \
@@ -263,7 +263,7 @@ cdef extern from "cudf/io/csv.hpp" \
 
     cdef cudf_io_types.table_with_metadata read_csv(
         csv_reader_options &options,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -337,7 +337,7 @@ cdef extern from "cudf/io/csv.hpp" \
 
     cdef void write_csv(
         csv_writer_options args,
-        cuda_stream_view stream,
+        cudaStream_t stream,
     ) except +libcudf_exception_handler
 
     cdef bool is_supported_write_csv(
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd b/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd
index 9f7462f6b86..8578908fc43 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd
@@ -15,7 +15,7 @@ from pylibcudf.libcudf.io.text cimport byte_range_info
 from pylibcudf.libcudf.io.types cimport table_with_metadata
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.libcudf.utilities.span cimport device_span, host_span
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 ctypedef const uint8_t const_uint8_t
@@ -61,7 +61,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \
         vector[size_type] filter_row_groups_with_stats(
             host_span[const_size_type] row_group_indices,
             const parquet_reader_options& options,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
 
         pair[
@@ -75,20 +75,20 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \
             host_span[const_device_span_const_uint8_t] dictionary_page_data,
             host_span[const_size_type] row_group_indices,
             const parquet_reader_options& options,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
 
         vector[size_type] filter_row_groups_with_bloom_filters(
             host_span[const_device_span_const_uint8_t] bloom_filter_data,
             host_span[const_size_type] row_group_indices,
             const parquet_reader_options& options,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
 
         unique_ptr[column] build_row_mask_with_page_index_stats(
             host_span[const_size_type] row_group_indices,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
@@ -103,7 +103,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \
             mutable_column_view& row_mask,
             use_data_page_mask mask_data_pages,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
@@ -118,7 +118,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \
             const column_view& row_mask,
             use_data_page_mask mask_data_pages,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
@@ -131,7 +131,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \
             host_span[const_size_type] row_group_indices,
             host_span[const_device_span_const_uint8_t] column_chunk_data,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
@@ -143,7 +143,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \
             use_data_page_mask mask_data_pages,
             host_span[const_device_span_const_uint8_t] column_chunk_data,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
@@ -159,7 +159,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \
             use_data_page_mask mask_data_pages,
             host_span[const_device_span_const_uint8_t] column_chunk_data,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd
index 6d5a506d18a..af3b1e59bd1 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd
@@ -11,7 +11,7 @@ from libcpp.string cimport string
 from libcpp.vector cimport vector
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.types cimport data_type, size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -158,7 +158,7 @@ cdef extern from "cudf/io/json.hpp" namespace "cudf::io" nogil:
 
     cdef cudf_io_types.table_with_metadata read_json(
         json_reader_options &options,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -240,7 +240,7 @@ cdef extern from "cudf/io/json.hpp" namespace "cudf::io" nogil:
 
     cdef cudf_io_types.table_with_metadata write_json(
         json_writer_options &options,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef bool is_supported_write_json(
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd
index 0455c0fa1b1..bea5c1e06f0 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd
@@ -11,7 +11,7 @@ from libcpp.string cimport string
 from libcpp.vector cimport vector
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.types cimport data_type, size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -80,7 +80,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil:
 
     cdef cudf_io_types.table_with_metadata read_orc(
         orc_reader_options opts,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr,
     ) except +libcudf_exception_handler
 
@@ -150,7 +150,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil:
 
     cdef void write_orc(
         orc_writer_options options,
-        cuda_stream_view stream,
+        cudaStream_t stream,
     ) except +libcudf_exception_handler
 
     cdef bool is_supported_read_orc(
@@ -228,7 +228,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil:
         orc_chunked_writer() except +libcudf_exception_handler
         orc_chunked_writer(
             chunked_orc_writer_options args,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
         orc_chunked_writer& write(
             cudf_table_view.table_view table_,
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd
index e0c67e14e1d..f365a45b34a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t
 from libcpp cimport bool
@@ -8,7 +8,7 @@ from libcpp.vector cimport vector
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.io cimport types as cudf_io_types
 from pylibcudf.variant cimport monostate, variant
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 cdef extern from "cudf/io/orc_metadata.hpp" \
@@ -71,5 +71,5 @@ cdef extern from "cudf/io/orc_metadata.hpp" \
 
     cdef parsed_orc_statistics read_parsed_orc_statistics(
         const cudf_io_types.source_info& src_info,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd
index dc0dff818a3..00b62e55514 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd
@@ -22,7 +22,7 @@ from pylibcudf.libcudf.io.types cimport (
 )
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport data_type, size_type, type_id
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -124,7 +124,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
 
     cdef table_with_metadata read_parquet(
         parquet_reader_options args,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -256,7 +256,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
 
     cdef unique_ptr[vector[uint8_t]] write_parquet(
         parquet_writer_options options,
-        cuda_stream_view stream,
+        cudaStream_t stream,
     ) except +libcudf_exception_handler
 
     cdef bool is_supported_read_parquet(
@@ -288,7 +288,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         chunked_parquet_writer() except +libcudf_exception_handler
         chunked_parquet_writer(
             const chunked_parquet_writer_options& args,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
         chunked_parquet_writer& write(
             const table_view& table_,
@@ -303,14 +303,14 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         chunked_parquet_reader(
             size_t chunk_read_limit,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         chunked_parquet_reader(
             size_t chunk_read_limit,
             size_t pass_read_limit,
             const parquet_reader_options& options,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         bool has_next() except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd
index 77552a80cfd..7152e5d0afb 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd
@@ -6,7 +6,7 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -63,6 +63,6 @@ cdef extern from "cudf/io/text/multibyte_split.hpp" \
         data_chunk_source source,
         string delimiter,
         parse_options options,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd
index 557e8856b28..45cfb4f15da 100644
--- a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd
@@ -6,7 +6,7 @@ from libcpp.optional cimport optional
 from libcpp.string cimport string
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.table.table cimport table
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -14,6 +14,6 @@ cdef extern from "cudf/timezone.hpp" namespace "cudf" nogil:
     unique_ptr[table] make_timezone_transition_table(
         optional[string] tzif_dir,
         string timezone_name,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd
index 06a7d497ad5..d13bf245119 100644
--- a/python/pylibcudf/pylibcudf/libcudf/join.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd
@@ -13,7 +13,7 @@ from pylibcudf.libcudf.expressions cimport expression
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport null_equality, size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 from rmm.librmm.device_uvector cimport device_uvector
@@ -28,7 +28,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil:
         const table_view left_keys,
         const table_view right_keys,
         null_equality nulls_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -36,7 +36,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil:
         const table_view left_keys,
         const table_view right_keys,
         null_equality nulls_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -44,7 +44,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil:
         const table_view left_keys,
         const table_view right_keys,
         null_equality nulls_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -52,7 +52,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil:
         const table_view left_keys,
         const table_view right_keys,
         null_equality nulls_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -60,7 +60,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil:
         const table_view left_keys,
         const table_view right_keys,
         null_equality nulls_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -68,14 +68,14 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil:
         const table_view left_keys,
         const table_view right_keys,
         null_equality nulls_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[table] cross_join(
         const table_view left,
         const table_view right,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -84,7 +84,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view left,
         const table_view right,
         const expression binary_predicate,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -93,7 +93,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view right,
         const expression binary_predicate,
         optional[size_t] output_size,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -101,7 +101,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view left,
         const table_view right,
         const expression binary_predicate,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -110,7 +110,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view right,
         const expression binary_predicate,
         optional[size_t] output_size,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -118,7 +118,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view left,
         const table_view right,
         const expression binary_predicate,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -126,7 +126,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view left,
         const table_view right,
         const expression binary_predicate,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -135,7 +135,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view right,
         const expression binary_predicate,
         optional[size_t] output_size,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -143,7 +143,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view left,
         const table_view right,
         const expression binary_predicate,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -152,7 +152,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil:
         const table_view right,
         const expression binary_predicate,
         optional[size_t] output_size,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -165,7 +165,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil:
         const expression binary_predicate,
         null_equality compare_nulls,
         output_size_data_type output_size_data,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -177,7 +177,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil:
         const expression binary_predicate,
         null_equality compare_nulls,
         output_size_data_type output_size_data,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -189,7 +189,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil:
         const expression binary_predicate,
         null_equality compare_nulls,
         output_size_data_type output_size_data,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -200,7 +200,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil:
         const table_view right_conditional,
         const expression binary_predicate,
         null_equality compare_nulls,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -211,7 +211,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil:
         const table_view right_conditional,
         const expression binary_predicate,
         null_equality compare_nulls,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -225,21 +225,21 @@ cdef extern from "cudf/join/filtered_join.hpp" namespace "cudf" nogil:
         filtered_join(
             const table_view build,
             null_equality compare_nulls,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
         filtered_join(
             const table_view build,
             null_equality compare_nulls,
             double load_factor,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
         gather_map_type semi_join(
             const table_view probe,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         gather_map_type anti_join(
             const table_view probe,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/json.pxd b/python/pylibcudf/pylibcudf/libcudf/json.pxd
index 39899490cac..bb606b86b33 100644
--- a/python/pylibcudf/pylibcudf/libcudf/json.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/json.pxd
@@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -30,6 +30,6 @@ cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil:
         column_view col,
         string_scalar json_path,
         get_json_object_options options,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd
index ad9611511dd..0b2c1651714 100644
--- a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd
@@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -21,6 +21,6 @@ cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil:
         inclusive left_inclusive,
         const column_view &right_edges,
         inclusive right_inclusive,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
index 66e90dcd66a..310d166df59 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.table.table_view cimport table_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -21,19 +21,19 @@ cdef extern from "cudf/lists/combine.hpp" namespace \
     cdef unique_ptr[column] concatenate_rows(
         const table_view input_table,
         concatenate_null_policy null_policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] concatenate_list_elements(
         const table_view input_table,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] concatenate_list_elements(
         const column_view input_table,
         concatenate_null_policy null_policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
index efb2d760366..3736e42b32d 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.scalar.scalar cimport scalar
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -20,20 +20,20 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] contains(
         const lists_column_view& lists,
         const scalar& search_key,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] contains(
         const lists_column_view& lists,
         const column_view& search_keys,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] contains_nulls(
         const lists_column_view& lists,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -41,7 +41,7 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
         const lists_column_view& lists,
         const scalar& search_key,
         duplicate_find_option find_option,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -49,6 +49,6 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
         const lists_column_view& lists,
         const column_view& search_keys,
         duplicate_find_option find_option,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
index 6203bafdc38..6fa64c8b291 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
@@ -4,13 +4,13 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
 cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] count_elements(
         const lists_column_view&,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
index b31d3a7cdca..fa15fb1eeef 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -13,6 +13,6 @@ cdef extern from "cudf/lists/explode.hpp" namespace "cudf" nogil:
     cdef unique_ptr[table] explode_outer(
         const table_view,
         size_type explode_column_idx,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
index c82a9029311..66a07f41e38 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column, column_view
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -13,12 +13,12 @@ cdef extern from "cudf/lists/extract.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] extract_list_element(
         const lists_column_view&,
         size_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[column] extract_list_element(
         const lists_column_view&,
         const column_view&,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
index 11cc19b86f9..1e55916d299 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
@@ -4,7 +4,7 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -12,7 +12,7 @@ cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] sequences(
         const column_view& starts,
         const column_view& sizes,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -20,6 +20,6 @@ cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil:
         const column_view& starts,
         const column_view& steps,
         const column_view& sizes,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd
index bae67a96b0d..b7212bea51e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.copying cimport out_of_bounds_policy
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil:
@@ -13,6 +13,6 @@ cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil:
         const lists_column_view& source_column,
         const lists_column_view& gather_map_list,
         out_of_bounds_policy bounds_policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd
index fe1630c1728..69a6c80f242 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd
@@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column_view cimport (
@@ -26,7 +26,7 @@ cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil:
         column_view offsets() except +libcudf_exception_handler
         column_view child() except +libcudf_exception_handler
         column_view get_sliced_child(
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
 
     cdef enum:
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd
index f831024ec82..e60c8acbb38 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd
@@ -4,13 +4,13 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
 cdef extern from "cudf/lists/reverse.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] reverse(
         const lists_column_view& lists_column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd
index 5e02d11d95a..b56caa9adb5 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.types cimport nan_equality, null_equality
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,7 +15,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil:
         const lists_column_view& rhs,
         null_equality nulls_equal,
         nan_equality nans_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -24,7 +24,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil:
         const lists_column_view& rhs,
         null_equality nulls_equal,
         nan_equality nans_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -33,7 +33,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil:
         const lists_column_view& rhs,
         null_equality nulls_equal,
         nan_equality nans_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -42,6 +42,6 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil:
         const lists_column_view& rhs,
         null_equality nulls_equal,
         nan_equality nans_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd
index 4036ccec6c5..9899591d6d1 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.types cimport null_order, order
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -14,7 +14,7 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil:
         const lists_column_view source_column,
         order column_order,
         null_order null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -22,6 +22,6 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil:
         const lists_column_view source_column,
         order column_order,
         null_order null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
index dec32027402..0187642e0c7 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option
 from pylibcudf.libcudf.types cimport nan_equality, null_equality
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,7 +15,7 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \
     cdef unique_ptr[column] apply_boolean_mask(
         const lists_column_view& lists_column,
         const lists_column_view& boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -24,6 +24,6 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \
         null_equality nulls_equal,
         nan_equality nans_equal,
         duplicate_keep_option keep_option,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/merge.pxd b/python/pylibcudf/pylibcudf/libcudf/merge.pxd
index 860e4263c1c..f4389ac991a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/merge.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/merge.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,6 +17,6 @@ cdef extern from "cudf/merge.hpp" namespace "cudf" nogil:
         vector[libcudf_types.size_type] key_cols,
         vector[libcudf_types.order] column_order,
         vector[libcudf_types.null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd
index 1b1b3001981..330c69f0579 100644
--- a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd
@@ -8,14 +8,14 @@ from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type
 
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
 cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
     cdef device_buffer copy_bitmask (
         column_view view,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -23,7 +23,7 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
         const bitmask_type* null_mask,
         size_type begin_bit,
         size_type end_bit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -35,19 +35,19 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
     cdef device_buffer create_null_mask (
         size_type size,
         mask_state state,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef pair[device_buffer, size_type] bitmask_and(
         table_view view,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     )
 
     cdef pair[device_buffer, size_type] bitmask_or(
         table_view view,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     )
 
@@ -55,12 +55,12 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
         const bitmask_type * bitmask,
         size_type start,
         size_type stop,
-        cuda_stream_view stream
+        cudaStream_t stream
     )
 
     cdef size_type index_of_first_set_bit(
         const bitmask_type * bitmask,
         size_type start,
         size_type stop,
-        cuda_stream_view stream
+        cudaStream_t stream
     )
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
index eca30faa630..94a7fe3db9d 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd
@@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,7 +17,7 @@ cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil:
 
     cdef unique_ptr[bpe_merge_pairs] load_merge_pairs(
         const column_view &merge_pairs,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -25,6 +25,6 @@ cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil:
         const column_view &strings,
         const bpe_merge_pairs &merge_pairs,
         const string_scalar &separator,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd
index 26e39c963d2..82a8581ea0a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 from rmm.librmm.device_uvector cimport device_uvector
@@ -19,7 +19,7 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil:
     cdef suffix_array_type build_suffix_array(
         column_view source_strings,
         size_type min_width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -27,7 +27,7 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil:
         column_view source_strings,
         column_view indices,
         size_type min_width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -37,6 +37,6 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil:
         column_view input2,
         column_view indices2,
         size_type min_width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd
index b7f3e97a4b0..f3c10c11abf 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd
@@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,12 +15,12 @@ cdef extern from "nvtext/edit_distance.hpp" namespace "nvtext" nogil:
     cdef unique_ptr[column] edit_distance(
         const column_view & strings,
         const column_view & targets,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] edit_distance_matrix(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
index 43619d356f6..3d97aaf93b1 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,14 +17,14 @@ cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil:
         const column_view &strings,
         size_type ngrams,
         const string_scalar & separator,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] generate_character_ngrams(
         const column_view &strings,
         size_type ngrams,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -32,6 +32,6 @@ cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil:
         const column_view &strings,
         size_type ngrams,
         uint32_t seed,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd
index de45913fbb5..0a3ba52a3d5 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,6 +15,6 @@ cdef extern from "nvtext/jaccard.hpp" namespace "nvtext" nogil:
         const column_view &input1,
         const column_view &input2,
         size_type width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
index eaf0b8c63b1..94083fbafd3 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -19,7 +19,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil:
         const column_view &a,
         const column_view &b,
         const size_type width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -29,7 +29,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil:
         const column_view &a,
         const column_view &b,
         const size_type width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -39,7 +39,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil:
         const uint32_t seed,
         const column_view &a,
         const column_view &b,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -49,6 +49,6 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil:
         const uint64_t seed,
         const column_view &a,
         const column_view &b,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
index 41d153b99a0..6e4cc18e17f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,6 +17,6 @@ cdef extern from "nvtext/ngrams_tokenize.hpp" namespace "nvtext" nogil:
         size_type ngrams,
         const string_scalar & delimiter,
         const string_scalar & separator,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd
index 25678d12091..0184c1d8785 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd
@@ -5,7 +5,7 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -13,7 +13,7 @@ cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil:
 
     cdef unique_ptr[column] normalize_spaces(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -23,13 +23,13 @@ cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil:
     cdef unique_ptr[character_normalizer] create_character_normalizer(
         bool do_lower_case,
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] normalize_characters(
         const column_view & strings,
         const character_normalizer & normalizer,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd
index d14ce40b168..628181b3f89 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,7 +17,7 @@ cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil:
         const column_view & targets,
         const column_view & replacements,
         const string_scalar & delimiter,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -26,6 +26,6 @@ cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil:
         size_type min_token_length,
         const string_scalar & replacement,
         const string_scalar & delimiter,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd
index e6e2866008b..2088440749a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -18,7 +18,7 @@ cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil:
 
     cdef unique_ptr[column] porter_stemmer_measure(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -26,12 +26,12 @@ cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil:
         column_view source_strings,
         letter_type ltype,
         size_type character_index,
-        cuda_stream_view stream) except +libcudf_exception_handler
+        cudaStream_t stream) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_letter(
         column_view source_strings,
         letter_type ltype,
         column_view indices,
-        cuda_stream_view stream) except +libcudf_exception_handler
+        cudaStream_t stream) except +libcudf_exception_handler
 
 ctypedef int32_t underlying_type_t_letter_type
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd
index 3b7ae2e9b6f..1c6eccb0476 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,34 +15,34 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil:
     cdef unique_ptr[column] tokenize(
         const column_view & strings,
         const string_scalar & delimiter,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] tokenize(
         const column_view & strings,
         const column_view & delimiters,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] count_tokens(
         const column_view & strings,
         const string_scalar & delimiter,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] count_tokens(
         const column_view & strings,
         const column_view & delimiters,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] character_tokenize(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -50,7 +50,7 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil:
         const column_view & strings,
         const column_view & row_indices,
         const string_scalar & separator,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -59,7 +59,7 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil:
 
     cdef unique_ptr[tokenize_vocabulary] load_vocabulary(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -68,6 +68,6 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil:
         const tokenize_vocabulary & vocabulary,
         const string_scalar & delimiter,
         size_type default_id,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd
index a4bcde47f80..0c43f0d21ff 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -16,7 +16,7 @@ cdef extern from "nvtext/wordpiece_tokenize.hpp" namespace "nvtext" nogil:
 
     cdef unique_ptr[wordpiece_vocabulary] load_wordpiece_vocabulary(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -24,6 +24,6 @@ cdef extern from "nvtext/wordpiece_tokenize.hpp" namespace "nvtext" nogil:
         const column_view & strings,
         const wordpiece_vocabulary & vocabulary,
         size_type max_tokens_per_row,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
index e7c0f496de8..2e0c978f77d 100644
--- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
@@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.hash cimport DEFAULT_HASH_SEED
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
@@ -28,7 +28,7 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
         int num_partitions,
         hash_id hash_function,
         uint32_t seed,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -39,7 +39,7 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
         int num_partitions,
         hash_id hash_function,
         uint32_t seed,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -48,7 +48,7 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
         const table_view& t,
         const column_view& partition_map,
         int num_partitions,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -57,6 +57,6 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
         const table_view& input,
         int num_partitions,
         int start_partition,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd
index 823bd34e4a7..8bc636da998 100644
--- a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd
@@ -15,7 +15,7 @@ from pylibcudf.libcudf.types cimport (
     order_info,
     sorted,
 )
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -27,7 +27,7 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil:
         interpolation interp,
         column_view ordered_indices,
         bool exact,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -38,6 +38,6 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil:
         sorted is_input_sorted,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd
index 9da4159d0c1..5fb383149a7 100644
--- a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd
@@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.types cimport data_type, null_policy
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 ctypedef const scalar constscalar
@@ -22,7 +22,7 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil:
         const reduce_aggregation& agg,
         data_type output_type,
         optional[reference_wrapper[constscalar]] init,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -35,13 +35,13 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil:
         const scan_aggregation& agg,
         scan_type inclusive,
         null_policy null_handling,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef pair[unique_ptr[scalar], unique_ptr[scalar]] minmax(
         const column_view& col,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/replace.pxd
index 35078b64ee3..4821a13924c 100644
--- a/python/pylibcudf/pylibcudf/libcudf/replace.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/replace.pxd
@@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column_view cimport (
     mutable_column_view,
 )
 from pylibcudf.libcudf.scalar.scalar cimport scalar
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -22,47 +22,47 @@ cdef extern from "cudf/replace.hpp" namespace "cudf" nogil:
     cdef unique_ptr[column] replace_nulls(
         column_view source_column,
         column_view replacement_column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] replace_nulls(
         column_view source_column,
         scalar replacement,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] replace_nulls(
         column_view source_column,
         replace_policy replace_policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] find_and_replace_all(
         column_view source_column,
         column_view values_to_replace,
         column_view replacement_values,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] clamp(
         column_view source_column,
         scalar lo, scalar lo_replace,
         scalar hi, scalar hi_replace,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] clamp(
         column_view source_column,
         scalar lo, scalar hi,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] normalize_nans_and_zeros(
         column_view source_column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef void normalize_nans_and_zeros(
         mutable_column_view source_column,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd
index 598e148d643..beda4ec09fc 100644
--- a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd
@@ -8,7 +8,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport size_type, data_type
 from pylibcudf.libcudf.utilities.span cimport device_span
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 cdef extern from "cuda/functional" namespace "cuda::std":
@@ -19,17 +19,17 @@ cdef extern from "cuda/functional" namespace "cuda::std":
 cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
     cdef unique_ptr[column] interleave_columns(
         table_view source_table,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[table] tile(
         table_view source_table,
         size_type count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef void table_to_array(
         table_view input_table,
         device_span[byte] output,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd
index 6ea400f92d3..69cdbd6f396 100644
--- a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd
@@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport data_type, null_order, order, size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -44,7 +44,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil:
         range_window_type preceding,
         range_window_type following,
         vector[rolling_request]& requests,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -54,7 +54,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil:
         column_view following_window,
         size_type min_periods,
         rolling_aggregation& agg,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] rolling_window(
@@ -63,7 +63,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil:
         size_type following_window,
         size_type min_periods,
         rolling_aggregation& agg,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef pair[unique_ptr[column], unique_ptr[column]] make_range_windows(
@@ -73,7 +73,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil:
         null_order null_order,
         range_window_type preceding,
         range_window_type following,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/round.pxd b/python/pylibcudf/pylibcudf/libcudf/round.pxd
index 39965d025c6..f21987844f3 100644
--- a/python/pylibcudf/pylibcudf/libcudf/round.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/round.pxd
@@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -20,7 +20,7 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil:
         const column_view& input,
         int32_t decimal_places,
         rounding_method method,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -28,6 +28,6 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil:
         const column_view& input,
         int32_t decimal_places,
         rounding_method method,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd
index 6c3dc71e019..10d3a42c572 100644
--- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd
@@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport data_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -18,31 +18,31 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil:
         scalar(scalar other) except +libcudf_exception_handler
         data_type type() except +libcudf_exception_handler
         void set_valid_async(
-            bool is_valid, cuda_stream_view stream
+            bool is_valid, cudaStream_t stream
         ) except +libcudf_exception_handler
-        bool is_valid(cuda_stream_view stream) except +libcudf_exception_handler
+        bool is_valid(cudaStream_t stream) except +libcudf_exception_handler
 
     cdef cppclass numeric_scalar[T](scalar):
         void set_value(
             T value,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
-        T value(cuda_stream_view stream) except +libcudf_exception_handler
+        T value(cudaStream_t stream) except +libcudf_exception_handler
 
     cdef cppclass timestamp_scalar[T](scalar):
         void set_value(
             T value,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
 
     cdef cppclass duration_scalar[T](scalar):
         void set_value(
             T value,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
 
     cdef cppclass string_scalar(scalar):
-        string to_string(cuda_stream_view stream) except +libcudf_exception_handler
+        string to_string(cudaStream_t stream) except +libcudf_exception_handler
 
     cdef cppclass list_scalar(scalar):
         pass
@@ -57,4 +57,4 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil:
             scale_type scale,
             bool is_valid
         ) except +libcudf_exception_handler
-        T value(cuda_stream_view stream) except +libcudf_exception_handler
+        T value(cudaStream_t stream) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd
index 6034b2ecc08..6b1329962cd 100644
--- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd
@@ -9,49 +9,49 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type
 from pylibcudf.libcudf.types cimport int128 as int128_t
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
 cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil:
     cdef unique_ptr[scalar] make_string_scalar(
         const string & _string,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_fixed_width_scalar[T](
         T value,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_fixed_point_scalar[T](
         int128_t value,
         scale_type scale,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_numeric_scalar(
         data_type type_,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_timestamp_scalar(
         data_type type_,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_empty_scalar_like(
         const column_view &,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_duration_scalar(
         data_type type_,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_default_constructed_scalar(
         data_type type_,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/search.pxd b/python/pylibcudf/pylibcudf/libcudf/search.pxd
index b369ec05392..c1e41893d2e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/search.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/search.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.table.table_view cimport table_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -18,7 +18,7 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil:
         table_view needles,
         vector[libcudf_types.order] column_order,
         vector[libcudf_types.null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -27,13 +27,13 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil:
         table_view needles,
         vector[libcudf_types.order] column_order,
         vector[libcudf_types.null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] contains(
         column_view haystack,
         column_view needles,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd
index 97822e2c374..c8e252ced2c 100644
--- a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd
@@ -17,7 +17,7 @@ from pylibcudf.libcudf.types cimport (
     null_order,
     size_type
 )
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -26,7 +26,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         table_view source_table,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -34,7 +34,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         table_view source_table,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -45,7 +45,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         null_policy null_handling,
         null_order null_precedence,
         bool percentage,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -53,7 +53,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         const table_view& table,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream
+        cudaStream_t stream
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[table] segmented_sort_by_key(
@@ -62,7 +62,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         const column_view& segment_offsets,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -72,7 +72,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         const column_view& segment_offsets,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -81,7 +81,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         const table_view& keys,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -90,7 +90,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         const table_view& keys,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -98,7 +98,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         table_view source_table,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -106,7 +106,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         table_view source_table,
         vector[order] column_order,
         vector[null_order] null_precedence,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -114,7 +114,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         const column_view& col,
         size_type k,
         order sort_order,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -122,6 +122,6 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil:
         const column_view& col,
         size_type k,
         order sort_order,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
index 0358aa4068c..9f8686da472 100644
--- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
@@ -14,7 +14,7 @@ from pylibcudf.libcudf.types cimport (
     null_equality,
     size_type,
 )
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -29,7 +29,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         table_view source_table,
         vector[size_type] keys,
         size_type keep_threshold,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -37,14 +37,14 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         table_view source_table,
         vector[size_type] keys,
         size_type keep_threshold,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[table] apply_boolean_mask(
         table_view source_table,
         column_view boolean_mask,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -53,7 +53,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         vector[size_type] keys,
         duplicate_keep_option keep,
         null_equality nulls_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -63,7 +63,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         duplicate_keep_option keep,
         null_equality nulls_equal,
         nan_equality nans_equals,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -72,7 +72,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         duplicate_keep_option keep,
         null_equality nulls_equal,
         nan_equality nans_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -82,7 +82,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         duplicate_keep_option keep,
         null_equality nulls_equal,
         nan_equality nans_equal,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -90,6 +90,6 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         table_view predicate_table,
         const expression& predicate_expr,
         table_view filter_table,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd
index 06e95c95870..0cee9e43346 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd
@@ -4,7 +4,7 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -12,15 +12,15 @@ cdef extern from "cudf/strings/attributes.hpp" namespace "cudf::strings" nogil:
 
     cdef unique_ptr[column] count_characters(
         column_view source_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] count_bytes(
         column_view source_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] code_points(
         column_view source_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd
index b615cd984db..7b8ac094311 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.strings.char_types cimport string_character_types
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -14,18 +14,18 @@ cdef extern from "cudf/strings/capitalize.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] capitalize(
         const column_view & strings,
         const string_scalar & delimiters,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] title(
         const column_view & strings,
         string_character_types sequence_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
         ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_title(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd
index 463586d9f37..a056f1b4737 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd
@@ -4,22 +4,22 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
 cdef extern from "cudf/strings/case.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] to_lower(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] to_upper(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] swapcase(
         const column_view & strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd
index 7706498eceb..c6af0fb73d2 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd
@@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -29,7 +29,7 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \
         column_view source_strings,
         string_character_types types,
         string_character_types verify_types,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] filter_characters_of_type(
@@ -37,5 +37,5 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \
         string_character_types types_to_remove,
         string_scalar replacement,
         string_character_types types_to_keep,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd
index ef831d3b167..2e2b6656797 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd
@@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.table.table_view cimport table_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -27,7 +27,7 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil:
         string_scalar separator,
         string_scalar narep,
         separator_on_nulls separate_nulls,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] concatenate(
@@ -36,14 +36,14 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil:
         string_scalar separator_narep,
         string_scalar col_narep,
         separator_on_nulls separate_nulls,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] join_strings(
         column_view input,
         string_scalar separator,
         string_scalar narep,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] join_list_elements(
@@ -53,7 +53,7 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil:
         string_scalar string_narep,
         separator_on_nulls separate_nulls,
         output_if_empty_list empty_list_policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] join_list_elements(
@@ -62,5 +62,5 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil:
         string_scalar narep,
         separator_on_nulls separate_nulls,
         output_if_empty_list empty_list_policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd
index f60782e93b7..cc9a7c6835d 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.strings.regex_program cimport regex_program
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -16,31 +16,31 @@ cdef extern from "cudf/strings/contains.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] contains_re(
         column_view source_strings,
         regex_program,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] count_re(
         column_view source_strings,
         regex_program,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] matches_re(
         column_view source_strings,
         regex_program,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] like(
         column_view source_strings,
         string pattern,
         string escape_character,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] like(
         column_view source_strings,
         column_view patterns,
         string_scalar escape_character,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
index b5b837878f9..8875bc62ed5 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,12 +15,12 @@ cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \
     cdef unique_ptr[column] to_booleans(
         column_view input,
         string_scalar true_string,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] from_booleans(
         column_view booleans,
         string_scalar true_string,
         string_scalar false_string,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
index 5779839a685..92983f9dc49 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport data_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,18 +17,18 @@ cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \
         column_view input,
         data_type timestamp_type,
         string format,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] from_timestamps(
         column_view timestamps,
         string format,
         column_view names,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_timestamp(
         column_view input_col,
         string format,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
index 2eae8b987b9..4f22b715ef9 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport data_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,11 +17,11 @@ cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \
         const column_view & input,
         data_type duration_type,
         const string & format,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] from_durations(
         const column_view & durations,
         const string & format,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
index e5f512c331f..8aaa0ebf4c7 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport data_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,17 +15,17 @@ cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \
     cdef unique_ptr[column] to_fixed_point(
         column_view input,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] from_fixed_point(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_fixed_point(
         column_view input,
         data_type decimal_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd
index 4ea1cd527f4..5a111c1979d 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport data_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,16 +15,16 @@ cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \
     cdef unique_ptr[column] to_floats(
         column_view strings,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] from_floats(
         column_view floats,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_float(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd
index 306c4b66758..4d3f4ff758a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport data_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,40 +15,40 @@ cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \
     cdef unique_ptr[column] to_integers(
         column_view input,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] from_integers(
         column_view integers,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_integer(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_integer(
         column_view input,
         data_type int_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] hex_to_integers(
         column_view input,
         data_type output_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_hex(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] integers_to_hex(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
index d12f3992d85..00a64787957 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -13,16 +13,16 @@ cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] ipv4_to_integers(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] integers_to_ipv4(
         column_view integers,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] is_ipv4(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd
index 8ed381e87da..bfae49bae4b 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,5 +17,5 @@ cdef extern from "cudf/strings/convert/convert_lists.hpp" namespace \
         column_view input,
         string_scalar na_rep,
         column_view separators,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd
index b20c03f976b..db2d4f4efc0 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -13,10 +13,10 @@ cdef extern from "cudf/strings/convert/convert_urls.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] url_encode(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] url_decode(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd
index 845de206dbf..d3e0d0fd35a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.strings.regex_program cimport regex_program
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -16,18 +16,18 @@ cdef extern from "cudf/strings/extract.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[table] extract(
         column_view input,
         regex_program prog,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] extract_all_record(
         column_view input,
         regex_program prog,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] extract_single(
         column_view input,
         regex_program prog,
         size_type group,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd
index b8934aeb7fe..42752152de8 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -16,37 +16,37 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] contains(
         column_view source_strings,
         string_scalar target,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] contains(
         column_view source_strings,
         column_view target_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] ends_with(
         column_view source_strings,
         string_scalar target,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] ends_with(
         column_view source_strings,
         column_view target_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] starts_with(
         column_view source_strings,
         string_scalar target,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] starts_with(
         column_view source_strings,
         column_view target_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] find(
@@ -54,14 +54,14 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil:
         string_scalar target,
         size_type start,
         size_type stop,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] find(
         column_view source_strings,
         column_view target,
         size_type start,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] rfind(
@@ -69,5 +69,5 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil:
         string_scalar target,
         size_type start,
         size_type stop,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd
index da751990053..1e42a476c13 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.table.table cimport table
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -15,11 +15,11 @@ cdef extern from "cudf/strings/find_multiple.hpp" namespace "cudf::strings" \
     cdef unique_ptr[table] contains_multiple(
         column_view input,
         column_view targets,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] find_multiple(
         column_view input,
         column_view targets,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
index 02ecbef7095..d72ffd09d8e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.strings.regex_program cimport regex_program
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -14,11 +14,11 @@ cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] findall(
         column_view input,
         regex_program prog,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] find_re(
         column_view input,
         regex_program prog,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
index 5e3e5c43f61..8b291a22a05 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
@@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.strings.side_type cimport side_type
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -20,17 +20,17 @@ cdef extern from "cudf/strings/padding.hpp" namespace "cudf::strings" nogil:
         size_type width,
         side_type side,
         string fill_char,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] zfill(
         column_view input,
         size_type width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] zfill_by_widths(
         column_view input,
         column_view widths,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd
index 05a2954af35..86519de0b90 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport size_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -16,13 +16,13 @@ cdef extern from "cudf/strings/repeat_strings.hpp" namespace "cudf::strings" \
     cdef unique_ptr[column] repeat_strings(
         column_view input,
         size_type repeat_times,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] repeat_strings(
         column_view input,
         column_view repeat_times,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd
index 263b91475b8..cf2573af5ed 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd
@@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -18,7 +18,7 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil:
         string_scalar repl,
         size_type start,
         size_type stop,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] replace(
@@ -26,12 +26,12 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil:
         string_scalar target,
         string_scalar repl,
         int32_t maxrepl,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] replace_multiple(
         column_view source_strings,
         column_view target_strings,
         column_view repl_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd
index 5f5cbaeaf55..d3e958841ab 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd
@@ -11,7 +11,7 @@ from pylibcudf.libcudf.strings.regex_flags cimport regex_flags
 from pylibcudf.libcudf.strings.regex_program cimport regex_program
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -22,7 +22,7 @@ cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil:
         regex_program prog,
         string_scalar replacement,
         size_type max_replace_count,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] replace_re(
@@ -30,12 +30,12 @@ cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil:
         vector[string] patterns,
         column_view replacements,
         regex_flags flags,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] replace_with_backrefs(
         column_view input,
         regex_program prog,
         string replacement,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd
index 6e6fc2acac4..39a3ac4b769 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd
@@ -4,12 +4,12 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 cdef extern from "cudf/strings/reverse.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] reverse(
         column_view source_strings,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd
index 0c99455ea33..6c9031482ca 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.table.table cimport table
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,11 +17,11 @@ cdef extern from "cudf/strings/split/partition.hpp" namespace \
     cdef unique_ptr[table] partition(
         column_view input,
         string_scalar delimiter,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[table] rpartition(
         column_view input,
         string_scalar delimiter,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd
index 9ed741b608a..5d14fefdb1b 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd
@@ -9,7 +9,7 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.strings.regex_program cimport regex_program
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -20,35 +20,35 @@ cdef extern from "cudf/strings/split/split.hpp" namespace \
         column_view strings_column,
         string_scalar delimiter,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[table] rsplit(
         column_view strings_column,
         string_scalar delimiter,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] split_record(
         column_view strings,
         string_scalar delimiter,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] rsplit_record(
         column_view strings,
         string_scalar delimiter,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] split_part(
         column_view strings,
         string_scalar delimiter,
         size_type index,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
 
@@ -59,26 +59,26 @@ cdef extern from "cudf/strings/split/split_re.hpp" namespace \
         const column_view& input,
         regex_program prog,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[table] rsplit_re(
         const column_view& input,
         regex_program prog,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] split_record_re(
         const column_view& input,
         regex_program prog,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef unique_ptr[column] rsplit_record_re(
         const column_view& input,
         regex_program prog,
         size_type maxsplit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd
index 8c72fed7219..5fa0dfb4289 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stdint cimport int64_t
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column_view cimport column_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 cdef extern from "cudf/strings/strings_column_view.hpp" namespace "cudf" nogil:
     cdef cppclass strings_column_view:
         strings_column_view(column_view) except +libcudf_exception_handler
-        int64_t chars_size(cuda_stream_view) except +libcudf_exception_handler
+        int64_t chars_size(cudaStream_t) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
index 13e017c33f7..4d56b2de5d3 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
@@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.strings.side_type cimport side_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -16,5 +16,5 @@ cdef extern from "cudf/strings/strip.hpp" namespace "cudf::strings" nogil:
         column_view input,
         side_type side,
         string_scalar to_strip,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd
index 21c2fe4a77b..d0b4f192307 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd
@@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar
 from pylibcudf.libcudf.types cimport size_type
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -17,7 +17,7 @@ cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil:
         numeric_scalar[size_type] start,
         numeric_scalar[size_type] end,
         numeric_scalar[size_type] step,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -25,6 +25,6 @@ cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil:
         column_view source_strings,
         column_view starts,
         column_view stops,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd
index 9bdc0489a89..dcf5aa20948 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd
@@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport char_utf8
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -18,7 +18,7 @@ cdef extern from "cudf/strings/translate.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] translate(
         column_view input,
         vector[pair[char_utf8, char_utf8]] chars_table,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -31,5 +31,5 @@ cdef extern from "cudf/strings/translate.hpp" namespace "cudf::strings" nogil:
         vector[pair[char_utf8, char_utf8]] characters_to_filter,
         filter_type keep_characters,
         string_scalar replacement,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
index 8aa5631a12e..2ddd924df48 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
@@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -14,5 +14,5 @@ cdef extern from "cudf/strings/wrap.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] wrap(
         column_view input,
         size_type width,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd
index 7b339782295..d51a51dfb13 100644
--- a/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd
@@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column_view cimport column_view
@@ -22,5 +22,5 @@ cdef extern from "cudf/structs/structs_column_view.hpp" namespace "cudf" nogil:
         column_view parent() except +libcudf_exception_handler
         column_view get_sliced_child(
             size_type index,
-            cuda_stream_view stream
+            cudaStream_t stream
         ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd
index 230131d5520..dcfc046a904 100644
--- a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd
@@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.table.table_view cimport mutable_table_view, table_view
 from pylibcudf.libcudf.types cimport size_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -14,12 +14,12 @@ cdef extern from "cudf/table/table.hpp" namespace "cudf" nogil:
     cdef cppclass table:
         table(
             const table&,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         table(
             table_view,
-            cuda_stream_view stream,
+            cudaStream_t stream,
             device_async_resource_ref mr
         ) except +libcudf_exception_handler
         size_type num_columns() except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd
index 9b2ace2d940..ebc9d8bfa1d 100644
--- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd
@@ -16,14 +16,14 @@ from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type
 from pylibcudf.libcudf.types cimport null_aware, output_nullability
 
 from rmm.librmm.device_buffer cimport device_buffer
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
 cdef extern from "cudf/transform.hpp" namespace "cudf" nogil:
     cdef pair[unique_ptr[device_buffer], size_type] bools_to_mask (
         const column_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -31,19 +31,19 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil:
         const bitmask_type* bitmask,
         size_type begin_bit,
         size_type end_bit,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef pair[unique_ptr[device_buffer], size_type] nans_to_nulls(
         const column_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] column_nans_to_nulls(
         const column_view& input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
@@ -55,33 +55,33 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil:
         optional[void *] user_data,
         null_aware is_null_aware,
         output_nullability null_policy,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef pair[unique_ptr[table], unique_ptr[column]] encode(
         table_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef pair[unique_ptr[column], table_view] one_hot_encode(
         column_view input_column,
         column_view categories,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] compute_column(
         const table_view table,
         const expression& expr,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[column] compute_column_jit(
         const table_view table,
         const expression& expr,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd
index 2345ab5a2d9..0ce2048ba0f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd
@@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.table.table_view cimport table_view
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -16,6 +16,6 @@ cdef extern from "cudf/transpose.hpp" namespace "cudf" nogil:
         table_view
     ] transpose(
         table_view input_table,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/unary.pxd b/python/pylibcudf/pylibcudf/libcudf/unary.pxd
index d3fd2f2f976..6f59ff8d5e0 100644
--- a/python/pylibcudf/pylibcudf/libcudf/unary.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/unary.pxd
@@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport data_type
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.librmm.memory_resource cimport device_async_resource_ref
 
 
@@ -42,32 +42,32 @@ cdef extern from "cudf/unary.hpp" namespace "cudf" nogil:
     cdef extern unique_ptr[column] unary_operation(
         column_view input,
         unary_operator op,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
 
     cdef extern unique_ptr[column] is_null(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef extern unique_ptr[column] is_valid(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef extern unique_ptr[column] cast(
         column_view input,
         data_type out_type,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr) except +libcudf_exception_handler
     cdef extern bool is_supported_cast(data_type from_, data_type to) noexcept
     cdef extern unique_ptr[column] is_nan(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
     cdef extern unique_ptr[column] is_not_nan(
         column_view input,
-        cuda_stream_view stream,
+        cudaStream_t stream,
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd b/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd
index 5954dace85e..04001f5a064 100644
--- a/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd
@@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport (
     null_policy,
     size_type,
 )
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 cdef extern from "cudf/reduction/unique_count.hpp" namespace "cudf" nogil:
@@ -17,9 +17,9 @@ cdef extern from "cudf/reduction/unique_count.hpp" namespace "cudf" nogil:
         column_view column,
         null_policy null_handling,
         nan_policy nan_handling,
-        cuda_stream_view stream) except +libcudf_exception_handler
+        cudaStream_t stream) except +libcudf_exception_handler
 
     cdef size_type unique_count(
         table_view source_table,
         null_equality nulls_equal,
-        cuda_stream_view stream) except +libcudf_exception_handler
+        cudaStream_t stream) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd
index a9569f11706..661db24f5aa 100644
--- a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
+from cuda.bindings.cyruntime cimport cudaStream_t
 from libcpp cimport bool
 
-from rmm.librmm.cuda_stream_view cimport cuda_stream_view
-
 
 cdef extern from "cudf/utilities/default_stream.hpp" namespace "cudf" nogil:
     cdef bool is_ptds_enabled()
-    cdef cuda_stream_view get_default_stream()
+    cdef cudaStream_t get_default_stream()
diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd
index be47db18a59..88b09c01531 100644
--- a/python/pylibcudf/pylibcudf/lists.pxd
+++ b/python/pylibcudf/pylibcudf/lists.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -9,7 +9,6 @@ from pylibcudf.libcudf.copying cimport out_of_bounds_policy
 from pylibcudf.libcudf.lists.combine cimport concatenate_null_policy
 from pylibcudf.libcudf.lists.contains cimport duplicate_find_option
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .scalar cimport Scalar
@@ -26,33 +25,33 @@ ctypedef fused ColumnOrSizeType:
 cpdef Table explode_outer(
     Table,
     size_type explode_column_idx,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column concatenate_rows(
     Table,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column concatenate_list_elements(
     Column,
     concatenate_null_policy null_policy,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column contains(
     Column,
     ColumnOrScalar,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column contains_nulls(
     Column,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -60,13 +59,13 @@ cpdef Column index_of(
     Column,
     ColumnOrScalar,
     duplicate_find_option,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column reverse(
     Column,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -74,20 +73,20 @@ cpdef Column segmented_gather(
     Column,
     Column,
     out_of_bounds_policy bounds_policy=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column extract_list_element(
     Column,
     ColumnOrSizeType,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column count_elements(
     Column,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -95,7 +94,7 @@ cpdef Column sequences(
     Column,
     Column,
     Column steps = *,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -104,7 +103,7 @@ cpdef Column sort_lists(
     order,
     null_order,
     bool stable = *,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -113,7 +112,7 @@ cpdef Column difference_distinct(
     Column,
     null_equality nulls_equal=*,
     nan_equality nans_equal=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -122,7 +121,7 @@ cpdef Column have_overlap(
     Column,
     null_equality nulls_equal=*,
     nan_equality nans_equal=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -131,7 +130,7 @@ cpdef Column intersect_distinct(
     Column,
     null_equality nulls_equal=*,
     nan_equality nans_equal=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -140,14 +139,14 @@ cpdef Column union_distinct(
     Column,
     null_equality nulls_equal=*,
     nan_equality nans_equal=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column apply_boolean_mask(
     Column,
     Column,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -155,6 +154,6 @@ cpdef Column distinct(
     Column,
     null_equality,
     nan_equality,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi
index a3bcf9f76d6..1e418b59726 100644
--- a/python/pylibcudf/pylibcudf/lists.pyi
+++ b/python/pylibcudf/pylibcudf/lists.pyi
@@ -1,16 +1,16 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.copying import OutOfBoundsPolicy
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 from pylibcudf.types import NanEquality, NullEquality, NullOrder, Order
+from pylibcudf.utils import CudaStreamLike
 
 class ConcatenateNullPolicy(IntEnum):
     IGNORE = ...
@@ -23,66 +23,66 @@ class DuplicateFindOption(IntEnum):
 def explode_outer(
     input: Table,
     explode_column_idx: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def concatenate_rows(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def concatenate_list_elements(
     input: Column,
     null_policy: ConcatenateNullPolicy,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def contains(
     input: Column,
     search_key: Column | Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def contains_nulls(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def index_of(
     input: Column,
     search_key: Column | Scalar,
     find_option: DuplicateFindOption,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def reverse(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def segmented_gather(
     input: Column,
     gather_map_list: Column,
     bounds_policy: OutOfBoundsPolicy = OutOfBoundsPolicy.DONT_CHECK,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def extract_list_element(
     input: Column,
     index: Column | int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def count_elements(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def sequences(
     starts: Column,
     sizes: Column,
     steps: Column | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def sort_lists(
@@ -90,7 +90,7 @@ def sort_lists(
     sort_order: Order,
     na_position: NullOrder,
     stable: bool = False,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def difference_distinct(
@@ -98,7 +98,7 @@ def difference_distinct(
     rhs: Column,
     nulls_equal: NullEquality = NullEquality.EQUAL,
     nans_equal: NanEquality = NanEquality.ALL_EQUAL,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def have_overlap(
@@ -106,7 +106,7 @@ def have_overlap(
     rhs: Column,
     nulls_equal: NullEquality = NullEquality.EQUAL,
     nans_equal: NanEquality = NanEquality.ALL_EQUAL,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def intersect_distinct(
@@ -114,7 +114,7 @@ def intersect_distinct(
     rhs: Column,
     nulls_equal: NullEquality = NullEquality.EQUAL,
     nans_equal: NanEquality = NanEquality.ALL_EQUAL,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def union_distinct(
@@ -122,19 +122,19 @@ def union_distinct(
     rhs: Column,
     nulls_equal: NullEquality = NullEquality.EQUAL,
     nans_equal: NanEquality = NanEquality.ALL_EQUAL,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def apply_boolean_mask(
     input: Column,
     boolean_mask: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def distinct(
     input: Column,
     nulls_equal: NullEquality,
     nans_equal: NanEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx
index 0076f7da677..fd05242e44f 100644
--- a/python/pylibcudf/pylibcudf/lists.pyx
+++ b/python/pylibcudf/pylibcudf/lists.pyx
@@ -55,6 +55,7 @@ from .column cimport Column, ListsColumnView
 from .scalar cimport Scalar
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "ConcatenateNullPolicy",
@@ -82,7 +83,7 @@ __all__ = [
 cpdef Table explode_outer(
     Table input,
     size_type explode_column_idx,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Explode a column of lists into rows.
@@ -105,20 +106,21 @@ cpdef Table explode_outer(
     """
     cdef unique_ptr[table] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_explode.explode_outer(
-            input.view(), explode_column_idx, stream.view(), mr.get_mr()
+            input.view(), explode_column_idx, _cs, mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column concatenate_rows(
     Table input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Concatenate multiple lists columns into a single lists column row-wise.
@@ -139,21 +141,22 @@ cpdef Column concatenate_rows(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_concatenate_rows(
-            input.view(), concatenate_null_policy.IGNORE, stream.view(), mr.get_mr()
+            input.view(), concatenate_null_policy.IGNORE, _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column concatenate_list_elements(
     Column input,
     concatenate_null_policy null_policy,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Concatenate multiple lists on the same row into a single list.
@@ -174,21 +177,22 @@ cpdef Column concatenate_list_elements(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_concatenate_list_elements(
-            input.view(), null_policy, stream.view(), mr.get_mr()
+            input.view(), null_policy, _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column contains(
     Column input,
     ColumnOrScalar search_key,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a column of bool values indicating whether
@@ -218,7 +222,8 @@ cpdef Column contains(
     cdef unique_ptr[column] c_result
     cdef ListsColumnView list_view = input.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if not isinstance(search_key, (Column, Scalar)):
@@ -230,15 +235,15 @@ cpdef Column contains(
             search_key.view() if ColumnOrScalar is Column else dereference(
                 search_key.get()
             ),
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column contains_nulls(
     Column input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a column of bool values indicating whether
@@ -262,21 +267,22 @@ cpdef Column contains_nulls(
     cdef unique_ptr[column] c_result
     cdef ListsColumnView list_view = input.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_contains.contains_nulls(
-            list_view.view(), stream.view(), mr.get_mr()
+            list_view.view(), _cs, mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column index_of(
     Column input,
     ColumnOrScalar search_key,
     duplicate_find_option find_option,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a column of index values indicating the position of a search
@@ -307,7 +313,8 @@ cpdef Column index_of(
     cdef unique_ptr[column] c_result
     cdef ListsColumnView list_view = input.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -317,15 +324,15 @@ cpdef Column index_of(
                 search_key.get()
             ),
             find_option,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column reverse(
     Column input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Reverse the element order within each list of the input column.
@@ -347,19 +354,20 @@ cpdef Column reverse(
     cdef unique_ptr[column] c_result
     cdef ListsColumnView list_view = input.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_reverse.reverse(list_view.view(), stream.view(), mr.get_mr())
-    return Column.from_libcudf(move(c_result), stream, mr)
+        c_result = cpp_reverse.reverse(list_view.view(), _cs, mr.get_mr())
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column segmented_gather(
     Column input,
     Column gather_map_list,
     out_of_bounds_policy bounds_policy=out_of_bounds_policy.DONT_CHECK,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a column with elements gathered based on the indices in gather_map_list
@@ -394,7 +402,8 @@ cpdef Column segmented_gather(
     cdef ListsColumnView list_view1 = input.list_view()
     cdef ListsColumnView list_view2 = gather_map_list.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -402,16 +411,16 @@ cpdef Column segmented_gather(
             list_view1.view(),
             list_view2.view(),
             bounds_policy,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column extract_list_element(
     Column input,
     ColumnOrSizeType index,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a column of extracted list elements.
@@ -433,22 +442,23 @@ cpdef Column extract_list_element(
     cdef unique_ptr[column] c_result
     cdef ListsColumnView list_view = input.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_extract_list_element(
             list_view.view(),
             index.view() if ColumnOrSizeType is Column else index,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column count_elements(
     Column input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Count the number of rows in each
@@ -472,20 +482,21 @@ cpdef Column count_elements(
     cdef ListsColumnView list_view = input.list_view()
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_count_elements(list_view.view(), stream.view(), mr.get_mr())
+        c_result = cpp_count_elements(list_view.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column sequences(
     Column starts,
     Column sizes,
     Column steps = None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a lists column in which each row contains a sequence of
@@ -509,7 +520,8 @@ cpdef Column sequences(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if steps is not None:
@@ -518,22 +530,22 @@ cpdef Column sequences(
                 starts.view(),
                 steps.view(),
                 sizes.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr(),
             )
     else:
         with nogil:
             c_result = cpp_filling.sequences(
-                starts.view(), sizes.view(), stream.view(), mr.get_mr()
+                starts.view(), sizes.view(), _cs, mr.get_mr()
             )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column sort_lists(
     Column input,
     order sort_order,
     null_order na_position,
     bool stable = False,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Sort the elements within a list in each row of a list column.
@@ -561,7 +573,8 @@ cpdef Column sort_lists(
     cdef unique_ptr[column] c_result
     cdef ListsColumnView list_view = input.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -570,7 +583,7 @@ cpdef Column sort_lists(
                     list_view.view(),
                     sort_order,
                     na_position,
-                    stream.view(),
+                    _cs,
                     mr.get_mr(),
             )
         else:
@@ -578,10 +591,10 @@ cpdef Column sort_lists(
                     list_view.view(),
                     sort_order,
                     na_position,
-                    stream.view(),
+                    _cs,
                     mr.get_mr(),
             )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column difference_distinct(
@@ -589,7 +602,7 @@ cpdef Column difference_distinct(
     Column rhs,
     null_equality nulls_equal=null_equality.EQUAL,
     nan_equality nans_equal=nan_equality.ALL_EQUAL,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a column of index values indicating the position of a search
@@ -617,7 +630,8 @@ cpdef Column difference_distinct(
     cdef ListsColumnView lhs_view = lhs.list_view()
     cdef ListsColumnView rhs_view = rhs.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -626,10 +640,10 @@ cpdef Column difference_distinct(
             rhs_view.view(),
             nulls_equal,
             nans_equal,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column have_overlap(
@@ -637,7 +651,7 @@ cpdef Column have_overlap(
     Column rhs,
     null_equality nulls_equal=null_equality.EQUAL,
     nan_equality nans_equal=nan_equality.ALL_EQUAL,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Check if lists at each row of the given lists columns overlap.
@@ -664,7 +678,8 @@ cpdef Column have_overlap(
     cdef ListsColumnView lhs_view = lhs.list_view()
     cdef ListsColumnView rhs_view = rhs.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -673,10 +688,10 @@ cpdef Column have_overlap(
             rhs_view.view(),
             nulls_equal,
             nans_equal,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column intersect_distinct(
@@ -684,7 +699,7 @@ cpdef Column intersect_distinct(
     Column rhs,
     null_equality nulls_equal=null_equality.EQUAL,
     nan_equality nans_equal=nan_equality.ALL_EQUAL,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a lists column of distinct elements common to two input lists columns.
@@ -711,7 +726,8 @@ cpdef Column intersect_distinct(
     cdef ListsColumnView lhs_view = lhs.list_view()
     cdef ListsColumnView rhs_view = rhs.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -720,10 +736,10 @@ cpdef Column intersect_distinct(
             rhs_view.view(),
             nulls_equal,
             nans_equal,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column union_distinct(
@@ -731,7 +747,7 @@ cpdef Column union_distinct(
     Column rhs,
     null_equality nulls_equal=null_equality.EQUAL,
     nan_equality nans_equal=nan_equality.ALL_EQUAL,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a lists column of distinct elements found in
@@ -759,7 +775,8 @@ cpdef Column union_distinct(
     cdef ListsColumnView lhs_view = lhs.list_view()
     cdef ListsColumnView rhs_view = rhs.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -768,16 +785,16 @@ cpdef Column union_distinct(
             rhs_view.view(),
             nulls_equal,
             nans_equal,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column apply_boolean_mask(
     Column input,
     Column boolean_mask,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Filters elements in each row of the input lists column using a boolean mask
@@ -802,24 +819,25 @@ cpdef Column apply_boolean_mask(
     cdef ListsColumnView list_view = input.list_view()
     cdef ListsColumnView mask_view = boolean_mask.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_apply_boolean_mask(
             list_view.view(),
             mask_view.view(),
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column distinct(
     Column input,
     null_equality nulls_equal,
     nan_equality nans_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a new list column without duplicate elements in each list.
@@ -843,7 +861,8 @@ cpdef Column distinct(
     cdef unique_ptr[column] c_result
     cdef ListsColumnView list_view = input.list_view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -852,10 +871,10 @@ cpdef Column distinct(
             nulls_equal,
             nans_equal,
             duplicate_keep_option.KEEP_ANY,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 ConcatenateNullPolicy.__str__ = ConcatenateNullPolicy.__repr__
 DuplicateFindOption.__str__ = DuplicateFindOption.__repr__
diff --git a/python/pylibcudf/pylibcudf/merge.pxd b/python/pylibcudf/pylibcudf/merge.pxd
index aed9dda7479..07624852289 100644
--- a/python/pylibcudf/pylibcudf/merge.pxd
+++ b/python/pylibcudf/pylibcudf/merge.pxd
@@ -1,9 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from .table cimport Table
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 
@@ -12,6 +11,6 @@ cpdef Table merge (
     list key_cols,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/merge.pyi b/python/pylibcudf/pylibcudf/merge.pyi
index f96e1d8534e..50e87d5bffa 100644
--- a/python/pylibcudf/pylibcudf/merge.pyi
+++ b/python/pylibcudf/pylibcudf/merge.pyi
@@ -1,17 +1,17 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.table import Table
 from pylibcudf.types import NullOrder, Order
+from pylibcudf.utils import CudaStreamLike
 
 def merge(
     tables_to_merge: list[Table],
     key_cols: list[int],
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx
index a6cbaf81051..3c0cd93a342 100644
--- a/python/pylibcudf/pylibcudf/merge.pyx
+++ b/python/pylibcudf/pylibcudf/merge.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["merge"]
 
@@ -22,7 +23,7 @@ cpdef Table merge (
     list key_cols,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Merge a set of sorted tables.
@@ -58,7 +59,8 @@ cpdef Table merge (
         c_tables_to_merge.push_back((<Table?> tbl).view())
 
     cdef unique_ptr[table] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -67,7 +69,7 @@ cpdef Table merge (
             c_key_cols,
             c_column_order,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/null_mask.pxd
index 6eb10eddb2e..e7fa70e23ae 100644
--- a/python/pylibcudf/pylibcudf/null_mask.pxd
+++ b/python/pylibcudf/pylibcudf/null_mask.pxd
@@ -5,18 +5,19 @@ from pylibcudf.libcudf.types cimport mask_state, size_type
 
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 
 
-cpdef DeviceBuffer copy_bitmask(Column col, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef DeviceBuffer copy_bitmask(
+    Column col, object stream = *, DeviceMemoryResource mr=*
+)
 
 cpdef DeviceBuffer copy_bitmask_from_bitmask(
     object bitmask,
     size_type begin_bit,
     size_type end_bit,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
@@ -25,24 +26,24 @@ cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits)
 cpdef DeviceBuffer create_null_mask(
     size_type size,
     mask_state state=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
-cpdef tuple bitmask_and(list columns, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef tuple bitmask_and(list columns, object stream = *, DeviceMemoryResource mr=*)
 
-cpdef tuple bitmask_or(list columns, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef tuple bitmask_or(list columns, object stream = *, DeviceMemoryResource mr=*)
 
 cpdef size_type null_count(
     object bitmask,
     size_type start,
     size_type stop,
-    Stream stream=*
+    object stream = *
 )
 
 cpdef size_type index_of_first_set_bit(
     object bitmask,
     size_type start,
     size_type stop,
-    Stream stream=*
+    object stream = *
 )
diff --git a/python/pylibcudf/pylibcudf/null_mask.pyi b/python/pylibcudf/pylibcudf/null_mask.pyi
index 98f6e60fb0d..45e130b704e 100644
--- a/python/pylibcudf/pylibcudf/null_mask.pyi
+++ b/python/pylibcudf/pylibcudf/null_mask.pyi
@@ -3,44 +3,44 @@
 
 from rmm.pylibrmm.device_buffer import DeviceBuffer
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.span import Span
 from pylibcudf.types import MaskState
+from pylibcudf.utils import CudaStreamLike
 
 def copy_bitmask(
     col: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> DeviceBuffer: ...
 def copy_bitmask_from_bitmask(
     bitmask: Span,
     begin_bit: int,
     end_bit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> DeviceBuffer: ...
 def bitmask_allocation_size_bytes(number_of_bits: int) -> int: ...
 def create_null_mask(
     size: int,
     state: MaskState = MaskState.UNINITIALIZED,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> DeviceBuffer: ...
 def bitmask_and(
     columns: list[Column],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[DeviceBuffer, int]: ...
 def bitmask_or(
     columns: list[Column],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[DeviceBuffer, int]: ...
 def null_count(
-    bitmask: Span, start: int, stop: int, stream: Stream | None = None
+    bitmask: Span, start: int, stop: int, stream: CudaStreamLike | None = None
 ) -> int: ...
 def index_of_first_set_bit(
-    bitmask: Span, start: int, stop: int, stream: Stream | None = None
+    bitmask: Span, start: int, stop: int, stream: CudaStreamLike | None = None
 ) -> int: ...
diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx
index 176e73047e2..164c51aca9f 100644
--- a/python/pylibcudf/pylibcudf/null_mask.pyx
+++ b/python/pylibcudf/pylibcudf/null_mask.pyx
@@ -19,6 +19,7 @@ from .span import is_span as py_is_span
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "bitmask_allocation_size_bytes",
@@ -31,7 +32,7 @@ __all__ = [
 ]
 
 cdef DeviceBuffer buffer_to_python(
-    device_buffer buf, Stream stream, DeviceMemoryResource mr
+    device_buffer buf, object stream, DeviceMemoryResource mr
 ):
     return DeviceBuffer.c_from_unique_ptr(
         make_unique[device_buffer](move(buf)), stream, mr
@@ -40,7 +41,7 @@ cdef DeviceBuffer buffer_to_python(
 
 cpdef DeviceBuffer copy_bitmask(
     Column col,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Copies ``col``'s bitmask into a ``DeviceBuffer``.
@@ -63,20 +64,21 @@ cpdef DeviceBuffer copy_bitmask(
         ``DeviceBuffer`` if ``col`` is not nullable
     """
     cdef device_buffer db
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        db = cpp_null_mask.copy_bitmask(col.view(), stream.view(), mr.get_mr())
+        db = cpp_null_mask.copy_bitmask(col.view(), _cs, mr.get_mr())
 
-    return buffer_to_python(move(db), stream, mr)
+    return buffer_to_python(move(db), _stream, mr)
 
 
 cpdef DeviceBuffer copy_bitmask_from_bitmask(
     object bitmask,
     size_type begin_bit,
     size_type end_bit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Copies a portion of a bitmask into a ``DeviceBuffer``.
@@ -108,7 +110,8 @@ cpdef DeviceBuffer copy_bitmask_from_bitmask(
             f"got {type(bitmask).__name__}"
         )
     cdef device_buffer db
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     cdef uintptr_t ptr = bitmask.ptr
 
@@ -117,11 +120,11 @@ cpdef DeviceBuffer copy_bitmask_from_bitmask(
             <bitmask_type*>ptr,
             begin_bit,
             end_bit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return buffer_to_python(move(db), stream, mr)
+    return buffer_to_python(move(db), _stream, mr)
 
 
 cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits):
@@ -148,7 +151,7 @@ cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits):
 cpdef DeviceBuffer create_null_mask(
     size_type size,
     mask_state state = mask_state.UNINITIALIZED,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Creates a ``DeviceBuffer`` for use as a null value indicator bitmask of a
@@ -176,16 +179,17 @@ cpdef DeviceBuffer create_null_mask(
         state
     """
     cdef device_buffer db
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        db = cpp_null_mask.create_null_mask(size, state, stream.view(), mr.get_mr())
+        db = cpp_null_mask.create_null_mask(size, state, _cs, mr.get_mr())
 
-    return buffer_to_python(move(db), stream, mr)
+    return buffer_to_python(move(db), _stream, mr)
 
 
-cpdef tuple bitmask_and(list columns, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef tuple bitmask_and(list columns, object stream=None, DeviceMemoryResource mr=None):
     """Performs bitwise AND of the bitmasks of a list of columns.
 
     For details, see :cpp:func:`bitmask_and`.
@@ -206,16 +210,19 @@ cpdef tuple bitmask_and(list columns, Stream stream=None, DeviceMemoryResource m
     """
     cdef Table c_table = Table(columns)
     cdef pair[device_buffer, size_type] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_null_mask.bitmask_and(c_table.view(), stream.view(), mr.get_mr())
+        c_result = cpp_null_mask.bitmask_and(
+            c_table.view(), _cs, mr.get_mr()
+        )
 
-    return buffer_to_python(move(c_result.first), stream, mr), c_result.second
+    return buffer_to_python(move(c_result.first), _stream, mr), c_result.second
 
 
-cpdef tuple bitmask_or(list columns, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef tuple bitmask_or(list columns, object stream=None, DeviceMemoryResource mr=None):
     """Performs bitwise OR of the bitmasks of a list of columns.
 
     For details, see :cpp:func:`bitmask_or`.
@@ -236,20 +243,21 @@ cpdef tuple bitmask_or(list columns, Stream stream=None, DeviceMemoryResource mr
     """
     cdef Table c_table = Table(columns)
     cdef pair[device_buffer, size_type] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_null_mask.bitmask_or(c_table.view(), stream.view(), mr.get_mr())
+        c_result = cpp_null_mask.bitmask_or(c_table.view(), _cs, mr.get_mr())
 
-    return buffer_to_python(move(c_result.first), stream, mr), c_result.second
+    return buffer_to_python(move(c_result.first), _stream, mr), c_result.second
 
 
 cpdef size_type null_count(
     object bitmask,
     size_type start,
     size_type stop,
-    Stream stream=None
+    object stream=None
 ):
     """Given a validity bitmask, counts the number of null elements.
 
@@ -277,20 +285,21 @@ cpdef size_type null_count(
             f"got {type(bitmask).__name__}"
         )
     cdef uintptr_t ptr = bitmask.ptr
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     with nogil:
         return cpp_null_mask.null_count(
             <bitmask_type*>ptr,
             start,
             stop,
-            stream.view()
+            _cs
         )
 
 cpdef size_type index_of_first_set_bit(
     object bitmask,
     size_type start,
     size_type stop,
-    Stream stream=None
+    object stream=None
 ):
     """Given a validity bitmask, returns the index of the first valid element
     relative to ``start``.
@@ -319,11 +328,12 @@ cpdef size_type index_of_first_set_bit(
             f"got {type(bitmask).__name__}"
         )
     cdef uintptr_t ptr = bitmask.ptr
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     with nogil:
         return cpp_null_mask.index_of_first_set_bit(
             <bitmask_type*>ptr,
             start,
             stop,
-            stream.view()
+            _cs
         )
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd
index 8cd73fe41ad..2bc3f75b174 100644
--- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -6,7 +6,6 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.nvtext.byte_pair_encode cimport bpe_merge_pairs
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cdef class BPEMergePairs:
@@ -16,6 +15,6 @@ cpdef Column byte_pair_encoding(
     Column input,
     BPEMergePairs merge_pairs,
     Scalar separator=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
index 4abf1f52b4d..7ee48f72209 100644
--- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
@@ -1,17 +1,17 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 class BPEMergePairs:
     def __init__(
         self,
         merge_pairs: Column,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ): ...
 
@@ -19,6 +19,6 @@ def byte_pair_encoding(
     input: Column,
     merge_pairs: BPEMergePairs,
     separator: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
index 001b9dfca1e..023e00a1169 100644
--- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["BPEMergePairs", "byte_pair_encoding"]
 
@@ -30,14 +31,17 @@ cdef class BPEMergePairs:
     def __cinit__(
         self,
         Column merge_pairs,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         cdef column_view c_pairs = merge_pairs.view()
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
-            self.c_obj = move(cpp_load_merge_pairs(c_pairs, stream.view(), mr.get_mr()))
+            self.c_obj = move(
+                cpp_load_merge_pairs(c_pairs, _cs, mr.get_mr())
+            )
 
     __hash__ = None
 
@@ -45,7 +49,7 @@ cpdef Column byte_pair_encoding(
     Column input,
     BPEMergePairs merge_pairs,
     Scalar separator=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -70,12 +74,13 @@ cpdef Column byte_pair_encoding(
         An encoded column of strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if separator is None:
         separator = Scalar.from_libcudf(
-            cpp_make_string_scalar(" ".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar(" ".encode(), _cs, mr.get_mr())
         )
 
     with nogil:
@@ -84,9 +89,9 @@ cpdef Column byte_pair_encoding(
                 input.view(),
                 dereference(merge_pairs.c_obj.get()),
                 dereference(<const string_scalar*>separator.c_obj.get()),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd
index ecca0a495a1..d038d4a3e27 100644
--- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd
@@ -1,22 +1,21 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 cpdef Column build_suffix_array(
     Column input,
     size_type min_width,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 cpdef Column resolve_duplicates(
     Column input,
     Column indices,
     size_type min_width,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 cpdef Column resolve_duplicates_pair(
@@ -25,6 +24,6 @@ cpdef Column resolve_duplicates_pair(
     Column input2,
     Column indices2,
     size_type min_width,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi
index 6e3d6883df4..653ee588f61 100644
--- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi
@@ -1,22 +1,22 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def build_suffix_array(
     input: Column,
     min_width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def resolve_duplicates(
     input: Column,
     indices: Column,
     min_width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def resolve_duplicates_pair(
@@ -25,6 +25,6 @@ def resolve_duplicates_pair(
     input2: Column,
     indices2: Column,
     min_width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx
index c71ae479674..e679841a792 100644
--- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator import dereference
@@ -18,6 +18,7 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.librmm.device_buffer cimport device_buffer
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "build_suffix_array",
@@ -36,14 +37,12 @@ cdef Column _column_from_suffix_array(
                 device_buffer(),
                 0
             )
-        ),
-        stream,
-        mr
+        ), stream, mr
     )
 
 
 cpdef Column build_suffix_array(
-    Column input, size_type min_width, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, size_type min_width, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Builds a suffix array for the input strings column.
@@ -68,22 +67,23 @@ cpdef Column build_suffix_array(
         New column of suffix array
     """
     cdef cpp_suffix_array_type c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_build_suffix_array(
-            input.view(), min_width, stream.view(), mr.get_mr()
+            input.view(), min_width, _cs, mr.get_mr()
         )
 
-    return _column_from_suffix_array(move(c_result), stream, mr)
+    return _column_from_suffix_array(move(c_result), _stream, mr)
 
 
 cpdef Column resolve_duplicates(
     Column input,
     Column indices,
     size_type min_width,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -111,15 +111,16 @@ cpdef Column resolve_duplicates(
         New column of duplicate strings
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_resolve_duplicates(
-            input.view(), indices.view(), min_width, stream.view(), mr.get_mr()
+            input.view(), indices.view(), min_width, _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column resolve_duplicates_pair(
@@ -128,7 +129,7 @@ cpdef Column resolve_duplicates_pair(
     Column input2,
     Column indices2,
     size_type min_width,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -161,7 +162,8 @@ cpdef Column resolve_duplicates_pair(
 
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -171,8 +173,8 @@ cpdef Column resolve_duplicates_pair(
             input2.view(),
             indices2.view(),
             min_width,
-            stream.view(),
+            _cs,
             mr.get_mr(),
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd
index aca87ac4882..c0297ebd887 100644
--- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd
@@ -1,20 +1,19 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column edit_distance(
     Column input,
     Column targets,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column edit_distance_matrix(
     Column input,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi
index 8c0e97b9951..5a6bde4cb66 100644
--- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi
@@ -1,19 +1,19 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def edit_distance(
     input: Column,
     targets: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def edit_distance_matrix(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
index 14d3b4539dc..4b9d3f6bcc3 100644
--- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
@@ -17,13 +17,14 @@ from rmm.pylibrmm.stream cimport Stream
 
 from ..column cimport Column
 from ..utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["edit_distance", "edit_distance_matrix"]
 
 cpdef Column edit_distance(
     Column input,
     Column targets,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -48,18 +49,19 @@ cpdef Column edit_distance(
     cdef column_view c_strings = input.view()
     cdef column_view c_targets = targets.view()
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_edit_distance(c_strings, c_targets, stream.view(), mr.get_mr())
+        c_result = cpp_edit_distance(c_strings, c_targets, _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column edit_distance_matrix(
     Column input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -88,10 +90,11 @@ cpdef Column edit_distance_matrix(
     )
     cdef column_view c_strings = input.view()
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_edit_distance_matrix(c_strings, stream.view(), mr.get_mr())
+        c_result = cpp_edit_distance_matrix(c_strings, _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd
index 1eb55f1fcf6..85477223954 100644
--- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stdint cimport uint32_t
@@ -6,21 +6,20 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column generate_ngrams(
     Column input,
     size_type ngrams,
     Scalar separator,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column generate_character_ngrams(
     Column input,
     size_type ngrams=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -28,6 +27,6 @@ cpdef Column hash_character_ngrams(
     Column input,
     size_type ngrams,
     uint32_t seed,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi
index 7a522acc5a9..317fdb9ee73 100644
--- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from typing import Any
@@ -6,28 +6,28 @@ from typing import Any
 import numpy as np
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def generate_ngrams(
     input: Column,
     ngrams: int,
     separator: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def generate_character_ngrams(
     input: Column,
     ngrams: int = 2,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def hash_character_ngrams(
     input: Column,
     ngrams: int,
     seed: int | np.unsignedinteger[Any],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
index ca8a21c279c..6d70751a5a0 100644
--- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stdint cimport uint32_t
@@ -18,6 +18,7 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "generate_ngrams",
@@ -29,7 +30,7 @@ cpdef Column generate_ngrams(
     Column input,
     size_type ngrams,
     Scalar separator,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -56,7 +57,8 @@ cpdef Column generate_ngrams(
     cdef column_view c_strings = input.view()
     cdef const string_scalar* c_separator = <const string_scalar*>separator.c_obj.get()
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -64,16 +66,16 @@ cpdef Column generate_ngrams(
             c_strings,
             ngrams,
             c_separator[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column generate_character_ngrams(
     Column input,
     size_type ngrams = 2,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -97,24 +99,25 @@ cpdef Column generate_character_ngrams(
     """
     cdef column_view c_strings = input.view()
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_generate_character_ngrams(
             c_strings,
             ngrams,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column hash_character_ngrams(
     Column input,
     size_type ngrams,
     uint32_t seed,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -140,7 +143,8 @@ cpdef Column hash_character_ngrams(
     """
     cdef column_view c_strings = input.view()
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -148,7 +152,7 @@ cpdef Column hash_character_ngrams(
             c_strings,
             ngrams,
             seed,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd
index fbf8e99ac55..1e3a26454a1 100644
--- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd
@@ -1,16 +1,15 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column jaccard_index(
     Column input1,
     Column input2,
     size_type width,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi
index abc86597c0e..355d2d7a92f 100644
--- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi
@@ -1,15 +1,15 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def jaccard_index(
     input1: Column,
     input2: Column,
     width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx
index 4089853ca77..24a343e4508 100644
--- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -13,6 +13,7 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["jaccard_index"]
 
@@ -20,7 +21,7 @@ cpdef Column jaccard_index(
     Column input1,
     Column input2,
     size_type width,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -49,7 +50,8 @@ cpdef Column jaccard_index(
     cdef column_view c_input1 = input1.view()
     cdef column_view c_input2 = input2.view()
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -57,8 +59,8 @@ cpdef Column jaccard_index(
             c_input1,
             c_input2,
             width,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd
index 0647337324d..f26b1e30245 100644
--- a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stdint cimport uint32_t, uint64_t
@@ -6,7 +6,6 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused ColumnOrScalar:
     Column
@@ -18,7 +17,7 @@ cpdef Column minhash(
     Column a,
     Column b,
     size_type width,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
@@ -28,7 +27,7 @@ cpdef Column minhash64(
     Column a,
     Column b,
     size_type width,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
@@ -38,7 +37,7 @@ cpdef Column minhash_ngrams(
     uint32_t seed,
     Column a,
     Column b,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
@@ -48,6 +47,6 @@ cpdef Column minhash64_ngrams(
     uint64_t seed,
     Column a,
     Column b,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
index ee924f8d7aa..5bce73dc991 100644
--- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from typing import Any
@@ -6,9 +6,9 @@ from typing import Any
 import numpy as np
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def minhash(
     input: Column,
@@ -16,7 +16,7 @@ def minhash(
     a: Column,
     b: Column,
     width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def minhash64(
@@ -25,7 +25,7 @@ def minhash64(
     a: Column,
     b: Column,
     width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def minhash_ngrams(
@@ -34,7 +34,7 @@ def minhash_ngrams(
     seed: int | np.unsignedinteger[Any],
     a: Column,
     b: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def minhash64_ngrams(
@@ -43,6 +43,6 @@ def minhash64_ngrams(
     seed: int | np.unsignedinteger[Any],
     a: Column,
     b: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
index 1329d88060c..3029ed54c50 100644
--- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stdint cimport uint32_t, uint64_t
@@ -16,6 +16,7 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "minhash",
@@ -30,7 +31,7 @@ cpdef Column minhash(
     Column a,
     Column b,
     size_type width,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """
@@ -58,7 +59,8 @@ cpdef Column minhash(
         List column of minhash values for each string per seed
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -68,11 +70,11 @@ cpdef Column minhash(
             a.view(),
             b.view(),
             width,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column minhash64(
     Column input,
@@ -80,7 +82,7 @@ cpdef Column minhash64(
     Column a,
     Column b,
     size_type width,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """
@@ -110,7 +112,8 @@ cpdef Column minhash64(
         List column of minhash values for each string per seed
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -120,11 +123,11 @@ cpdef Column minhash64(
             a.view(),
             b.view(),
             width,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column minhash_ngrams(
     Column input,
@@ -132,7 +135,7 @@ cpdef Column minhash_ngrams(
     uint32_t seed,
     Column a,
     Column b,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """
@@ -163,7 +166,8 @@ cpdef Column minhash_ngrams(
         value in columns a and b.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -173,11 +177,11 @@ cpdef Column minhash_ngrams(
             seed,
             a.view(),
             b.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column minhash64_ngrams(
     Column input,
@@ -185,7 +189,7 @@ cpdef Column minhash64_ngrams(
     uint64_t seed,
     Column a,
     Column b,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """
@@ -216,7 +220,8 @@ cpdef Column minhash64_ngrams(
         value in columns a and b.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -226,8 +231,8 @@ cpdef Column minhash64_ngrams(
             seed,
             a.view(),
             b.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd
index f410d778cb1..5deaa45c73f 100644
--- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column ngrams_tokenize(
@@ -13,6 +12,6 @@ cpdef Column ngrams_tokenize(
     size_type ngrams,
     Scalar delimiter,
     Scalar separator,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi
index 1347b7e7087..99c309a21ff 100644
--- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi
@@ -1,17 +1,17 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def ngrams_tokenize(
     input: Column,
     ngrams: int,
     delimiter: Scalar,
     separator: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx
index f9f36244a1d..959c47d595d 100644
--- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -15,6 +15,7 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["ngrams_tokenize"]
 
@@ -23,7 +24,7 @@ cpdef Column ngrams_tokenize(
     size_type ngrams,
     Scalar delimiter,
     Scalar separator,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -52,7 +53,8 @@ cpdef Column ngrams_tokenize(
         New strings columns of tokens
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -61,7 +63,7 @@ cpdef Column ngrams_tokenize(
             ngrams,
             dereference(<const string_scalar*>delimiter.get()),
             dereference(<const string_scalar*>separator.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd
index 8c8623e07a3..30e459f75a5 100644
--- a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -6,16 +6,17 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.nvtext.normalize cimport character_normalizer
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 cdef class CharacterNormalizer:
     cdef unique_ptr[character_normalizer] c_obj
 
-cpdef Column normalize_spaces(Column input, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef Column normalize_spaces(
+    Column input, object stream = *, DeviceMemoryResource mr=*
+)
 
 cpdef Column normalize_characters(
   Column input,
   CharacterNormalizer normalizer,
-  Stream stream=*,
+  object stream = *,
   DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi
index 958adb10ada..0fbd2e7e725 100644
--- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi
@@ -1,28 +1,28 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 class CharacterNormalizer:
     def __init__(
         self,
         do_lower_case: bool,
         special_tokens: Column,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ): ...
 
 def normalize_spaces(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def normalize_characters(
     input: Column,
     normalizer: CharacterNormalizer,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx
index 5f62189f2f5..8e29aad9121 100644
--- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -12,6 +12,7 @@ from pylibcudf.libcudf.nvtext cimport normalize as cpp_normalize
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "CharacterNormalizer"
@@ -28,18 +29,19 @@ cdef class CharacterNormalizer:
         self,
         bool do_lower_case,
         Column tokens,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         cdef column_view c_tokens = tokens.view()
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
             self.c_obj = move(
                 cpp_normalize.create_character_normalizer(
                     do_lower_case,
                     c_tokens,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
@@ -47,7 +49,7 @@ cdef class CharacterNormalizer:
     __hash__ = None
 
 cpdef Column normalize_spaces(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new strings column by normalizing the whitespace in
@@ -68,21 +70,22 @@ cpdef Column normalize_spaces(
         New strings columns of normalized strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_normalize.normalize_spaces(
-            input.view(), stream.view(), mr.get_mr()
+            input.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column normalize_characters(
     Column input,
     CharacterNormalizer normalizer,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -105,15 +108,16 @@ cpdef Column normalize_characters(
         Normalized strings column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_normalize.normalize_characters(
             input.view(),
             dereference(normalizer.c_obj.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/nvtext/replace.pxd
index c6a9ed5ba67..1265f75a514 100644
--- a/python/pylibcudf/pylibcudf/nvtext/replace.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/replace.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column replace_tokens(
@@ -13,7 +12,7 @@ cpdef Column replace_tokens(
     Column targets,
     Column replacements,
     Scalar delimiter=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -22,6 +21,6 @@ cpdef Column filter_tokens(
     size_type min_token_length,
     Scalar replacement=*,
     Scalar delimiter=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyi b/python/pylibcudf/pylibcudf/nvtext/replace.pyi
index 09187c1edf1..a5e451cdb16 100644
--- a/python/pylibcudf/pylibcudf/nvtext/replace.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyi
@@ -1,18 +1,18 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def replace_tokens(
     input: Column,
     targets: Column,
     replacements: Column,
     delimiter: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def filter_tokens(
@@ -20,6 +20,6 @@ def filter_tokens(
     min_token_length: int,
     replacement: Scalar | None = None,
     delimiter: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx
index db375e6993f..4b00d76bd64 100644
--- a/python/pylibcudf/pylibcudf/nvtext/replace.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["filter_tokens", "replace_tokens"]
 
@@ -27,7 +28,7 @@ cpdef Column replace_tokens(
     Column targets,
     Column replacements,
     Scalar delimiter=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -55,11 +56,12 @@ cpdef Column replace_tokens(
         New strings column with replaced strings
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     if delimiter is None:
         delimiter = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
         )
     with nogil:
         c_result = cpp_replace_tokens(
@@ -67,10 +69,10 @@ cpdef Column replace_tokens(
             targets.view(),
             replacements.view(),
             dereference(<const string_scalar*>delimiter.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column filter_tokens(
@@ -78,7 +80,7 @@ cpdef Column filter_tokens(
     size_type min_token_length,
     Scalar replacement=None,
     Scalar delimiter=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -107,15 +109,16 @@ cpdef Column filter_tokens(
         New strings column of filtered strings
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     if delimiter is None:
         delimiter = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
         )
     if replacement is None:
         replacement = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
         )
 
     with nogil:
@@ -124,8 +127,8 @@ cpdef Column filter_tokens(
             min_token_length,
             dereference(<const string_scalar*>replacement.get()),
             dereference(<const string_scalar*>delimiter.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd
index 0b19c699ea8..d9f9ef1549c 100644
--- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -6,7 +6,6 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.nvtext.stemmer cimport letter_type
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused ColumnOrSize:
     Column
@@ -16,10 +15,10 @@ cpdef Column is_letter(
     Column input,
     bool check_vowels,
     ColumnOrSize indices,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column porter_stemmer_measure(
-    Column input, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi
index ae53ce887a4..5fef689a895 100644
--- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi
@@ -1,20 +1,20 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def is_letter(
     input: Column,
     check_vowels: bool,
     indices: Column | int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def porter_stemmer_measure(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx
index 44dc6be5c60..e038cd03fb2 100644
--- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -18,6 +18,7 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from pylibcudf.libcudf.nvtext.stemmer import letter_type as LetterType # no-cython-lint
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["is_letter", "porter_stemmer_measure", "LetterType"]
 
@@ -25,7 +26,7 @@ cpdef Column is_letter(
     Column input,
     bool check_vowels,
     ColumnOrSize indices,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -55,7 +56,8 @@ cpdef Column is_letter(
         New boolean column.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -63,14 +65,14 @@ cpdef Column is_letter(
             input.view(),
             letter_type.VOWEL if check_vowels else letter_type.CONSONANT,
             indices if ColumnOrSize is size_type else indices.view(),
-            stream.view()
+            _cs
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column porter_stemmer_measure(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns the Porter Stemmer measurements of a strings column.
@@ -92,12 +94,13 @@ cpdef Column porter_stemmer_measure(
         New column of measure values
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_porter_stemmer_measure(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_porter_stemmer_measure(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 LetterType.__str__ = LetterType.__repr__
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd
index 2ad694d1eca..8346d420440 100644
--- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -7,36 +7,35 @@ from pylibcudf.libcudf.nvtext.tokenize cimport tokenize_vocabulary
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 cdef class TokenizeVocabulary:
     cdef unique_ptr[tokenize_vocabulary] c_obj
 
 cpdef Column tokenize_scalar(
-    Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column tokenize_column(
-    Column input, Column delimiters, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Column delimiters, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column count_tokens_scalar(
-    Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column count_tokens_column(
-    Column input, Column delimiters, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Column delimiters, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column character_tokenize(
-    Column input, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column detokenize(
     Column input,
     Column row_indices,
     Scalar separator=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -45,6 +44,6 @@ cpdef Column tokenize_with_vocabulary(
     TokenizeVocabulary vocabulary,
     Scalar delimiter,
     size_type default_id=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
index c6e2d4cfcb4..72a5209902e 100644
--- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
@@ -1,54 +1,54 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 class TokenizeVocabulary:
     def __init__(
         self,
         vocab: Column,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ): ...
 
 def tokenize_scalar(
     input: Column,
     delimiter: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def tokenize_column(
     input: Column,
     delimiters: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def count_tokens_scalar(
     input: Column,
     delimiter: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def count_tokens_column(
     input: Column,
     delimiters: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def character_tokenize(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def detokenize(
     input: Column,
     row_indices: Column,
     separator: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def tokenize_with_vocabulary(
@@ -56,6 +56,6 @@ def tokenize_with_vocabulary(
     vocabulary: TokenizeVocabulary,
     delimiter: Scalar,
     default_id: int = -1,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
index e296ea38a58..4e44d781cc4 100644
--- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -24,6 +24,7 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "TokenizeVocabulary",
@@ -41,19 +42,20 @@ cdef class TokenizeVocabulary:
 
     For details, see :cpp:class:`cudf::nvtext::tokenize_vocabulary`.
     """
-    def __cinit__(self, Column vocab, Stream stream=None, DeviceMemoryResource mr=None):
+    def __cinit__(self, Column vocab, object stream=None, DeviceMemoryResource mr=None):
         cdef column_view c_vocab = vocab.view()
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
-            self.c_obj = move(cpp_load_vocabulary(c_vocab, stream.view(), mr.get_mr()))
+            self.c_obj = move(cpp_load_vocabulary(c_vocab, _cs, mr.get_mr()))
 
     __hash__ = None
 
 cpdef Column tokenize_scalar(
     Column input,
     Scalar delimiter=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -77,26 +79,27 @@ cpdef Column tokenize_scalar(
         New strings columns of tokens
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if delimiter is None:
         delimiter = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
         )
 
     with nogil:
         c_result = cpp_tokenize(
             input.view(),
             dereference(<const string_scalar*>delimiter.c_obj.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column tokenize_column(
-    Column input, Column delimiters, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, Column delimiters, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a single column of strings by tokenizing the input
@@ -119,23 +122,24 @@ cpdef Column tokenize_column(
         New strings columns of tokens
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_tokenize(
             input.view(),
             delimiters.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column count_tokens_scalar(
     Column input,
     Scalar delimiter=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -159,26 +163,27 @@ cpdef Column count_tokens_scalar(
         New column of token counts
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if delimiter is None:
         delimiter = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
         )
 
     with nogil:
         c_result = cpp_count_tokens(
             input.view(),
             dereference(<const string_scalar*>delimiter.c_obj.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column count_tokens_column(
-    Column input, Column delimiters, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, Column delimiters, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns the number of tokens in each string of a strings column
@@ -201,21 +206,22 @@ cpdef Column count_tokens_column(
         New column of token counts
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_count_tokens(
             input.view(),
             delimiters.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column character_tokenize(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a single column of strings by converting
@@ -236,18 +242,19 @@ cpdef Column character_tokenize(
         New strings columns of tokens
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = cpp_character_tokenize(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_character_tokenize(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column detokenize(
     Column input,
     Column row_indices,
     Scalar separator=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -273,12 +280,13 @@ cpdef Column detokenize(
         New strings columns of tokens
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if separator is None:
         separator = Scalar.from_libcudf(
-            cpp_make_string_scalar(" ".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar(" ".encode(), _cs, mr.get_mr())
         )
 
     with nogil:
@@ -286,18 +294,18 @@ cpdef Column detokenize(
             input.view(),
             row_indices.view(),
             dereference(<const string_scalar*>separator.c_obj.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column tokenize_with_vocabulary(
     Column input,
     TokenizeVocabulary vocabulary,
     Scalar delimiter,
     size_type default_id=-1,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -325,7 +333,8 @@ cpdef Column tokenize_with_vocabulary(
         Lists column of token ids
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -334,8 +343,8 @@ cpdef Column tokenize_with_vocabulary(
             dereference(vocabulary.c_obj.get()),
             dereference(<const string_scalar*>delimiter.c_obj.get()),
             default_id,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd
index 3f7685903e0..604a566c701 100644
--- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -6,7 +6,6 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.nvtext.wordpiece_tokenize cimport wordpiece_vocabulary
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 cdef class WordPieceVocabulary:
     cdef unique_ptr[wordpiece_vocabulary] c_obj
@@ -15,6 +14,6 @@ cpdef Column wordpiece_tokenize(
     Column input,
     WordPieceVocabulary vocabulary,
     size_type max_words_per_row,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi
index e91cfc8f21e..e77a8c86a69 100644
--- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi
@@ -1,16 +1,16 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 class WordPieceVocabulary:
     def __init__(
         self,
         vocab: Column,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ): ...
 
@@ -18,6 +18,6 @@ def wordpiece_tokenize(
     input: Column,
     vocabulary: WordPieceVocabulary,
     max_words_per_row: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx
index b6c516cf739..dfdb563087d 100644
--- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -15,6 +15,7 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "WordPieceVocabulary",
@@ -29,15 +30,16 @@ cdef class WordPieceVocabulary:
     def __cinit__(
         self,
         Column vocab,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ):
         cdef column_view c_vocab = vocab.view()
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
         with nogil:
             self.c_obj = move(cpp_load_wordpiece_vocabulary(
-                c_vocab, stream.view(), mr.get_mr()
+                c_vocab, _cs, mr.get_mr()
             ))
 
     __hash__ = None
@@ -46,7 +48,7 @@ cpdef Column wordpiece_tokenize(
     Column input,
     WordPieceVocabulary vocabulary,
     size_type max_words_per_row,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -73,7 +75,8 @@ cpdef Column wordpiece_tokenize(
         Lists column of token ids
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -81,8 +84,8 @@ cpdef Column wordpiece_tokenize(
             input.view(),
             dereference(vocabulary.c_obj.get()),
             max_words_per_row,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/partitioning.pxd
index 096b4eb99e8..84c9b647691 100644
--- a/python/pylibcudf/pylibcudf/partitioning.pxd
+++ b/python/pylibcudf/pylibcudf/partitioning.pxd
@@ -1,7 +1,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from libc.stdint cimport uint32_t
@@ -20,7 +19,7 @@ cpdef tuple[Table, list] hash_partition(
     int num_partitions,
     hash_id hash_function = *,
     uint32_t seed = *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -28,7 +27,7 @@ cpdef tuple[Table, list] partition(
     Table t,
     Column partition_map,
     int num_partitions,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -36,6 +35,6 @@ cpdef tuple[Table, list] round_robin_partition(
     Table input,
     int num_partitions,
     int start_partition=*,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyi b/python/pylibcudf/pylibcudf/partitioning.pyi
index 081ee53731f..971346421ea 100644
--- a/python/pylibcudf/pylibcudf/partitioning.pyi
+++ b/python/pylibcudf/pylibcudf/partitioning.pyi
@@ -4,10 +4,10 @@
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 class HashId(IntEnum):
     HASH_IDENTITY = ...
@@ -19,20 +19,20 @@ def hash_partition(
     num_partitions: int,
     hash_function: HashId = ...,
     seed: int = ...,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Table, list[int]]: ...
 def partition(
     t: Table,
     partition_map: Column,
     num_partitions: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Table, list[int]]: ...
 def round_robin_partition(
     input: Table,
     num_partitions: int,
     start_partition: int = 0,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Table, list[int]]: ...
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx
index b8da9249656..62e35ab9cca 100644
--- a/python/pylibcudf/pylibcudf/partitioning.pyx
+++ b/python/pylibcudf/pylibcudf/partitioning.pyx
@@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = [
@@ -29,7 +30,7 @@ cpdef tuple[Table, list] hash_partition(
     int num_partitions,
     cpp_partitioning.hash_id hash_function = cpp_partitioning.hash_id.HASH_MURMUR3,
     uint32_t seed = cpp_partitioning.DEFAULT_HASH_SEED,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -63,7 +64,8 @@ cpdef tuple[Table, list] hash_partition(
     cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
     cdef int c_num_partitions = num_partitions
     cdef vector[libcudf_types.size_type] columns_to_hash
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     if TableOrList is Table:
         with nogil:
@@ -73,7 +75,7 @@ cpdef tuple[Table, list] hash_partition(
                 c_num_partitions,
                 hash_function,
                 seed,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
@@ -85,17 +87,17 @@ cpdef tuple[Table, list] hash_partition(
                 c_num_partitions,
                 hash_function,
                 seed,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-    return Table.from_libcudf(move(c_result.first), stream, mr), list(c_result.second)
+    return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second)
 
 
 cpdef tuple[Table, list] partition(
     Table t,
     Column partition_map,
     int num_partitions,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -126,7 +128,8 @@ cpdef tuple[Table, list] partition(
     cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
     cdef int c_num_partitions = num_partitions
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -134,18 +137,18 @@ cpdef tuple[Table, list] partition(
             t.view(),
             partition_map.view(),
             c_num_partitions,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result.first), stream, mr), list(c_result.second)
+    return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second)
 
 
 cpdef tuple[Table, list] round_robin_partition(
     Table input,
     int num_partitions,
     int start_partition=0,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -176,7 +179,8 @@ cpdef tuple[Table, list] round_robin_partition(
     cdef int c_num_partitions = num_partitions
     cdef int c_start_partition = start_partition
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -184,8 +188,8 @@ cpdef tuple[Table, list] round_robin_partition(
             input.view(),
             c_num_partitions,
             c_start_partition,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result.first), stream, mr), list(c_result.second)
+    return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second)
diff --git a/python/pylibcudf/pylibcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/quantiles.pxd
index 9492ef8ce38..668e8015688 100644
--- a/python/pylibcudf/pylibcudf/quantiles.pxd
+++ b/python/pylibcudf/pylibcudf/quantiles.pxd
@@ -1,9 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.vector cimport vector
 from pylibcudf.libcudf.types cimport interpolation, sorted
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .table cimport Table
@@ -15,7 +14,7 @@ cpdef Column quantile(
     interpolation interp = *,
     Column ordered_indices = *,
     bint exact = *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -26,6 +25,6 @@ cpdef Table quantiles(
     sorted is_input_sorted = *,
     list column_order = *,
     list null_precedence = *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/quantiles.pyi b/python/pylibcudf/pylibcudf/quantiles.pyi
index 2e414357651..9af646407ab 100644
--- a/python/pylibcudf/pylibcudf/quantiles.pyi
+++ b/python/pylibcudf/pylibcudf/quantiles.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from collections.abc import Iterable
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
 from pylibcudf.types import Interpolation, NullOrder, Order, Sorted
+from pylibcudf.utils import CudaStreamLike
 
 def quantile(
     input: Column,
@@ -16,7 +16,7 @@ def quantile(
     interp: Interpolation = Interpolation.LINEAR,
     ordered_indices: Column | None = None,
     exact: bool = True,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def quantiles(
@@ -26,6 +26,6 @@ def quantiles(
     is_input_sorted: Sorted = Sorted.NO,
     column_order: list[Order] | None = None,
     null_precedence: list[NullOrder] | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx
index de1ee3344d3..f02643754cb 100644
--- a/python/pylibcudf/pylibcudf/quantiles.pyx
+++ b/python/pylibcudf/pylibcudf/quantiles.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -20,6 +20,7 @@ from .column cimport Column
 from .table cimport Table
 from .types cimport interpolation
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["quantile", "quantiles"]
 
@@ -29,7 +30,7 @@ cpdef Column quantile(
     interpolation interp = interpolation.LINEAR,
     Column ordered_indices = None,
     bool exact=True,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes quantiles with interpolation.
@@ -74,7 +75,8 @@ cpdef Column quantile(
     else:
         ordered_indices_view = ordered_indices.view()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -84,11 +86,11 @@ cpdef Column quantile(
             interp,
             ordered_indices_view,
             exact,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table quantiles(
@@ -98,7 +100,7 @@ cpdef Table quantiles(
     sorted is_input_sorted = sorted.NO,
     list column_order = None,
     list null_precedence = None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes row quantiles with interpolation.
@@ -156,7 +158,8 @@ cpdef Table quantiles(
     if null_precedence is not None:
         null_precedence_vec = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -167,8 +170,8 @@ cpdef Table quantiles(
             is_input_sorted,
             column_order_vec,
             null_precedence_vec,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/reduce.pxd b/python/pylibcudf/pylibcudf/reduce.pxd
index e9acd2aaed5..dc33d7053f4 100644
--- a/python/pylibcudf/pylibcudf/reduce.pxd
+++ b/python/pylibcudf/pylibcudf/reduce.pxd
@@ -4,7 +4,6 @@
 from libcpp cimport bool
 from pylibcudf.libcudf.reduce cimport scan_type
 from pylibcudf.libcudf.types cimport nan_policy, null_policy, size_type
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .aggregation cimport Aggregation
@@ -18,7 +17,7 @@ cpdef Scalar reduce(
     Aggregation agg,
     DataType data_type,
     Scalar init = *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -26,11 +25,11 @@ cpdef Column scan(
     Column col,
     Aggregation agg,
     scan_type inclusive,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
-cpdef tuple minmax(Column col, Stream stream = *, DeviceMemoryResource mr = *)
+cpdef tuple minmax(Column col, object stream = *, DeviceMemoryResource mr = *)
 
 cpdef bool is_valid_reduce_aggregation(DataType source, Aggregation agg)
 
@@ -38,12 +37,12 @@ cpdef size_type unique_count(
     Column source,
     null_policy null_handling,
     nan_policy nan_handling,
-    Stream stream = *
+    object stream = *
 )
 
 cpdef size_type distinct_count(
     Column source,
     null_policy null_handling,
     nan_policy nan_handling,
-    Stream stream = *
+    object stream = *
 )
diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi
index 5956b93661c..9e1c643b0cd 100644
--- a/python/pylibcudf/pylibcudf/reduce.pyi
+++ b/python/pylibcudf/pylibcudf/reduce.pyi
@@ -4,12 +4,12 @@
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.aggregation import Aggregation
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.types import DataType, NanPolicy, NullPolicy
+from pylibcudf.utils import CudaStreamLike
 
 class ScanType(IntEnum):
     INCLUSIVE = ...
@@ -19,19 +19,19 @@ def reduce(
     col: Column,
     agg: Aggregation,
     data_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Scalar: ...
 def scan(
     col: Column,
     agg: Aggregation,
     inclusive: ScanType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def minmax(
     col: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Scalar, Scalar]: ...
 def is_valid_reduce_aggregation(
@@ -41,11 +41,11 @@ def unique_count(
     source: Column,
     null_handling: NullPolicy,
     nan_handling: NanPolicy,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> int: ...
 def distinct_count(
     source: Column,
     null_handling: NullPolicy,
     nan_handling: NanPolicy,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> int: ...
diff --git a/python/pylibcudf/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx
index 54036b73e85..95c3555d021 100644
--- a/python/pylibcudf/pylibcudf/reduce.pyx
+++ b/python/pylibcudf/pylibcudf/reduce.pyx
@@ -31,6 +31,7 @@ from .types cimport DataType
 from .utils cimport _get_stream, _get_memory_resource
 
 from pylibcudf.libcudf.reduce import scan_type as ScanType  # no-cython-lint
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "ScanType",
@@ -47,7 +48,7 @@ cpdef Scalar reduce(
     Aggregation agg,
     DataType data_type,
     Scalar init=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a reduction on a column
@@ -79,7 +80,8 @@ cpdef Scalar reduce(
     cdef optional[reference_wrapper[constscalar]] c_init
     cdef const scalar* c_init_ptr
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if init is not None:
@@ -96,7 +98,7 @@ cpdef Scalar reduce(
             dereference(c_agg),
             data_type.c_obj,
             c_init,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return Scalar.from_libcudf(move(result))
@@ -106,7 +108,7 @@ cpdef Column scan(
     Column col,
     Aggregation agg,
     scan_type inclusive,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a scan on a column
@@ -134,7 +136,8 @@ cpdef Column scan(
     cdef unique_ptr[column] result
     cdef const scan_aggregation *c_agg = agg.view_underlying_as_scan()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -143,13 +146,13 @@ cpdef Column scan(
             dereference(c_agg),
             inclusive,
             null_policy.EXCLUDE,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
-cpdef tuple minmax(Column col, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef tuple minmax(Column col, object stream=None, DeviceMemoryResource mr=None):
     """Compute the minimum and maximum of a column
 
     For details, see ``cudf::minmax`` documentation.
@@ -173,11 +176,12 @@ cpdef tuple minmax(Column col, Stream stream=None, DeviceMemoryResource mr=None)
     cdef Scalar min_scalar
     cdef Scalar max_scalar
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_minmax(col.view(), stream.view(), mr.get_mr())
+        result = cpp_minmax(col.view(), _cs, mr.get_mr())
 
     min_scalar = Scalar.from_libcudf(move(result.first))
     max_scalar = Scalar.from_libcudf(move(result.second))
@@ -206,7 +210,7 @@ cpdef size_type unique_count(
     Column source,
     null_policy null_handling,
     nan_policy nan_handling,
-    Stream stream=None
+    object stream=None
 ):
     """Returns the number of unique consecutive elements in the input column.
 
@@ -231,10 +235,10 @@ cpdef size_type unique_count(
     If the input column is sorted, then unique_count can produce the
     same result as distinct_count, but faster.
     """
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
 
     return cpp_unique_count.unique_count(
-        source.view(), null_handling, nan_handling, stream.view()
+        source.view(), null_handling, nan_handling, _stream.view().value()
     )
 
 
@@ -242,7 +246,7 @@ cpdef size_type distinct_count(
     Column source,
     null_policy null_handling,
     nan_policy nan_handling,
-    Stream stream=None
+    object stream=None
 ):
     """Returns the number of distinct elements in the input column.
 
@@ -262,10 +266,10 @@ cpdef size_type distinct_count(
     size_type
         The number of distinct elements in the input column.
     """
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
 
     return cpp_distinct_count.distinct_count(
-        source.view(), null_handling, nan_handling, stream.view()
+        source.view(), null_handling, nan_handling, _stream.view().value()
     )
 
 
diff --git a/python/pylibcudf/pylibcudf/replace.pxd b/python/pylibcudf/pylibcudf/replace.pxd
index 49b57753eb1..7e78e92d514 100644
--- a/python/pylibcudf/pylibcudf/replace.pxd
+++ b/python/pylibcudf/pylibcudf/replace.pxd
@@ -1,9 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
 from pylibcudf.libcudf.replace cimport replace_policy
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
@@ -22,7 +21,7 @@ ctypedef fused ReplacementType:
 cpdef Column replace_nulls(
     Column source_column,
     ReplacementType replacement,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -30,7 +29,7 @@ cpdef Column find_and_replace_all(
     Column source_column,
     Column values_to_replace,
     Column replacement_values,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -40,13 +39,13 @@ cpdef Column clamp(
     Scalar hi,
     Scalar lo_replace=*,
     Scalar hi_replace=*,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column normalize_nans_and_zeros(
     Column source_column,
     bool inplace=*,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi
index d7a35721769..f74e06c3909 100644
--- a/python/pylibcudf/pylibcudf/replace.pyi
+++ b/python/pylibcudf/pylibcudf/replace.pyi
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 class ReplacePolicy(IntEnum):
     PRECEDING = ...
@@ -16,14 +16,14 @@ class ReplacePolicy(IntEnum):
 def replace_nulls(
     source_column: Column,
     replacement: Column | Scalar | ReplacePolicy,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def find_and_replace_all(
     source_column: Column,
     values_to_replace: Column,
     replacement_values: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def clamp(
@@ -32,12 +32,12 @@ def clamp(
     hi: Scalar,
     lo_replace: Scalar | None = None,
     hi_replace: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def normalize_nans_and_zeros(
     source_column: Column,
     inplace: bool = False,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx
index c3730e3971f..4a5cc162551 100644
--- a/python/pylibcudf/pylibcudf/replace.pyx
+++ b/python/pylibcudf/pylibcudf/replace.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 
@@ -18,6 +18,7 @@ from pylibcudf.libcudf.replace import \
 from .column cimport Column
 from .scalar cimport Scalar
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "ReplacePolicy",
@@ -31,7 +32,7 @@ __all__ = [
 cpdef Column replace_nulls(
     Column source_column,
     ReplacementType replacement,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Replace nulls in source_column.
@@ -70,7 +71,8 @@ cpdef Column replace_nulls(
     cdef unique_ptr[column] c_result
     cdef replace_policy policy
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     # Due to https://github.com/cython/cython/issues/5984, if this function is
@@ -84,10 +86,10 @@ cpdef Column replace_nulls(
                 c_result = cpp_replace.replace_nulls(
                     source_column.view(),
                     policy,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
-            return Column.from_libcudf(move(c_result), stream, mr)
+            return Column.from_libcudf(move(c_result), _stream, mr)
         else:
             raise TypeError("replacement must be a Column, Scalar, or replace_policy")
 
@@ -96,33 +98,33 @@ cpdef Column replace_nulls(
             c_result = cpp_replace.replace_nulls(
                 source_column.view(),
                 replacement.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         elif ReplacementType is Scalar:
             c_result = cpp_replace.replace_nulls(
                 source_column.view(),
                 dereference(replacement.c_obj),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         elif ReplacementType is replace_policy:
             c_result = cpp_replace.replace_nulls(
                 source_column.view(),
                 replacement,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         else:
             assert False, "Internal error. Please contact pylibcudf developers"
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column find_and_replace_all(
     Column source_column,
     Column values_to_replace,
     Column replacement_values,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Replace all occurrences of values_to_replace with replacement_values.
@@ -150,7 +152,8 @@ cpdef Column find_and_replace_all(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -158,10 +161,10 @@ cpdef Column find_and_replace_all(
             source_column.view(),
             values_to_replace.view(),
             replacement_values.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column clamp(
@@ -170,7 +173,7 @@ cpdef Column clamp(
     Scalar hi,
     Scalar lo_replace=None,
     Scalar hi_replace=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Clamp the values in source_column to the range [lo, hi].
@@ -206,7 +209,8 @@ cpdef Column clamp(
 
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -215,7 +219,7 @@ cpdef Column clamp(
                 source_column.view(),
                 dereference(lo.c_obj),
                 dereference(hi.c_obj),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         else:
@@ -225,16 +229,16 @@ cpdef Column clamp(
                 dereference(lo_replace.c_obj),
                 dereference(hi.c_obj),
                 dereference(hi_replace.c_obj),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column normalize_nans_and_zeros(
     Column source_column,
     bool inplace=False,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Normalize NaNs and zeros in source_column.
@@ -260,24 +264,25 @@ cpdef Column normalize_nans_and_zeros(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         if inplace:
             cpp_replace.normalize_nans_and_zeros(
                 source_column.mutable_view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         else:
             c_result = cpp_replace.normalize_nans_and_zeros(
                 source_column.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
 
     if not inplace:
-        return Column.from_libcudf(move(c_result), stream, mr)
+        return Column.from_libcudf(move(c_result), _stream, mr)
 
 ReplacePolicy.__str__ = ReplacePolicy.__repr__
diff --git a/python/pylibcudf/pylibcudf/reshape.pxd b/python/pylibcudf/pylibcudf/reshape.pxd
index fd2eb9f31ec..09a111770b5 100644
--- a/python/pylibcudf/pylibcudf/reshape.pxd
+++ b/python/pylibcudf/pylibcudf/reshape.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stddef cimport size_t
@@ -6,7 +6,6 @@ from libc.stdint cimport uintptr_t
 
 from pylibcudf.libcudf.types cimport size_type
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
@@ -17,17 +16,17 @@ from .types cimport DataType
 
 
 cpdef Column interleave_columns(
-    Table source_table, Stream stream=*, DeviceMemoryResource mr=*
+    Table source_table, object stream = *, DeviceMemoryResource mr=*
 )
 cpdef Table tile(
     Table source_table,
     size_type count,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 cpdef void table_to_array(
     Table input_table,
     uintptr_t ptr,
     size_t size,
-    Stream stream=*
+    object stream = *
 )
diff --git a/python/pylibcudf/pylibcudf/reshape.pyi b/python/pylibcudf/pylibcudf/reshape.pyi
index c8ca83be981..03acda18353 100644
--- a/python/pylibcudf/pylibcudf/reshape.pyi
+++ b/python/pylibcudf/pylibcudf/reshape.pyi
@@ -1,26 +1,26 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def interleave_columns(
     source_table: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def tile(
     source_table: Table,
     count: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def table_to_array(
     input_table: Table,
     ptr: int,
     size: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> None: ...
diff --git a/python/pylibcudf/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx
index b001b289794..a81dadf62ce 100644
--- a/python/pylibcudf/pylibcudf/reshape.pyx
+++ b/python/pylibcudf/pylibcudf/reshape.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libc.stddef cimport size_t
@@ -24,11 +24,12 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["interleave_columns", "tile", "table_to_array"]
 
 cpdef Column interleave_columns(
-    Table source_table, Stream stream=None, DeviceMemoryResource mr=None
+    Table source_table, object stream=None, DeviceMemoryResource mr=None
 ):
     """Interleave columns of a table into a single column.
 
@@ -55,21 +56,22 @@ cpdef Column interleave_columns(
         A new column which is the result of interleaving the input columns
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_interleave_columns(
-            source_table.view(), stream.view(), mr.get_mr()
+            source_table.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table tile(
     Table source_table,
     size_type count,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Repeats the rows from input table count times to form a new table.
@@ -93,22 +95,23 @@ cpdef Table tile(
         The table containing the tiled "rows"
     """
     cdef unique_ptr[table] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_tile(
-            source_table.view(), count, stream.view(), mr.get_mr()
+            source_table.view(), count, _cs, mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef void table_to_array(
     Table input_table,
     uintptr_t ptr,
     size_t size,
-    Stream stream=None
+    object stream=None
 ):
     """
     Copy a table into a preallocated column-major device array.
@@ -129,7 +132,8 @@ cpdef void table_to_array(
         raise ValueError(
             "Size exceeds the size_t limit."
         )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
 
     cdef device_span[byte] span = device_span[byte](
         <byte*> ptr, size
@@ -139,5 +143,5 @@ cpdef void table_to_array(
         cpp_table_to_array(
             input_table.view(),
             span,
-            stream.view()
+            _cs
         )
diff --git a/python/pylibcudf/pylibcudf/rolling.pxd b/python/pylibcudf/pylibcudf/rolling.pxd
index 5ea7dc747f4..94a6a8a6d89 100644
--- a/python/pylibcudf/pylibcudf/rolling.pxd
+++ b/python/pylibcudf/pylibcudf/rolling.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -8,7 +8,6 @@ from pylibcudf.libcudf.rolling cimport (
     bounded_closed, bounded_open, current_row, rolling_request, unbounded
 )
 from pylibcudf.libcudf.types cimport null_order, order, size_type
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .aggregation cimport Aggregation
@@ -63,7 +62,7 @@ cpdef Table grouped_range_rolling_window(
     PrecedingRangeWindowType preceding,
     FollowingRangeWindowType following,
     list requests,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -73,7 +72,7 @@ cpdef Column rolling_window(
     WindowType following_window,
     size_type min_periods,
     Aggregation agg,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -86,6 +85,6 @@ cpdef tuple make_range_windows(
     null_order null_order,
     PrecedingRangeWindowType preceding,
     FollowingRangeWindowType following,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/rolling.pyi b/python/pylibcudf/pylibcudf/rolling.pyi
index 239ce9ddbd8..883f62d0d3f 100644
--- a/python/pylibcudf/pylibcudf/rolling.pyi
+++ b/python/pylibcudf/pylibcudf/rolling.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.aggregation import Aggregation
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 from pylibcudf.types import DataType, NullOrder, Order
+from pylibcudf.utils import CudaStreamLike
 
 class Unbounded: ...
 class CurrentRow: ...
@@ -36,7 +36,7 @@ def grouped_range_rolling_window(
     preceding: RangeWindowType,
     following: RangeWindowType,
     requests: list[RollingRequest],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def rolling_window[WindowType: (Column, int)](
@@ -45,7 +45,7 @@ def rolling_window[WindowType: (Column, int)](
     following_window: WindowType,
     min_periods: int,
     agg: Aggregation,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_valid_rolling_aggregation(
@@ -58,6 +58,6 @@ def make_range_windows(
     null_order: NullOrder,
     preceding: RangeWindowType,
     following: RangeWindowType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Column, Column]: ...
diff --git a/python/pylibcudf/pylibcudf/rolling.pyx b/python/pylibcudf/pylibcudf/rolling.pyx
index 73c10e53d57..ae9d7665d69 100644
--- a/python/pylibcudf/pylibcudf/rolling.pyx
+++ b/python/pylibcudf/pylibcudf/rolling.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -20,6 +20,7 @@ from .column cimport Column
 from .scalar cimport Scalar
 from .types cimport DataType
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 
 __all__ = [
@@ -125,7 +126,7 @@ cpdef Table grouped_range_rolling_window(
     PrecedingRangeWindowType preceding,
     FollowingRangeWindowType following,
     list requests,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -164,7 +165,8 @@ cpdef Table grouped_range_rolling_window(
     for req in requests:
         crequests.push_back(move((<RollingRequest?>req).view()))
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -176,10 +178,10 @@ cpdef Table grouped_range_rolling_window(
             dereference(preceding.c_obj.get()),
             dereference(following.c_obj.get()),
             crequests,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(result), stream, mr)
+    return Table.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column rolling_window(
@@ -188,7 +190,7 @@ cpdef Column rolling_window(
     WindowType following_window,
     size_type min_periods,
     Aggregation agg,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a rolling window operation on a column
@@ -224,7 +226,8 @@ cpdef Column rolling_window(
     # reclaim the GIL internally for just the necessary scope like column.view()
     cdef const rolling_aggregation *c_agg = agg.view_underlying_as_rolling()
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if WindowType is Column:
@@ -235,7 +238,7 @@ cpdef Column rolling_window(
                 following_window.view(),
                 min_periods,
                 dereference(c_agg),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
@@ -246,11 +249,11 @@ cpdef Column rolling_window(
                 following_window,
                 min_periods,
                 dereference(c_agg),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef bool is_valid_rolling_aggregation(DataType source, Aggregation agg):
@@ -278,7 +281,7 @@ cpdef tuple make_range_windows(
     null_order null_order,
     PrecedingRangeWindowType preceding,
     FollowingRangeWindowType following,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -308,7 +311,8 @@ cpdef tuple make_range_windows(
     """
     cdef pair[unique_ptr[column], unique_ptr[column]] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -319,10 +323,10 @@ cpdef tuple make_range_windows(
             null_order,
             dereference(preceding.c_obj.get()),
             dereference(following.c_obj.get()),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
     return (
-        Column.from_libcudf(move(result.first), stream, mr),
-        Column.from_libcudf(move(result.second), stream, mr)
+        Column.from_libcudf(move(result.first), _stream, mr),
+        Column.from_libcudf(move(result.second), _stream, mr)
     )
diff --git a/python/pylibcudf/pylibcudf/round.pxd b/python/pylibcudf/pylibcudf/round.pxd
index ecd72c62c0a..0ac0c22346f 100644
--- a/python/pylibcudf/pylibcudf/round.pxd
+++ b/python/pylibcudf/pylibcudf/round.pxd
@@ -5,7 +5,6 @@ from pylibcudf.libcudf.round cimport rounding_method
 
 from .column cimport Column
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 
@@ -13,7 +12,7 @@ cpdef Column round(
     Column source,
     int32_t decimal_places = *,
     rounding_method round_method = *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *
 )
 
@@ -21,6 +20,6 @@ cpdef Column round_decimal(
     Column source,
     int32_t decimal_places = *,
     rounding_method round_method = *,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *
 )
diff --git a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi
index 848e43aeda7..30d08f234d5 100644
--- a/python/pylibcudf/pylibcudf/round.pyi
+++ b/python/pylibcudf/pylibcudf/round.pyi
@@ -4,9 +4,9 @@
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 class RoundingMethod(IntEnum):
     HALF_UP = ...
@@ -16,13 +16,13 @@ def round(
     source: Column,
     decimal_places: int = 0,
     round_method: RoundingMethod = RoundingMethod.HALF_UP,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def round_decimal(
     source: Column,
     decimal_places: int = 0,
     round_method: RoundingMethod = RoundingMethod.HALF_UP,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx
index 84a7ba6dbdf..f5baa6bbd23 100644
--- a/python/pylibcudf/pylibcudf/round.pyx
+++ b/python/pylibcudf/pylibcudf/round.pyx
@@ -19,6 +19,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["RoundingMethod", "round"]
 
@@ -26,7 +27,7 @@ cpdef Column round(
     Column source,
     int32_t decimal_places = 0,
     rounding_method round_method = rounding_method.HALF_UP,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Rounds all the values in a column to the specified number of decimal places.
@@ -58,7 +59,8 @@ cpdef Column round(
         A Column with values rounded
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -66,18 +68,18 @@ cpdef Column round(
             source.view(),
             decimal_places,
             round_method,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column round_decimal(
     Column source,
     int32_t decimal_places = 0,
     rounding_method round_method = rounding_method.HALF_UP,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """Rounds all the values in a column to the specified number of decimal places.
@@ -106,7 +108,8 @@ cpdef Column round_decimal(
         A Column with values rounded
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -114,10 +117,10 @@ cpdef Column round_decimal(
             source.view(),
             decimal_places,
             round_method,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 RoundingMethod.__str__ = RoundingMethod.__repr__
diff --git a/python/pylibcudf/pylibcudf/scalar.pxd b/python/pylibcudf/pylibcudf/scalar.pxd
index 5230c0316be..b628b9185a6 100644
--- a/python/pylibcudf/pylibcudf/scalar.pxd
+++ b/python/pylibcudf/pylibcudf/scalar.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -6,7 +6,6 @@ from libcpp.memory cimport unique_ptr
 from pylibcudf.libcudf.scalar.scalar cimport scalar
 
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .types cimport DataType
@@ -24,10 +23,10 @@ cdef class Scalar:
     cdef const scalar* get(self) noexcept nogil
 
     cpdef DataType type(self)
-    cpdef bool is_valid(self, Stream stream=*)
+    cpdef bool is_valid(self, object stream = *)
 
     @staticmethod
-    cdef Scalar empty_like(Column column, Stream stream, DeviceMemoryResource mr)
+    cdef Scalar empty_like(Column column, object stream, DeviceMemoryResource mr)
 
     @staticmethod
     cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*)
diff --git a/python/pylibcudf/pylibcudf/scalar.pyi b/python/pylibcudf/pylibcudf/scalar.pyi
index ef940d8c021..a204894afd8 100644
--- a/python/pylibcudf/pylibcudf/scalar.pyi
+++ b/python/pylibcudf/pylibcudf/scalar.pyi
@@ -3,11 +3,10 @@
 
 from typing import Any
 
-from rmm.pylibrmm.stream import Stream
-
 from pylibcudf._interop_helpers import ColumnMetadata
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 NpGeneric = type[Any]
 
@@ -16,31 +15,33 @@ PaScalar = type[Any]
 class Scalar:
     def __init__(self): ...
     def type(self) -> DataType: ...
-    def is_valid(self, stream: Stream) -> bool: ...
+    def is_valid(self, stream: CudaStreamLike | None = None) -> bool: ...
     @staticmethod
-    def empty_like(column: Column, stream: Stream | None = None) -> Scalar: ...
+    def empty_like(
+        column: Column, stream: CudaStreamLike | None = None
+    ) -> Scalar: ...
     def to_arrow(
         self,
         metadata: ColumnMetadata | str | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> PaScalar: ...
     @staticmethod
     def from_arrow(
         pa_val: Any,
         dtype: DataType | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> Scalar: ...
     @classmethod
     def from_py(
         cls,
         py_val: Any,
         dtype: DataType | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> Scalar: ...
     @classmethod
     def from_numpy(
-        cls, np_val: NpGeneric, stream: Stream | None = None
+        cls, np_val: NpGeneric, stream: CudaStreamLike | None = None
     ) -> Scalar: ...
     def to_py(
-        self, stream: Stream | None = None
+        self, stream: CudaStreamLike | None = None
     ) -> None | int | float | str | bool: ...
diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx
index 8771b4a75fd..54e088787a5 100644
--- a/python/pylibcudf/pylibcudf/scalar.pyx
+++ b/python/pylibcudf/pylibcudf/scalar.pyx
@@ -57,6 +57,7 @@ from rmm.pylibrmm.memory_resource cimport (
     get_current_device_resource,
 )
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 from .column cimport Column
 from .traits cimport is_floating_point
@@ -151,10 +152,11 @@ cdef class Scalar:
         """The type of data in the column."""
         return self._data_type
 
-    cpdef bool is_valid(self, Stream stream = None):
+    cpdef bool is_valid(self, object stream = None):
         """True if the scalar is valid, false if not"""
-        stream = _get_stream(stream)
-        return self.get().is_valid(stream.view())
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
+        return self.get().is_valid(_cs)
 
     def to_arrow(
         self,
@@ -176,7 +178,9 @@ cdef class Scalar:
         """
         # Note that metadata for scalars is primarily important for preserving
         # information on nested types since names are otherwise irrelevant.
-        return Column.from_scalar(self, 1, stream).to_arrow(metadata=metadata)[0]
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
+        return Column.from_scalar(self, 1, _stream).to_arrow(metadata=metadata)[0]
 
     @staticmethod
     def from_arrow(
@@ -205,7 +209,7 @@ cdef class Scalar:
         return _from_arrow(pa_val, dtype, stream)
 
     @staticmethod
-    cdef Scalar empty_like(Column column, Stream stream, DeviceMemoryResource mr):
+    cdef Scalar empty_like(Column column, object stream, DeviceMemoryResource mr):
         """Construct a null scalar with the same type as column.
 
         Parameters
@@ -221,8 +225,10 @@ cdef class Scalar:
         -------
         New empty (null) scalar of the given type.
         """
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         return Scalar.from_libcudf(
-            move(make_empty_scalar_like(column.view(), stream.view(), mr.get_mr()))
+            move(make_empty_scalar_like(column.view(), _cs, mr.get_mr()))
         )
 
     @staticmethod
@@ -266,9 +272,10 @@ cdef class Scalar:
         Scalar
             New pylibcudf.Scalar
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
-        return _from_py(py_val, dtype, stream, mr)
+        return _from_py(py_val, dtype, _stream, mr)
 
     @classmethod
     def from_numpy(
@@ -294,9 +301,10 @@ cdef class Scalar:
         Scalar
             New pylibcudf.Scalar
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
-        return _from_numpy(np_val, stream, mr)
+        return _from_numpy(np_val, _stream, mr)
 
     def to_py(self, stream: Stream | None = None):
         """
@@ -312,39 +320,40 @@ cdef class Scalar:
         Python scalar
             A Python scalar associated with the type of the Scalar.
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
-        if not self.is_valid(stream):
+        if not self.is_valid(_stream):
             return None
 
         cdef type_id tid = self.type().id()
         cdef const scalar* slr = self.c_obj.get()
         if tid == type_id.BOOL8:
-            return (<numeric_scalar[cbool]*>slr).value(stream.view())
+            return (<numeric_scalar[cbool]*>slr).value(_cs)
         elif tid == type_id.STRING:
-            return (<string_scalar*>slr).to_string(stream.view()).decode()
+            return (<string_scalar*>slr).to_string(_cs).decode()
         elif tid == type_id.FLOAT32:
-            return (<numeric_scalar[float]*>slr).value(stream.view())
+            return (<numeric_scalar[float]*>slr).value(_cs)
         elif tid == type_id.FLOAT64:
-            return (<numeric_scalar[double]*>slr).value(stream.view())
+            return (<numeric_scalar[double]*>slr).value(_cs)
         elif tid == type_id.INT8:
-            return (<numeric_scalar[int8_t]*>slr).value(stream.view())
+            return (<numeric_scalar[int8_t]*>slr).value(_cs)
         elif tid == type_id.INT16:
-            return (<numeric_scalar[int16_t]*>slr).value(stream.view())
+            return (<numeric_scalar[int16_t]*>slr).value(_cs)
         elif tid == type_id.INT32:
-            return (<numeric_scalar[int32_t]*>slr).value(stream.view())
+            return (<numeric_scalar[int32_t]*>slr).value(_cs)
         elif tid == type_id.INT64:
-            return (<numeric_scalar[int64_t]*>slr).value(stream.view())
+            return (<numeric_scalar[int64_t]*>slr).value(_cs)
         elif tid == type_id.UINT8:
-            return (<numeric_scalar[uint8_t]*>slr).value(stream.view())
+            return (<numeric_scalar[uint8_t]*>slr).value(_cs)
         elif tid == type_id.UINT16:
-            return (<numeric_scalar[uint16_t]*>slr).value(stream.view())
+            return (<numeric_scalar[uint16_t]*>slr).value(_cs)
         elif tid == type_id.UINT32:
-            return (<numeric_scalar[uint32_t]*>slr).value(stream.view())
+            return (<numeric_scalar[uint32_t]*>slr).value(_cs)
         elif tid == type_id.UINT64:
-            return (<numeric_scalar[uint64_t]*>slr).value(stream.view())
+            return (<numeric_scalar[uint64_t]*>slr).value(_cs)
         elif tid == type_id.DECIMAL128:
             return decimal.Decimal(
-                (<fixed_point_scalar[decimal128]*>slr).value(stream.view()).value()
+                (<fixed_point_scalar[decimal128]*>slr).value(_cs).value()
             ).scaleb(
                 (<fixed_point_scalar[decimal128]*>slr).type().scale()
             )
@@ -375,6 +384,8 @@ def _from_py(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     cdef DataType c_dtype
     if dtype is None:
         raise ValueError("Must specify a dtype for a None value.")
@@ -382,7 +393,7 @@ def _(
         c_dtype = <DataType>dtype
     cdef unique_ptr[scalar] c_obj = make_default_constructed_scalar(
         c_dtype.c_obj,
-        stream.view(),
+        _cs,
         mr.get_mr()
     )
     return _new_scalar(move(c_obj), dtype)
@@ -402,6 +413,8 @@ def _(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     cdef unique_ptr[scalar] c_obj
     cdef DataType c_dtype
     if dtype is None:
@@ -414,11 +427,11 @@ def _(
     if tid == type_id.FLOAT32:
         if abs(py_val) > numeric_limits[float].max():
             raise OverflowError(f"{py_val} out of range for FLOAT32 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[float]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[float]*>c_obj.get()).set_value(py_val, _cs)
     elif tid == type_id.FLOAT64:
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[double]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[double]*>c_obj.get()).set_value(py_val, _cs)
     else:
         typ = c_dtype.id()
         raise TypeError(f"Cannot convert float to Scalar with dtype {typ.name}")
@@ -430,6 +443,8 @@ def _(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     cdef unique_ptr[scalar] c_obj
     cdef DataType c_dtype
     cdef duration_ns c_duration_ns
@@ -440,7 +455,7 @@ def _(
     if dtype is None:
         c_dtype = dtype = DataType(type_id.INT64)
     elif is_floating_point(dtype):
-        return _from_py(float(py_val), dtype, stream, mr)
+        return _from_py(float(py_val), dtype, _stream, mr)
     else:
         c_dtype = <DataType>dtype
     cdef type_id tid = c_dtype.id()
@@ -450,80 +465,80 @@ def _(
             numeric_limits[int8_t].min() <= py_val <= numeric_limits[int8_t].max()
         ):
             raise OverflowError(f"{py_val} out of range for INT8 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[int8_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[int8_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.INT16:
         if not (
             numeric_limits[int16_t].min() <= py_val <= numeric_limits[int16_t].max()
         ):
             raise OverflowError(f"{py_val} out of range for INT16 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[int16_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[int16_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.INT32:
         if not (
             numeric_limits[int32_t].min() <= py_val <= numeric_limits[int32_t].max()
         ):
             raise OverflowError(f"{py_val} out of range for INT32 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[int32_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[int32_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.INT64:
         if not (
             numeric_limits[int64_t].min() <= py_val <= numeric_limits[int64_t].max()
         ):
             raise OverflowError(f"{py_val} out of range for INT64 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[int64_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[int64_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.UINT8:
         if py_val < 0:
             raise ValueError("Cannot assign negative value to UINT8 scalar")
         if py_val > numeric_limits[uint8_t].max():
             raise OverflowError(f"{py_val} out of range for UINT8 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[uint8_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[uint8_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.UINT16:
         if py_val < 0:
             raise ValueError("Cannot assign negative value to UINT16 scalar")
         if py_val > numeric_limits[uint16_t].max():
             raise OverflowError(f"{py_val} out of range for UINT16 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[uint16_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[uint16_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.UINT32:
         if py_val < 0:
             raise ValueError("Cannot assign negative value to UINT32 scalar")
         if py_val > numeric_limits[uint32_t].max():
             raise OverflowError(f"{py_val} out of range for UINT32 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[uint32_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[uint32_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.UINT64:
         if py_val < 0:
             raise ValueError("Cannot assign negative value to UINT64 scalar")
         if py_val > numeric_limits[uint64_t].max():
             raise OverflowError(f"{py_val} out of range for UINT64 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[uint64_t]*>c_obj.get()).set_value(py_val, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[uint64_t]*>c_obj.get()).set_value(py_val, _cs)
 
     elif tid == type_id.BOOL8:
         if py_val not in (0, 1):
             raise ValueError(f"Cannot convert {py_val} to BOOL8 scalar")
-        c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
-        (<numeric_scalar[cbool]*>c_obj.get()).set_value(py_val != 0, stream.view())
+        c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr())
+        (<numeric_scalar[cbool]*>c_obj.get()).set_value(py_val != 0, _cs)
 
     elif tid == type_id.DURATION_NANOSECONDS:
         if py_val > numeric_limits[int64_t].max():
             raise OverflowError(
                 f"{py_val} nanoseconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_ns = duration_ns(<int64_t>py_val)
         (<duration_scalar[duration_ns]*>c_obj.get()).set_value(
-            c_duration_ns, stream.view()
+            c_duration_ns, _cs
         )
 
     elif tid == type_id.DURATION_MICROSECONDS:
@@ -531,10 +546,10 @@ def _(
             raise OverflowError(
                 f"{py_val} microseconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_us = duration_us(<int64_t>py_val)
         (<duration_scalar[duration_us]*>c_obj.get()).set_value(
-            c_duration_us, stream.view()
+            c_duration_us, _cs
         )
 
     elif tid == type_id.DURATION_MILLISECONDS:
@@ -542,10 +557,10 @@ def _(
             raise OverflowError(
                 f"{py_val} milliseconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_ms = duration_ms(<int64_t>py_val)
         (<duration_scalar[duration_ms]*>c_obj.get()).set_value(
-            c_duration_ms, stream.view()
+            c_duration_ms, _cs
         )
 
     elif tid == type_id.DURATION_SECONDS:
@@ -553,10 +568,10 @@ def _(
             raise OverflowError(
                 f"{py_val} seconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_s = duration_s(<int64_t>py_val)
         (<duration_scalar[duration_s]*>c_obj.get()).set_value(
-            c_duration_s, stream.view()
+            c_duration_s, _cs
         )
 
     elif tid == type_id.DURATION_DAYS:
@@ -564,10 +579,10 @@ def _(
             raise OverflowError(
                 f"{py_val} days out of range for INT32 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_D = duration_D(<int32_t>py_val)
         (<duration_scalar[duration_D]*>c_obj.get()).set_value(
-            c_duration_D, stream.view()
+            c_duration_D, _cs
         )
 
     else:
@@ -581,6 +596,8 @@ def _(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     if dtype is None:
         dtype = DataType(type_id.BOOL8)
     elif dtype.id() != type_id.BOOL8:
@@ -591,10 +608,10 @@ def _(
 
     cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
         (<DataType>dtype).c_obj,
-        stream.view(),
+        _cs,
         mr.get_mr()
     )
-    (<numeric_scalar[cbool]*>c_obj.get()).set_value(py_val, stream.view())
+    (<numeric_scalar[cbool]*>c_obj.get()).set_value(py_val, _cs)
     return _new_scalar(move(c_obj), dtype)
 
 
@@ -602,6 +619,8 @@ def _(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     if dtype is None:
         dtype = DataType(type_id.STRING)
     elif dtype.id() != type_id.STRING:
@@ -610,7 +629,7 @@ def _(
             f"Cannot convert str to Scalar with dtype {tid.name}"
         )
     cdef unique_ptr[scalar] c_obj = make_string_scalar(
-        py_val.encode(), stream.view(), mr.get_mr()
+        py_val.encode(), _cs, mr.get_mr()
     )
     return _new_scalar(move(c_obj), dtype)
 
@@ -619,6 +638,8 @@ def _(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     cdef unique_ptr[scalar] c_obj
     cdef duration_us c_duration_us
     cdef duration_ns c_duration_ns
@@ -637,10 +658,10 @@ def _(
             raise OverflowError(
                 f"{total_nanoseconds} nanoseconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_ns = duration_ns(<int64_t>total_nanoseconds)
         (<duration_scalar[duration_ns]*>c_obj.get()).set_value(
-            c_duration_ns, stream.view()
+            c_duration_ns, _cs
         )
     elif tid == type_id.DURATION_MICROSECONDS:
         total_microseconds = int(total_seconds * 1_000_000)
@@ -648,10 +669,10 @@ def _(
             raise OverflowError(
                 f"{total_microseconds} microseconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_us = duration_us(<int64_t>total_microseconds)
         (<duration_scalar[duration_us]*>c_obj.get()).set_value(
-            c_duration_us, stream.view()
+            c_duration_us, _cs
         )
     elif tid == type_id.DURATION_MILLISECONDS:
         total_milliseconds = int(total_seconds * 1_000)
@@ -659,10 +680,10 @@ def _(
             raise OverflowError(
                 f"{total_milliseconds} milliseconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_ms = duration_ms(<int64_t>total_milliseconds)
         (<duration_scalar[duration_ms]*>c_obj.get()).set_value(
-            c_duration_ms, stream.view()
+            c_duration_ms, _cs
         )
     elif tid == type_id.DURATION_SECONDS:
         total_seconds = int(total_seconds)
@@ -670,10 +691,10 @@ def _(
             raise OverflowError(
                 f"{total_seconds} seconds out of range for INT64 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_s = duration_s(<int64_t>total_seconds)
         (<duration_scalar[duration_s]*>c_obj.get()).set_value(
-            c_duration_s, stream.view()
+            c_duration_s, _cs
         )
     elif tid == type_id.DURATION_DAYS:
         total_days = int(total_seconds // 86400)
@@ -681,10 +702,10 @@ def _(
             raise OverflowError(
                 f"{total_days} days out of range for INT32 limit."
             )
-        c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_D = duration_D(<int32_t>total_days)
         (<duration_scalar[duration_D]*>c_obj.get()).set_value(
-            c_duration_D, stream.view()
+            c_duration_D, _cs
         )
     else:
         typ = c_dtype.id()
@@ -696,6 +717,8 @@ def _(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     cdef unique_ptr[scalar] c_obj
     cdef duration_us c_duration_us
     cdef duration_ns c_duration_ns
@@ -727,11 +750,11 @@ def _(
             raise OverflowError(
                 f"{epoch_nanoseconds} nanoseconds out of range for INT64 limit."
             )
-        c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_ns = duration_ns(<int64_t>epoch_nanoseconds)
         c_timestamp_ns = timestamp_ns(c_duration_ns)
         (<timestamp_scalar[timestamp_ns]*>c_obj.get()).set_value(
-            c_timestamp_ns, stream.view()
+            c_timestamp_ns, _cs
         )
     elif tid == type_id.TIMESTAMP_MICROSECONDS:
         epoch_microseconds = int(epoch_seconds * 1_000_000)
@@ -739,11 +762,11 @@ def _(
             raise OverflowError(
                 f"{epoch_microseconds} microseconds out of range for INT64 limit."
             )
-        c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_us = duration_us(<int64_t>epoch_microseconds)
         c_timestamp_us = timestamp_us(c_duration_us)
         (<timestamp_scalar[timestamp_us]*>c_obj.get()).set_value(
-            c_timestamp_us, stream.view()
+            c_timestamp_us, _cs
         )
     elif tid == type_id.TIMESTAMP_MILLISECONDS:
         epoch_milliseconds = int(epoch_seconds * 1_000)
@@ -751,11 +774,11 @@ def _(
             raise OverflowError(
                 f"{epoch_milliseconds} milliseconds out of range for INT64 limit."
             )
-        c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_ms = duration_ms(<int64_t>epoch_milliseconds)
         c_timestamp_ms = timestamp_ms(c_duration_ms)
         (<timestamp_scalar[timestamp_ms]*>c_obj.get()).set_value(
-            c_timestamp_ms, stream.view()
+            c_timestamp_ms, _cs
         )
     elif tid == type_id.TIMESTAMP_SECONDS:
         epoch_seconds = int(epoch_seconds)
@@ -763,11 +786,11 @@ def _(
             raise OverflowError(
                 f"{epoch_seconds} seconds out of range for INT64 limit."
             )
-        c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_s = duration_s(<int64_t>epoch_seconds)
         c_timestamp_s = timestamp_s(c_duration_s)
         (<timestamp_scalar[timestamp_s]*>c_obj.get()).set_value(
-            c_timestamp_s, stream.view()
+            c_timestamp_s, _cs
         )
     elif tid == type_id.TIMESTAMP_DAYS:
         epoch_days = int(epoch_seconds // 86400)
@@ -775,11 +798,11 @@ def _(
             raise OverflowError(
                 f"{epoch_days} days out of range for INT32 limit."
             )
-        c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr())
+        c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr())
         c_duration_D = duration_D(<int32_t>epoch_days)
         c_timestamp_D = timestamp_D(c_duration_D)
         (<timestamp_scalar[timestamp_D]*>c_obj.get()).set_value(
-            c_timestamp_D, stream.view()
+            c_timestamp_D, _cs
         )
     else:
         typ = c_dtype.id()
@@ -791,6 +814,8 @@ def _(
 def _(
     py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource
 ):
+    cdef Stream _stream = stream
+    cdef cudaStream_t _cs = _stream.view().value()
     scale = py_val.as_tuple().exponent
     as_int = int(py_val.scaleb(-scale))
 
@@ -804,7 +829,7 @@ def _(
     cdef unique_ptr[scalar] c_obj = make_fixed_point_scalar[decimal128](
         val,
         scale_type(<int32_t>scale),
-        stream.view(),
+        _cs,
         mr.get_mr()
     )
     return _new_scalar(move(c_obj), dtype)
@@ -829,21 +854,25 @@ if np is not None:
 
     @_from_numpy.register(np.bool_)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         cdef DataType dtype = DataType(type_id.BOOL8)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
         cdef cbool c_val = np_val
-        (<numeric_scalar[cbool]*>c_obj.get()).set_value(c_val, stream.view())
+        (<numeric_scalar[cbool]*>c_obj.get()).set_value(c_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.str_)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         cdef DataType dtype = DataType(type_id.STRING)
         cdef unique_ptr[scalar] c_obj = make_string_scalar(
             np_val.item().encode(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
@@ -851,101 +880,121 @@ if np is not None:
 
     @_from_numpy.register(np.int8)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.INT8)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[int8_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[int8_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.int16)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.INT16)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[int16_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[int16_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.int32)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.INT32)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[int32_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[int32_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.int64)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.INT64)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[int64_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[int64_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.uint8)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.UINT8)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[uint8_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[uint8_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.uint16)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.UINT16)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[uint16_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[uint16_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.uint32)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.UINT32)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[uint32_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[uint32_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.uint64)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.UINT64)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[uint64_t]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[uint64_t]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.float32)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.FLOAT32)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[float]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[float]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
     @_from_numpy.register(np.float64)
     def _(np_val, stream: Stream, mr: DeviceMemoryResource):
+        cdef Stream _stream = stream
+        cdef cudaStream_t _cs = _stream.view().value()
         dtype = DataType(type_id.FLOAT64)
         cdef unique_ptr[scalar] c_obj = make_numeric_scalar(
-            dtype.c_obj, stream.view(), mr.get_mr()
+            dtype.c_obj, _cs, mr.get_mr()
         )
-        (<numeric_scalar[double]*>c_obj.get()).set_value(np_val, stream.view())
+        (<numeric_scalar[double]*>c_obj.get()).set_value(np_val, _cs)
         cdef Scalar slr = _new_scalar(move(c_obj), dtype)
         return slr
 
diff --git a/python/pylibcudf/pylibcudf/search.pxd b/python/pylibcudf/pylibcudf/search.pxd
index 7b0725bf60b..c26a6689240 100644
--- a/python/pylibcudf/pylibcudf/search.pxd
+++ b/python/pylibcudf/pylibcudf/search.pxd
@@ -1,7 +1,6 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
@@ -13,7 +12,7 @@ cpdef Column lower_bound(
     Table needles,
     list column_order,
     list null_precedence,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -22,10 +21,10 @@ cpdef Column upper_bound(
     Table needles,
     list column_order,
     list null_precedence,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Column contains(
-    Column haystack, Column needles, Stream stream = *, DeviceMemoryResource mr = *
+    Column haystack, Column needles, object stream = *, DeviceMemoryResource mr = *
 )
diff --git a/python/pylibcudf/pylibcudf/search.pyi b/python/pylibcudf/pylibcudf/search.pyi
index eaec283a32a..6cc58946f56 100644
--- a/python/pylibcudf/pylibcudf/search.pyi
+++ b/python/pylibcudf/pylibcudf/search.pyi
@@ -1,19 +1,19 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
 from pylibcudf.types import NullOrder, Order
+from pylibcudf.utils import CudaStreamLike
 
 def lower_bound(
     haystack: Table,
     needles: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def upper_bound(
@@ -21,12 +21,12 @@ def upper_bound(
     needles: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def contains(
     haystack: Column,
     needles: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx
index 4915b1b8be9..885d25f2d49 100644
--- a/python/pylibcudf/pylibcudf/search.pyx
+++ b/python/pylibcudf/pylibcudf/search.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -13,6 +13,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["contains", "lower_bound", "upper_bound"]
 
@@ -21,7 +22,7 @@ cpdef Column lower_bound(
     Table needles,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Find smallest indices in haystack where needles may be inserted to retain order.
@@ -52,7 +53,8 @@ cpdef Column lower_bound(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -61,10 +63,10 @@ cpdef Column lower_bound(
             needles.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column upper_bound(
@@ -72,7 +74,7 @@ cpdef Column upper_bound(
     Table needles,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Find largest indices in haystack where needles may be inserted to retain order.
@@ -103,7 +105,8 @@ cpdef Column upper_bound(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -112,14 +115,14 @@ cpdef Column upper_bound(
             needles.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column contains(
-    Column haystack, Column needles, Stream stream=None, DeviceMemoryResource mr=None
+    Column haystack, Column needles, object stream=None, DeviceMemoryResource mr=None
 ):
     """Check whether needles are present in haystack.
 
@@ -143,14 +146,15 @@ cpdef Column contains(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_search.contains(
             haystack.view(),
             needles.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/sorting.pxd b/python/pylibcudf/pylibcudf/sorting.pxd
index 701b6803c34..a081ece747a 100644
--- a/python/pylibcudf/pylibcudf/sorting.pxd
+++ b/python/pylibcudf/pylibcudf/sorting.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
 from pylibcudf.libcudf.aggregation cimport rank_method
 from pylibcudf.libcudf.types cimport null_order, null_policy, order, size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .table cimport Table
@@ -15,7 +14,7 @@ cpdef Column sorted_order(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -23,7 +22,7 @@ cpdef Column stable_sorted_order(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -34,12 +33,12 @@ cpdef Column rank(
     null_policy null_handling,
     null_order null_precedence,
     bool percentage,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef bool is_sorted(
-    Table table, list column_order, list null_precedence, Stream stream=*
+    Table table, list column_order, list null_precedence, object stream = *
 )
 
 cpdef Table segmented_sort_by_key(
@@ -48,7 +47,7 @@ cpdef Table segmented_sort_by_key(
     Column segment_offsets,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -58,7 +57,7 @@ cpdef Table stable_segmented_sort_by_key(
     Column segment_offsets,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -67,7 +66,7 @@ cpdef Table sort_by_key(
     Table keys,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -76,7 +75,7 @@ cpdef Table stable_sort_by_key(
     Table keys,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -84,7 +83,7 @@ cpdef Table sort(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -92,7 +91,7 @@ cpdef Table stable_sort(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -100,7 +99,7 @@ cpdef Column top_k(
     Column col,
     size_type k,
     order sort_order=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -108,6 +107,6 @@ cpdef Column top_k_order(
     Column col,
     size_type k,
     order sort_order=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi
index 8f00fcade6e..a06586a8f39 100644
--- a/python/pylibcudf/pylibcudf/sorting.pyi
+++ b/python/pylibcudf/pylibcudf/sorting.pyi
@@ -1,26 +1,26 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.aggregation import RankMethod
 from pylibcudf.column import Column
 from pylibcudf.table import Table
 from pylibcudf.types import NullOrder, NullPolicy, Order
+from pylibcudf.utils import CudaStreamLike
 
 def sorted_order(
     source_table: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def stable_sorted_order(
     source_table: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def rank(
@@ -30,14 +30,14 @@ def rank(
     null_handling: NullPolicy,
     null_precedence: NullOrder,
     percentage: bool,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_sorted(
     tbl: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
 ) -> bool: ...
 def segmented_sort_by_key(
     values: Table,
@@ -45,7 +45,7 @@ def segmented_sort_by_key(
     segment_offsets: Column,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def stable_segmented_sort_by_key(
@@ -54,7 +54,7 @@ def stable_segmented_sort_by_key(
     segment_offsets: Column,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def sort_by_key(
@@ -62,7 +62,7 @@ def sort_by_key(
     keys: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def stable_sort_by_key(
@@ -70,34 +70,34 @@ def stable_sort_by_key(
     keys: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def sort(
     source_table: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def stable_sort(
     source_table: Table,
     column_order: list[Order],
     null_precedence: list[NullOrder],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def top_k(
     col: Column,
     k: int,
     sort_order: Order = Order.DESCENDING,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def top_k_order(
     col: Column,
     k: int,
     sort_order: Order = Order.DESCENDING,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx
index be668ff2526..fa0ed78b709 100644
--- a/python/pylibcudf/pylibcudf/sorting.pyx
+++ b/python/pylibcudf/pylibcudf/sorting.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -15,6 +15,7 @@ from rmm.pylibrmm.stream cimport Stream
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "is_sorted",
@@ -33,7 +34,7 @@ cpdef Column sorted_order(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the row indices required to sort the table.
@@ -58,7 +59,8 @@ cpdef Column sorted_order(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -66,17 +68,17 @@ cpdef Column sorted_order(
             source_table.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column stable_sorted_order(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the row indices required to sort the table,
@@ -102,7 +104,8 @@ cpdef Column stable_sorted_order(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -110,10 +113,10 @@ cpdef Column stable_sorted_order(
             source_table.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column rank(
@@ -123,7 +126,7 @@ cpdef Column rank(
     null_policy null_handling,
     null_order null_precedence,
     bool percentage,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Computes the rank of each element in the column.
@@ -152,7 +155,8 @@ cpdef Column rank(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -163,14 +167,14 @@ cpdef Column rank(
             null_handling,
             null_precedence,
             percentage,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef bool is_sorted(
-    Table tbl, list column_order, list null_precedence, Stream stream=None
+    Table tbl, list column_order, list null_precedence, object stream=None
 ):
     """Checks if the table is sorted.
 
@@ -194,14 +198,15 @@ cpdef bool is_sorted(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
 
     with nogil:
         c_result = cpp_sorting.is_sorted(
             tbl.view(),
             c_orders,
             c_null_precedence,
-            stream.view()
+            _cs
         )
     return c_result
 
@@ -212,7 +217,7 @@ cpdef Table segmented_sort_by_key(
     Column segment_offsets,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Sorts the table by key, within segments.
@@ -241,7 +246,8 @@ cpdef Table segmented_sort_by_key(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -251,10 +257,10 @@ cpdef Table segmented_sort_by_key(
             segment_offsets.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table stable_segmented_sort_by_key(
@@ -263,7 +269,7 @@ cpdef Table stable_segmented_sort_by_key(
     Column segment_offsets,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Sorts the table by key preserving order of equal elements,
@@ -293,7 +299,8 @@ cpdef Table stable_segmented_sort_by_key(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -303,10 +310,10 @@ cpdef Table stable_segmented_sort_by_key(
             segment_offsets.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table sort_by_key(
@@ -314,7 +321,7 @@ cpdef Table sort_by_key(
     Table keys,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Sorts the table by key.
@@ -341,7 +348,8 @@ cpdef Table sort_by_key(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -350,10 +358,10 @@ cpdef Table sort_by_key(
             keys.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table stable_sort_by_key(
@@ -361,7 +369,7 @@ cpdef Table stable_sort_by_key(
     Table keys,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Sorts the table by key preserving order of equal elements.
@@ -388,7 +396,8 @@ cpdef Table stable_sort_by_key(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -397,17 +406,17 @@ cpdef Table stable_sort_by_key(
             keys.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table sort(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Sorts the table.
@@ -432,7 +441,8 @@ cpdef Table sort(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -440,17 +450,17 @@ cpdef Table sort(
             source_table.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table stable_sort(
     Table source_table,
     list column_order,
     list null_precedence,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Sorts the table preserving order of equal elements.
@@ -475,7 +485,8 @@ cpdef Table stable_sort(
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -483,17 +494,17 @@ cpdef Table stable_sort(
             source_table.view(),
             c_orders,
             c_null_precedence,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column top_k(
     Column col,
     size_type k,
     order sort_order = order.DESCENDING,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -518,7 +529,8 @@ cpdef Column top_k(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -526,17 +538,17 @@ cpdef Column top_k(
             col.view(),
             k,
             sort_order,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column top_k_order(
     Column col,
     size_type k,
     order sort_order = order.DESCENDING,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -564,7 +576,8 @@ cpdef Column top_k_order(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -572,7 +585,7 @@ cpdef Column top_k_order(
             col.view(),
             k,
             sort_order,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd
index 03b463f5f3a..6e904e11ce1 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd
@@ -8,7 +8,6 @@ from pylibcudf.libcudf.types cimport (
     size_type,
 )
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .expressions cimport Expression
@@ -19,7 +18,7 @@ cpdef Table drop_nulls(
     Table source_table,
     list keys,
     size_type keep_threshold,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -27,14 +26,14 @@ cpdef Table drop_nans(
     Table source_table,
     list keys,
     size_type keep_threshold,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef Table apply_boolean_mask(
     Table source_table,
     Column boolean_mask,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -43,7 +42,7 @@ cpdef Table unique(
     list keys,
     duplicate_keep_option keep,
     null_equality nulls_equal,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -53,7 +52,7 @@ cpdef Table distinct(
     duplicate_keep_option keep,
     null_equality nulls_equal,
     nan_equality nans_equal,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -62,7 +61,7 @@ cpdef Column distinct_indices(
     duplicate_keep_option keep,
     null_equality nulls_equal,
     nan_equality nans_equal,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -72,7 +71,7 @@ cpdef Table stable_distinct(
     duplicate_keep_option keep,
     null_equality nulls_equal,
     nan_equality nans_equal,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -80,6 +79,6 @@ cpdef Table filter(
     Table predicate_table,
     Expression predicate_expr,
     Table filter_table,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi
index 49c44f82486..afdd692dde2 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pyi
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi
@@ -4,12 +4,12 @@
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.expressions import Expression
 from pylibcudf.table import Table
 from pylibcudf.types import NanEquality, NullEquality
+from pylibcudf.utils import CudaStreamLike
 
 class DuplicateKeepOption(IntEnum):
     KEEP_ANY = ...
@@ -21,20 +21,20 @@ def drop_nulls(
     source_table: Table,
     keys: list[int],
     keep_threshold: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def drop_nans(
     source_table: Table,
     keys: list[int],
     keep_threshold: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def apply_boolean_mask(
     source_table: Table,
     boolean_mask: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def unique(
@@ -42,7 +42,7 @@ def unique(
     keys: list[int],
     keep: DuplicateKeepOption,
     nulls_equal: NullEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def distinct(
@@ -51,7 +51,7 @@ def distinct(
     keep: DuplicateKeepOption,
     nulls_equal: NullEquality,
     nans_equal: NanEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def distinct_indices(
@@ -59,7 +59,7 @@ def distinct_indices(
     keep: DuplicateKeepOption,
     nulls_equal: NullEquality,
     nans_equal: NanEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def stable_distinct(
@@ -68,13 +68,13 @@ def stable_distinct(
     keep: DuplicateKeepOption,
     nulls_equal: NullEquality,
     nans_equal: NanEquality,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def filter(
     predicate_table: Table,
     predicate_expr: Expression,
     filter_table: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx
index 4e676602cf8..b4751078acb 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pyx
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx
@@ -24,6 +24,7 @@ from .column cimport Column
 from .expressions cimport Expression
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "DuplicateKeepOption",
@@ -41,7 +42,7 @@ cpdef Table drop_nulls(
     Table source_table,
     list keys,
     size_type keep_threshold,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Filters out rows from the input table based on the presence of nulls.
@@ -65,21 +66,22 @@ cpdef Table drop_nulls(
     cdef unique_ptr[table] c_result
     cdef vector[size_type] c_keys = keys
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_stream_compaction.drop_nulls(
-            source_table.view(), c_keys, keep_threshold, stream.view(), mr.get_mr()
+            source_table.view(), c_keys, keep_threshold, _cs, mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table drop_nans(
     Table source_table,
     list keys,
     size_type keep_threshold,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Filters out rows from the input table based on the presence of NaNs.
@@ -103,20 +105,21 @@ cpdef Table drop_nans(
     cdef unique_ptr[table] c_result
     cdef vector[size_type] c_keys = keys
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_stream_compaction.drop_nans(
-            source_table.view(), c_keys, keep_threshold, stream.view(), mr.get_mr()
+            source_table.view(), c_keys, keep_threshold, _cs, mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table apply_boolean_mask(
     Table source_table,
     Column boolean_mask,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Filters out rows from the input table based on a boolean mask.
@@ -137,14 +140,15 @@ cpdef Table apply_boolean_mask(
     """
     cdef unique_ptr[table] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_stream_compaction.apply_boolean_mask(
-            source_table.view(), boolean_mask.view(), stream.view(), mr.get_mr()
+            source_table.view(), boolean_mask.view(), _cs, mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table unique(
@@ -152,7 +156,7 @@ cpdef Table unique(
     list keys,
     duplicate_keep_option keep,
     null_equality nulls_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Filter duplicate consecutive rows from the input table.
@@ -184,14 +188,15 @@ cpdef Table unique(
     cdef unique_ptr[table] c_result
     cdef vector[size_type] c_keys = keys
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_stream_compaction.unique(
-            input.view(), c_keys, keep, nulls_equal, stream.view(), mr.get_mr()
+            input.view(), c_keys, keep, nulls_equal, _cs, mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table distinct(
@@ -200,7 +205,7 @@ cpdef Table distinct(
     duplicate_keep_option keep,
     null_equality nulls_equal,
     nan_equality nans_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Get the distinct rows from the input table.
@@ -229,15 +234,16 @@ cpdef Table distinct(
     cdef unique_ptr[table] c_result
     cdef vector[size_type] c_keys = keys
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_stream_compaction.distinct(
-            input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view(),
+            input.view(), c_keys, keep, nulls_equal, nans_equal, _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column distinct_indices(
@@ -245,7 +251,7 @@ cpdef Column distinct_indices(
     duplicate_keep_option keep,
     null_equality nulls_equal,
     nan_equality nans_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Get the indices of the distinct rows from the input table.
@@ -270,14 +276,15 @@ cpdef Column distinct_indices(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_stream_compaction.distinct_indices(
-            input.view(), keep, nulls_equal, nans_equal, stream.view(), mr.get_mr()
+            input.view(), keep, nulls_equal, nans_equal, _cs, mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table stable_distinct(
@@ -286,7 +293,7 @@ cpdef Table stable_distinct(
     duplicate_keep_option keep,
     null_equality nulls_equal,
     nan_equality nans_equal,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Get the distinct rows from the input table, preserving input order.
@@ -315,22 +322,23 @@ cpdef Table stable_distinct(
     cdef unique_ptr[table] c_result
     cdef vector[size_type] c_keys = keys
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_stream_compaction.stable_distinct(
-            input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view(),
+            input.view(), c_keys, keep, nulls_equal, nans_equal, _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table filter(
     Table predicate_table,
     Expression predicate_expr,
     Table filter_table,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Filters a table using a predicate expression.
@@ -353,7 +361,8 @@ cpdef Table filter(
     """
     cdef unique_ptr[table] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -361,9 +370,9 @@ cpdef Table filter(
             predicate_table.view(),
             dereference(predicate_expr.c_obj.get()),
             filter_table.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 DuplicateKeepOption.__str__ = DuplicateKeepOption.__repr__
diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/strings/attributes.pxd
index 68b1ce9b5a0..64533b1ce3d 100644
--- a/python/pylibcudf/pylibcudf/strings/attributes.pxd
+++ b/python/pylibcudf/pylibcudf/strings/attributes.pxd
@@ -1,19 +1,18 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column count_characters(
-    Column source_strings, Stream stream=*, DeviceMemoryResource mr=*
+    Column source_strings, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column count_bytes(
-    Column source_strings, Stream stream=*, DeviceMemoryResource mr=*
+    Column source_strings, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column code_points(
-    Column source_strings, Stream stream=*, DeviceMemoryResource mr=*
+    Column source_strings, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyi b/python/pylibcudf/pylibcudf/strings/attributes.pyi
index 06b76e669d3..2e28fb9f186 100644
--- a/python/pylibcudf/pylibcudf/strings/attributes.pyi
+++ b/python/pylibcudf/pylibcudf/strings/attributes.pyi
@@ -1,23 +1,23 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def count_characters(
     source_strings: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def count_bytes(
     source_strings: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def code_points(
     source_strings: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyx b/python/pylibcudf/pylibcudf/strings/attributes.pyx
index 2449d51122f..334270ea834 100644
--- a/python/pylibcudf/pylibcudf/strings/attributes.pyx
+++ b/python/pylibcudf/pylibcudf/strings/attributes.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -9,11 +9,12 @@ from pylibcudf.libcudf.strings cimport attributes as cpp_attributes
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["code_points", "count_bytes", "count_characters"]
 
 cpdef Column count_characters(
-    Column source_strings, Stream stream=None, DeviceMemoryResource mr=None
+    Column source_strings, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a column containing character lengths of each string
@@ -32,19 +33,20 @@ cpdef Column count_characters(
         New column with lengths for each string
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_attributes.count_characters(
-            source_strings.view(), stream.view(), mr.get_mr()
+            source_strings.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column count_bytes(
-    Column source_strings, Stream stream=None, DeviceMemoryResource mr=None
+    Column source_strings, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a column containing byte lengths of each string
@@ -63,19 +65,20 @@ cpdef Column count_bytes(
         New column with the number of bytes for each string
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_attributes.count_bytes(
-            source_strings.view(), stream.view(), mr.get_mr()
+            source_strings.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column code_points(
-    Column source_strings, Stream stream=None, DeviceMemoryResource mr=None
+    Column source_strings, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Creates a numeric column with code point values (integers)
@@ -94,12 +97,13 @@ cpdef Column code_points(
         New column with code point integer values for each character
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_attributes.code_points(
-            source_strings.view(), stream.view(), mr.get_mr()
+            source_strings.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/strings/capitalize.pxd
index ccbe15b3794..1a68c29e05c 100644
--- a/python/pylibcudf/pylibcudf/strings/capitalize.pxd
+++ b/python/pylibcudf/pylibcudf/strings/capitalize.pxd
@@ -1,20 +1,19 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.scalar cimport Scalar
 from pylibcudf.libcudf.strings.char_types cimport string_character_types
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column capitalize(
-    Column input, Scalar delimiters=*, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Scalar delimiters=*, object stream = *, DeviceMemoryResource mr=*
 )
 cpdef Column title(
     Column input,
     string_character_types sequence_type=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
-cpdef Column is_title(Column input, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef Column is_title(Column input, object stream = *, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyi b/python/pylibcudf/pylibcudf/strings/capitalize.pyi
index 35554e6fff3..031d244bf25 100644
--- a/python/pylibcudf/pylibcudf/strings/capitalize.pyi
+++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyi
@@ -1,27 +1,27 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.strings.char_types import StringCharacterTypes
+from pylibcudf.utils import CudaStreamLike
 
 def capitalize(
     input: Column,
     delimiters: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def title(
     input: Column,
     sequence_type: StringCharacterTypes = StringCharacterTypes.ALPHA,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_title(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyx b/python/pylibcudf/pylibcudf/strings/capitalize.pyx
index 11291bd1243..be8c52a59b5 100644
--- a/python/pylibcudf/pylibcudf/strings/capitalize.pyx
+++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -17,13 +17,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from cython.operator import dereference
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["capitalize", "is_title", "title"]
 
 cpdef Column capitalize(
     Column input,
     Scalar delimiters=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
     # TODO: default scalar values
     # https://github.com/rapidsai/cudf/issues/15505
@@ -45,12 +46,13 @@ cpdef Column capitalize(
         Column of strings capitalized from the input column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if delimiters is None:
         delimiters = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr())
         )
 
     cdef const string_scalar* cpp_delimiters = <const string_scalar*>(
@@ -61,17 +63,17 @@ cpdef Column capitalize(
         c_result = cpp_capitalize.capitalize(
             input.view(),
             dereference(cpp_delimiters),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column title(
     Column input,
     string_character_types sequence_type=string_character_types.ALPHA,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Modifies first character of each word to upper-case and lower-cases
@@ -92,17 +94,18 @@ cpdef Column title(
         Column of titled strings
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
         c_result = cpp_capitalize.title(
-            input.view(), sequence_type, stream.view(), mr.get_mr()
+            input.view(), sequence_type, _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
-cpdef Column is_title(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_title(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Checks if the strings in the input column are title formatted.
 
     For details, see :cpp:func:`is_title`.
@@ -118,9 +121,10 @@ cpdef Column is_title(Column input, Stream stream=None, DeviceMemoryResource mr=
         Column of type BOOL8
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = cpp_capitalize.is_title(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_capitalize.is_title(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/strings/case.pxd
index 8a959fb61d5..fea9f68e95e 100644
--- a/python/pylibcudf/pylibcudf/strings/case.pxd
+++ b/python/pylibcudf/pylibcudf/strings/case.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
-cpdef Column to_lower(Column input, Stream stream=*, DeviceMemoryResource mr=*)
-cpdef Column to_upper(Column input, Stream stream=*, DeviceMemoryResource mr=*)
-cpdef Column swapcase(Column input, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef Column to_lower(Column input, object stream = *, DeviceMemoryResource mr=*)
+cpdef Column to_upper(Column input, object stream = *, DeviceMemoryResource mr=*)
+cpdef Column swapcase(Column input, object stream = *, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/strings/case.pyi b/python/pylibcudf/pylibcudf/strings/case.pyi
index ecdb614fcd7..1337e7df5a9 100644
--- a/python/pylibcudf/pylibcudf/strings/case.pyi
+++ b/python/pylibcudf/pylibcudf/strings/case.pyi
@@ -1,23 +1,23 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def to_lower(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def to_upper(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def swapcase(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/case.pyx b/python/pylibcudf/pylibcudf/strings/case.pyx
index 5e7d20f01f8..ec6539f42e1 100644
--- a/python/pylibcudf/pylibcudf/strings/case.pyx
+++ b/python/pylibcudf/pylibcudf/strings/case.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -9,10 +9,11 @@ from pylibcudf.libcudf.strings cimport case as cpp_case
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["swapcase", "to_lower", "to_upper"]
 
-cpdef Column to_lower(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column to_lower(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Returns a column of lowercased strings.
 
     For details, see :cpp:func:`to_lower`.
@@ -32,14 +33,15 @@ cpdef Column to_lower(Column input, Stream stream=None, DeviceMemoryResource mr=
         Column of strings lowercased from the input column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = cpp_case.to_lower(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_case.to_lower(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
-cpdef Column to_upper(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column to_upper(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Returns a column of uppercased strings.
 
     For details, see :cpp:func:`to_upper`.
@@ -59,14 +61,15 @@ cpdef Column to_upper(Column input, Stream stream=None, DeviceMemoryResource mr=
         Column of strings uppercased from the input column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = cpp_case.to_upper(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_case.to_upper(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
-cpdef Column swapcase(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column swapcase(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Returns a column of strings where the lowercase characters
     are converted to uppercase and the uppercase characters
     are converted to lowercase.
@@ -88,9 +91,10 @@ cpdef Column swapcase(Column input, Stream stream=None, DeviceMemoryResource mr=
         Column of strings
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = cpp_case.swapcase(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_case.swapcase(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/strings/char_types.pxd
index 009886f3e9f..59c045dba15 100644
--- a/python/pylibcudf/pylibcudf/strings/char_types.pxd
+++ b/python/pylibcudf/pylibcudf/strings/char_types.pxd
@@ -1,18 +1,17 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.strings.char_types cimport string_character_types
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column all_characters_of_type(
     Column source_strings,
     string_character_types types,
     string_character_types verify_types,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
@@ -21,6 +20,6 @@ cpdef Column filter_characters_of_type(
     string_character_types types_to_remove,
     Scalar replacement,
     string_character_types types_to_keep,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyi b/python/pylibcudf/pylibcudf/strings/char_types.pyi
index 12749d79f6d..1740a67eb00 100644
--- a/python/pylibcudf/pylibcudf/strings/char_types.pyi
+++ b/python/pylibcudf/pylibcudf/strings/char_types.pyi
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 class StringCharacterTypes(IntEnum):
     DECIMAL = ...
@@ -25,7 +25,7 @@ def all_characters_of_type(
     source_strings: Column,
     types: StringCharacterTypes,
     verify_types: StringCharacterTypes,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def filter_characters_of_type(
@@ -33,6 +33,6 @@ def filter_characters_of_type(
     types_to_remove: StringCharacterTypes,
     replacement: Scalar,
     types_to_keep: StringCharacterTypes,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx
index 5cb5025798e..2567ab8ee4b 100644
--- a/python/pylibcudf/pylibcudf/strings/char_types.pyx
+++ b/python/pylibcudf/pylibcudf/strings/char_types.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from cython.operator import dereference
+from cuda.bindings.cyruntime cimport cudaStream_t
 from pylibcudf.libcudf.strings.char_types import \
     string_character_types as StringCharacterTypes  # no-cython-lint
 
@@ -27,7 +28,7 @@ cpdef Column all_characters_of_type(
     Column source_strings,
     string_character_types types,
     string_character_types verify_types,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -50,7 +51,8 @@ cpdef Column all_characters_of_type(
         New column of boolean results for each string
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -58,18 +60,18 @@ cpdef Column all_characters_of_type(
             source_strings.view(),
             types,
             verify_types,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column filter_characters_of_type(
     Column source_strings,
     string_character_types types_to_remove,
     Scalar replacement,
     string_character_types types_to_keep,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -99,7 +101,8 @@ cpdef Column filter_characters_of_type(
         replacement.c_obj.get()
     )
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -108,10 +111,10 @@ cpdef Column filter_characters_of_type(
             types_to_remove,
             dereference(c_replacement),
             types_to_keep,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 StringCharacterTypes.__str__ = StringCharacterTypes.__repr__
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/strings/combine.pxd
index b889169c7c7..32a58abdc23 100644
--- a/python/pylibcudf/pylibcudf/strings/combine.pxd
+++ b/python/pylibcudf/pylibcudf/strings/combine.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
@@ -9,7 +9,6 @@ from pylibcudf.libcudf.strings.combine cimport (
 from pylibcudf.scalar cimport Scalar
 from pylibcudf.table cimport Table
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused ColumnOrScalar:
     Column
@@ -21,7 +20,7 @@ cpdef Column concatenate(
     Scalar narep=*,
     Scalar col_narep=*,
     separator_on_nulls separate_nulls=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -29,7 +28,7 @@ cpdef Column join_strings(
     Column input,
     Scalar separator,
     Scalar narep,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
@@ -40,6 +39,6 @@ cpdef Column join_list_elements(
     Scalar string_narep,
     separator_on_nulls separate_nulls,
     output_if_empty_list empty_list_policy,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi
index fa568046fa8..3186709996f 100644
--- a/python/pylibcudf/pylibcudf/strings/combine.pyi
+++ b/python/pylibcudf/pylibcudf/strings/combine.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 class SeparatorOnNulls(IntEnum):
     YES = ...
@@ -24,14 +24,14 @@ def concatenate(
     narep: Scalar | None = None,
     col_narep: Scalar | None = None,
     separate_nulls: SeparatorOnNulls = SeparatorOnNulls.YES,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def join_strings(
     input: Column,
     separator: Scalar,
     narep: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def join_list_elements(
@@ -41,6 +41,6 @@ def join_list_elements(
     string_narep: Scalar,
     separate_nulls: SeparatorOnNulls,
     empty_list_policy: OutputIfEmptyList,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyx b/python/pylibcudf/pylibcudf/strings/combine.pyx
index e570a18c585..82903002907 100644
--- a/python/pylibcudf/pylibcudf/strings/combine.pyx
+++ b/python/pylibcudf/pylibcudf/strings/combine.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -16,6 +16,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from cython.operator import dereference
+from cuda.bindings.cyruntime cimport cudaStream_t
 from pylibcudf.libcudf.strings.combine import \
     output_if_empty_list as OutputIfEmptyList  # no-cython-lint
 from pylibcudf.libcudf.strings.combine import \
@@ -35,7 +36,7 @@ cpdef Column concatenate(
     Scalar narep=None,
     Scalar col_narep=None,
     separator_on_nulls separate_nulls=separator_on_nulls.YES,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -68,12 +69,13 @@ cpdef Column concatenate(
     cdef unique_ptr[column] c_result
     cdef const string_scalar* c_col_narep
     cdef const string_scalar* c_separator
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if narep is None:
         narep = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
         )
     cdef const string_scalar* c_narep = <const string_scalar*>(
         narep.c_obj.get()
@@ -82,7 +84,7 @@ cpdef Column concatenate(
     if ColumnOrScalar is Column:
         if col_narep is None:
             col_narep = Scalar.from_libcudf(
-                cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+                cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
             )
         c_col_narep = <const string_scalar*>(
             col_narep.c_obj.get()
@@ -95,7 +97,7 @@ cpdef Column concatenate(
                     dereference(c_narep),
                     dereference(c_col_narep),
                     separate_nulls,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
@@ -112,20 +114,20 @@ cpdef Column concatenate(
                     dereference(c_separator),
                     dereference(c_narep),
                     separate_nulls,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
     else:
         raise ValueError("separator must be a Column or a Scalar")
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column join_strings(
     Column input,
     Scalar separator,
     Scalar narep,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -149,7 +151,8 @@ cpdef Column join_strings(
         New column containing one string
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     cdef const string_scalar* c_separator = <const string_scalar*>(
         separator.c_obj.get()
@@ -163,12 +166,12 @@ cpdef Column join_strings(
                 input.view(),
                 dereference(c_separator),
                 dereference(c_narep),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column join_list_elements(
@@ -178,7 +181,7 @@ cpdef Column join_list_elements(
     Scalar string_narep,
     separator_on_nulls separate_nulls,
     output_if_empty_list empty_list_policy,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -217,7 +220,8 @@ cpdef Column join_list_elements(
         New strings column with concatenated results
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     cdef const string_scalar* c_separator_narep = <const string_scalar*>(
         separator_narep.c_obj.get()
@@ -237,7 +241,7 @@ cpdef Column join_list_elements(
                     dereference(c_string_narep),
                     separate_nulls,
                     empty_list_policy,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
@@ -251,13 +255,13 @@ cpdef Column join_list_elements(
                     dereference(c_separator_narep),
                     separate_nulls,
                     empty_list_policy,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
     else:
         raise ValueError("separator must be a Column or a Scalar")
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 OutputIfEmptyList.__str__ = OutputIfEmptyList.__repr__
 SeparatorOnNulls.__str__ = SeparatorOnNulls.__repr__
diff --git a/python/pylibcudf/pylibcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/strings/contains.pxd
index b3b0f06efb5..585f2fac1ff 100644
--- a/python/pylibcudf/pylibcudf/strings/contains.pxd
+++ b/python/pylibcudf/pylibcudf/strings/contains.pxd
@@ -1,28 +1,27 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.strings.regex_program cimport RegexProgram
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column contains_re(
-    Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column count_re(
-    Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column matches_re(
-    Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column like(
     Column input,
     str pattern,
     str escape_character=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyi b/python/pylibcudf/pylibcudf/strings/contains.pyi
index 3685cf5345a..b751ef0b24c 100644
--- a/python/pylibcudf/pylibcudf/strings/contains.pyi
+++ b/python/pylibcudf/pylibcudf/strings/contains.pyi
@@ -1,34 +1,34 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.strings.regex_program import RegexProgram
+from pylibcudf.utils import CudaStreamLike
 
 def contains_re(
     input: Column,
     prog: RegexProgram,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def count_re(
     input: Column,
     prog: RegexProgram,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def matches_re(
     input: Column,
     prog: RegexProgram,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def like(
     input: Column,
     pattern: str,
     escape_character: str | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyx b/python/pylibcudf/pylibcudf/strings/contains.pyx
index 8fe74228854..495d1637d8a 100644
--- a/python/pylibcudf/pylibcudf/strings/contains.pyx
+++ b/python/pylibcudf/pylibcudf/strings/contains.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -11,13 +11,14 @@ from pylibcudf.strings.regex_program cimport RegexProgram
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["contains_re", "count_re", "like", "matches_re"]
 
 cpdef Column contains_re(
     Column input,
     RegexProgram prog,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Returns a boolean column identifying rows which match the given
@@ -39,24 +40,25 @@ cpdef Column contains_re(
     """
 
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         result = cpp_contains.contains_re(
             input.view(),
             prog.c_obj.get()[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column count_re(
     Column input,
     RegexProgram prog,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Returns the number of times the given regex_program's pattern
@@ -78,24 +80,25 @@ cpdef Column count_re(
     """
 
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         result = cpp_contains.count_re(
             input.view(),
             prog.c_obj.get()[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column matches_re(
     Column input,
     RegexProgram prog,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Returns a boolean column identifying rows which
@@ -118,25 +121,26 @@ cpdef Column matches_re(
     """
 
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         result = cpp_contains.matches_re(
             input.view(),
             prog.c_obj.get()[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column like(
     Column input,
     str pattern,
     str escape_character=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -161,7 +165,8 @@ cpdef Column like(
         New column of boolean results for each string
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if escape_character is None:
@@ -175,9 +180,9 @@ cpdef Column like(
             input.view(),
             c_pattern,
             c_escape_character,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    stream.synchronize()
+    _stream.synchronize()
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd
index cc1206cf29b..0929544287f 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd
@@ -1,20 +1,19 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column to_booleans(
-    Column input, Scalar true_string, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Scalar true_string, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column from_booleans(
     Column booleans,
     Scalar true_string,
     Scalar false_string,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi
index 608b47bad8c..10c7b96bfc0 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi
@@ -1,22 +1,22 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def to_booleans(
     input: Column,
     true_string: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def from_booleans(
     booleans: Column,
     true_string: Scalar,
     false_string: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
index 6f7965f8a3b..e8f963cf0f3 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -15,11 +15,12 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from cython.operator import dereference
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["from_booleans", "to_booleans"]
 
 cpdef Column to_booleans(
-    Column input, Scalar true_string, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, Scalar true_string, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new bool column by parsing boolean values from the strings
@@ -47,24 +48,25 @@ cpdef Column to_booleans(
     cdef const string_scalar* c_true_string = <const string_scalar*>(
         true_string.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_convert_booleans.to_booleans(
             input.view(),
             dereference(c_true_string),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column from_booleans(
     Column booleans,
     Scalar true_string,
     Scalar false_string,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -99,7 +101,8 @@ cpdef Column from_booleans(
     cdef const string_scalar* c_false_string = <const string_scalar*>(
         false_string.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -107,8 +110,8 @@ cpdef Column from_booleans(
             booleans.view(),
             dereference(c_true_string),
             dereference(c_false_string),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd
index 407eb06ce6a..d0a5d2fc829 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd
@@ -1,18 +1,17 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.string cimport string
 from pylibcudf.column cimport Column
 from pylibcudf.types cimport DataType
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column to_timestamps(
     Column input,
     DataType timestamp_type,
     str format,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
@@ -20,13 +19,13 @@ cpdef Column from_timestamps(
     Column timestamps,
     str format,
     Column input_strings_names,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
 cpdef Column is_timestamp(
     Column input,
     str format,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi
index 5fdc863705d..99f067ecb04 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi
@@ -1,29 +1,29 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 def to_timestamps(
     input: Column,
     timestamp_type: DataType,
     format: str,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def from_timestamps(
     timestamps: Column,
     format: str,
     input_strings_names: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_timestamp(
     input: Column,
     format: str,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
index 07b35de7c54..633445a7383 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from pylibcudf.types import DataType
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["from_timestamps", "is_timestamp", "to_timestamps"]
 
@@ -21,7 +22,7 @@ cpdef Column to_timestamps(
     Column input,
     DataType timestamp_type,
     str format,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -51,24 +52,25 @@ cpdef Column to_timestamps(
     """
     cdef unique_ptr[column] c_result
     cdef string c_format = format.encode()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
         c_result = cpp_convert_datetime.to_timestamps(
             input.view(),
             timestamp_type.c_obj,
             c_format,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column from_timestamps(
     Column timestamps,
     str format,
     Column input_strings_names,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -98,23 +100,24 @@ cpdef Column from_timestamps(
     """
     cdef unique_ptr[column] c_result
     cdef string c_format = format.encode()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
         c_result = cpp_convert_datetime.from_timestamps(
             timestamps.view(),
             c_format,
             input_strings_names.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column is_timestamp(
     Column input,
     str format,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -141,14 +144,15 @@ cpdef Column is_timestamp(
     """
     cdef unique_ptr[column] c_result
     cdef string c_format = format.encode()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
         c_result = cpp_convert_datetime.is_timestamp(
             input.view(),
             c_format,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
index 62b372d0af4..a912d939a83 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
@@ -1,24 +1,23 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.string cimport string
 from pylibcudf.column cimport Column
 from pylibcudf.types cimport DataType
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column to_durations(
     Column input,
     DataType duration_type,
     str format,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
 cpdef Column from_durations(
     Column durations,
     str format=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi
index 95ba392ec94..ac9fd9825dc 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi
@@ -1,22 +1,22 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 def to_durations(
     input: Column,
     duration_type: DataType,
     format: str,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def from_durations(
     durations: Column,
     format: str | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
index 9bf8eb96009..548df7398b4 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from pylibcudf.types import DataType
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["from_durations", "to_durations"]
 
@@ -21,7 +22,7 @@ cpdef Column to_durations(
     Column input,
     DataType duration_type,
     str format,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """
@@ -51,7 +52,8 @@ cpdef Column to_durations(
     """
     cdef unique_ptr[column] c_result
     cdef string c_format = format.encode()
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -59,16 +61,16 @@ cpdef Column to_durations(
             input.view(),
             duration_type.c_obj,
             c_format,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column from_durations(
     Column durations,
     str format=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None
 ):
     """
@@ -95,7 +97,8 @@ cpdef Column from_durations(
         New strings column with formatted durations.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if format is None:
@@ -106,8 +109,8 @@ cpdef Column from_durations(
         c_result = cpp_convert_durations.from_durations(
             durations.view(),
             c_format,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
index 046556db181..439f8884008 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
@@ -1,26 +1,25 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.types cimport DataType
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column to_fixed_point(
     Column input,
     DataType output_type,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
 cpdef Column from_fixed_point(
-    Column input, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column is_fixed_point(
     Column input,
     DataType decimal_type=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi
index 7269f970069..a9d4a0eac98 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi
@@ -1,26 +1,26 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 def to_fixed_point(
     input: Column,
     output_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def from_fixed_point(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_fixed_point(
     input: Column,
     decimal_type: DataType | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
index 13020a5ee73..059373790c5 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -12,12 +12,13 @@ from pylibcudf.types cimport DataType, type_id
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["from_fixed_point", "is_fixed_point", "to_fixed_point"]
 
 
 cpdef Column to_fixed_point(
-    Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new fixed-point column parsing decimal values from the
@@ -42,21 +43,22 @@ cpdef Column to_fixed_point(
         New column of output_type.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_fixed_point.to_fixed_point(
             input.view(),
             output_type.c_obj,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column from_fixed_point(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new strings column converting the fixed-point values
@@ -78,20 +80,21 @@ cpdef Column from_fixed_point(
         New strings column.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_fixed_point.from_fixed_point(
-            input.view(), stream.view(), mr.get_mr()
+            input.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column is_fixed_point(
     Column input,
     DataType decimal_type=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -118,7 +121,8 @@ cpdef Column is_fixed_point(
         New column of boolean results for each string.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if decimal_type is None:
@@ -128,8 +132,8 @@ cpdef Column is_fixed_point(
         c_result = cpp_fixed_point.is_fixed_point(
             input.view(),
             decimal_type.c_obj,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd
index a2b98fa0b74..0d394fa1fe7 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd
@@ -1,16 +1,15 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.types cimport DataType
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column to_floats(
-    Column strings, DataType output_type, Stream stream=*, DeviceMemoryResource mr=*
+    Column strings, DataType output_type, object stream = *, DeviceMemoryResource mr=*
 )
 
-cpdef Column from_floats(Column floats, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef Column from_floats(Column floats, object stream = *, DeviceMemoryResource mr=*)
 
-cpdef Column is_float(Column input, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef Column is_float(Column input, object stream = *, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi
index b5c8d7e7497..b334dfef9c7 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi
@@ -1,25 +1,25 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 def to_floats(
     strings: Column,
     output_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def from_floats(
     floats: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_float(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx
index 59ac17a3e1c..d4901ce7be6 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -12,13 +12,14 @@ from pylibcudf.types cimport DataType
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["from_floats", "is_float", "to_floats"]
 
 cpdef Column to_floats(
     Column strings,
     DataType output_type,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -44,22 +45,23 @@ cpdef Column to_floats(
         New column with floats converted from strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_convert_floats.to_floats(
             strings.view(),
             output_type.c_obj,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column from_floats(
-    Column floats, Stream stream=None, DeviceMemoryResource mr=None
+    Column floats, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new strings column converting the float values from the
@@ -81,18 +83,19 @@ cpdef Column from_floats(
         New strings column with floats as strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_convert_floats.from_floats(
-            floats.view(), stream.view(), mr.get_mr()
+            floats.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
-cpdef Column is_float(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_float(Column input, object stream=None, DeviceMemoryResource mr=None):
     """
     Returns a boolean column identifying strings in which all
     characters are valid for conversion to floats.
@@ -113,10 +116,13 @@ cpdef Column is_float(Column input, Stream stream=None, DeviceMemoryResource mr=
         New column of boolean results for each string.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_convert_floats.is_float(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_convert_floats.is_float(
+            input.view(), _cs, mr.get_mr()
+        )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd
index 376081e9b20..059e8c31f19 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd
@@ -1,32 +1,31 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.types cimport DataType
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column to_integers(
-    Column input, DataType output_type, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, DataType output_type, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column from_integers(
-    Column integers, Stream stream=*, DeviceMemoryResource mr=*
+    Column integers, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column is_integer(
-    Column input, DataType int_type=*, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, DataType int_type=*, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column hex_to_integers(
-    Column input, DataType output_type, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, DataType output_type, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column is_hex(
-    Column input, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column integers_to_hex(
-    Column input, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi
index 4625ee5e883..88a66350466 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi
@@ -1,42 +1,42 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 def to_integers(
     input: Column,
     output_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def from_integers(
     integers: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_integer(
     input: Column,
     int_type: DataType | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def hex_to_integers(
     input: Column,
     output_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_hex(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def integers_to_hex(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx
index c5945e5e1e5..b717ddbbcda 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -12,6 +12,7 @@ from pylibcudf.types cimport DataType
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "from_integers",
@@ -23,7 +24,7 @@ __all__ = [
 ]
 
 cpdef Column to_integers(
-    Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new integer numeric column parsing integer values from the
@@ -48,7 +49,8 @@ cpdef Column to_integers(
         New column with integers converted from strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -56,16 +58,16 @@ cpdef Column to_integers(
             cpp_convert_integers.to_integers(
                 input.view(),
                 output_type.c_obj,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column from_integers(
-    Column integers, Stream stream=None, DeviceMemoryResource mr=None
+    Column integers, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new strings column converting the integer values from the
@@ -87,25 +89,26 @@ cpdef Column from_integers(
         New strings column with integers as strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = move(
             cpp_convert_integers.from_integers(
                 integers.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column is_integer(
     Column input,
     DataType int_type=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -133,7 +136,8 @@ cpdef Column is_integer(
         New column of boolean results for each string.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if int_type is None:
@@ -141,7 +145,7 @@ cpdef Column is_integer(
             c_result = move(
                 cpp_convert_integers.is_integer(
                     input.view(),
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
@@ -151,16 +155,16 @@ cpdef Column is_integer(
                 cpp_convert_integers.is_integer(
                     input.view(),
                     int_type.c_obj,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column hex_to_integers(
-    Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new integer numeric column parsing hexadecimal values
@@ -185,7 +189,8 @@ cpdef Column hex_to_integers(
         New column with integers converted from strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -193,15 +198,15 @@ cpdef Column hex_to_integers(
             cpp_convert_integers.hex_to_integers(
                 input.view(),
                 output_type.c_obj,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
-cpdef Column is_hex(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_hex(Column input, object stream=None, DeviceMemoryResource mr=None):
     """
     Returns a boolean column identifying strings in which all
     characters are valid for conversion to integers from hex.
@@ -222,23 +227,24 @@ cpdef Column is_hex(Column input, Stream stream=None, DeviceMemoryResource mr=No
         New column of boolean results for each string.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = move(
             cpp_convert_integers.is_hex(
                 input.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column integers_to_hex(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a new strings column converting integer columns to hexadecimal
@@ -260,16 +266,17 @@ cpdef Column integers_to_hex(
         New strings column with hexadecimal characters.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = move(
             cpp_convert_integers.integers_to_hex(
                 input.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
index 53a3927af41..04df2862c31 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
@@ -1,19 +1,18 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column ipv4_to_integers(
-    Column input, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column integers_to_ipv4(
-    Column integers, Stream stream=*, DeviceMemoryResource mr=*
+    Column integers, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column is_ipv4(
-    Column input, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi
index 86a969a4021..16e4d8d990a 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi
@@ -1,23 +1,23 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def ipv4_to_integers(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def integers_to_ipv4(
     integers: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_ipv4(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
index 72021e85a9d..45b98190aa7 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -9,11 +9,12 @@ from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["integers_to_ipv4", "ipv4_to_integers", "is_ipv4"]
 
 cpdef Column ipv4_to_integers(
-    Column input, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Converts IPv4 addresses into integers.
@@ -34,19 +35,20 @@ cpdef Column ipv4_to_integers(
         New uint32 column converted from strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_convert_ipv4.ipv4_to_integers(
-            input.view(), stream.view(), mr.get_mr()
+            input.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column integers_to_ipv4(
-    Column integers, Stream stream=None, DeviceMemoryResource mr=None
+    Column integers, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Converts integers into IPv4 addresses as strings.
@@ -67,18 +69,19 @@ cpdef Column integers_to_ipv4(
         New strings column.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_convert_ipv4.integers_to_ipv4(
-            integers.view(), stream.view(), mr.get_mr()
+            integers.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
-cpdef Column is_ipv4(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_ipv4(Column input, object stream=None, DeviceMemoryResource mr=None):
     """
     Returns a boolean column identifying strings in which all
     characters are valid for conversion to integers from IPv4 format.
@@ -99,10 +102,11 @@ cpdef Column is_ipv4(Column input, Stream stream=None, DeviceMemoryResource mr=N
         New column of boolean results for each string.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_convert_ipv4.is_ipv4(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_convert_ipv4.is_ipv4(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd
index a2dcc15dacd..c25cf9d7146 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd
@@ -1,16 +1,15 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column format_list_column(
     Column input,
     Scalar na_rep=*,
     Column separators=*,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi
index cf301dd9a1b..29f94a30123 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi
@@ -1,16 +1,16 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def format_list_column(
     input: Column,
     na_rep: Scalar | None = None,
     separators: Column | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx
index 79648efcc3f..9c8f9d7b02e 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -20,6 +20,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from cython.operator import dereference
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["format_list_column"]
 
@@ -27,7 +28,7 @@ cpdef Column format_list_column(
     Column input,
     Scalar na_rep=None,
     Column separators=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -58,12 +59,13 @@ cpdef Column format_list_column(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if na_rep is None:
         na_rep = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _cs, mr.get_mr())
         )
 
     cdef const string_scalar* c_na_rep = <const string_scalar*>(
@@ -78,8 +80,8 @@ cpdef Column format_list_column(
             input.view(),
             dereference(c_na_rep),
             separators.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd
index dce44f5e547..56b1f803d38 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd
@@ -1,15 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column url_encode(
-    Column Input, Stream stream=*, DeviceMemoryResource mr=*
+    Column Input, object stream=*, DeviceMemoryResource mr=*
 )
 
 cpdef Column url_decode(
-    Column Input, Stream stream=*, DeviceMemoryResource mr=*
+    Column Input, object stream=*, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
index 6a248cdc974..8707da953b5 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
@@ -1,18 +1,18 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def url_encode(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def url_decode(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx
index 30ca51f27f7..efe009e6c02 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -10,10 +10,11 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource
 
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["url_decode", "url_encode"]
 
-cpdef Column url_encode(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column url_encode(Column input, object stream=None, DeviceMemoryResource mr=None):
     """
     Encodes each string using URL encoding.
 
@@ -33,16 +34,19 @@ cpdef Column url_encode(Column input, Stream stream=None, DeviceMemoryResource m
         New strings column.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_convert_urls.url_encode(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_convert_urls.url_encode(
+            input.view(), _cs, mr.get_mr()
+        )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
-cpdef Column url_decode(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column url_decode(Column input, object stream=None, DeviceMemoryResource mr=None):
     """
     Decodes each string using URL encoding.
 
@@ -62,10 +66,13 @@ cpdef Column url_decode(Column input, Stream stream=None, DeviceMemoryResource m
         New strings column.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_convert_urls.url_decode(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_convert_urls.url_decode(
+            input.view(), _cs, mr.get_mr()
+        )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/strings/extract.pxd
index c8fcb900d2b..85f722970c8 100644
--- a/python/pylibcudf/pylibcudf/strings/extract.pxd
+++ b/python/pylibcudf/pylibcudf/strings/extract.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
@@ -6,21 +6,20 @@ from pylibcudf.strings.regex_program cimport RegexProgram
 from pylibcudf.table cimport Table
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Table extract(
-    Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, RegexProgram prog, object stream=*, DeviceMemoryResource mr=*
 )
 
 cpdef Column extract_all_record(
-    Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, RegexProgram prog, object stream=*, DeviceMemoryResource mr=*
 )
 
 cpdef Column extract_single(
     Column input,
     RegexProgram prog,
     size_type group,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyi b/python/pylibcudf/pylibcudf/strings/extract.pyi
index 853420a8091..a9607266bbc 100644
--- a/python/pylibcudf/pylibcudf/strings/extract.pyi
+++ b/python/pylibcudf/pylibcudf/strings/extract.pyi
@@ -1,29 +1,29 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.strings.regex_program import RegexProgram
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def extract(
     input: Column,
     prog: RegexProgram,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def extract_all_record(
     input: Column,
     prog: RegexProgram,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def extract_single(
     input: Column,
     prog: RegexProgram,
     group: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyx b/python/pylibcudf/pylibcudf/strings/extract.pyx
index bac20c2cd15..c670b226e84 100644
--- a/python/pylibcudf/pylibcudf/strings/extract.pyx
+++ b/python/pylibcudf/pylibcudf/strings/extract.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -13,11 +13,12 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["extract", "extract_all_record", "extract_single"]
 
 cpdef Table extract(
-    Column input, RegexProgram prog, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, RegexProgram prog, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a table of strings columns where each column
@@ -41,22 +42,23 @@ cpdef Table extract(
         Columns of strings extracted from the input column.
     """
     cdef unique_ptr[table] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_extract.extract(
             input.view(),
             prog.c_obj.get()[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column extract_all_record(
-    Column input, RegexProgram prog, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, RegexProgram prog, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a lists column of strings where each string column
@@ -80,25 +82,26 @@ cpdef Column extract_all_record(
         Lists column containing strings extracted from the input column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_extract.extract_all_record(
             input.view(),
             prog.c_obj.get()[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column extract_single(
     Column input,
     RegexProgram prog,
     size_type group,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -124,7 +127,8 @@ cpdef Column extract_single(
         Column of strings extracted from the input column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -132,8 +136,8 @@ cpdef Column extract_single(
             input.view(),
             prog.c_obj.get()[0],
             group,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/strings/find.pxd
index 3ec32563c5a..1a04cf4eca2 100644
--- a/python/pylibcudf/pylibcudf/strings/find.pxd
+++ b/python/pylibcudf/pylibcudf/strings/find.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused ColumnOrScalar:
     Column
@@ -16,7 +15,7 @@ cpdef Column find(
     ColumnOrScalar target,
     size_type start=*,
     size_type stop=*,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
 
@@ -25,27 +24,27 @@ cpdef Column rfind(
     Scalar target,
     size_type start=*,
     size_type stop=*,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column contains(
     Column input,
     ColumnOrScalar target,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column starts_with(
     Column input,
     ColumnOrScalar target,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column ends_with(
     Column input,
     ColumnOrScalar target,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/find.pyi b/python/pylibcudf/pylibcudf/strings/find.pyi
index a566fbdd72a..a8b3ca1da7c 100644
--- a/python/pylibcudf/pylibcudf/strings/find.pyi
+++ b/python/pylibcudf/pylibcudf/strings/find.pyi
@@ -1,18 +1,18 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def find(
     input: Column,
     target: Column | Scalar,
     start: int = 0,
     stop: int = -1,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def rfind(
@@ -20,24 +20,24 @@ def rfind(
     target: Scalar,
     start: int = 0,
     stop: int = -1,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def contains(
     input: Column,
     target: Column | Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def starts_with(
     input: Column,
     target: Column | Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def ends_with(
     input: Column,
     target: Column | Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/find.pyx b/python/pylibcudf/pylibcudf/strings/find.pyx
index 7323a924342..102a8787651 100644
--- a/python/pylibcudf/pylibcudf/strings/find.pyx
+++ b/python/pylibcudf/pylibcudf/strings/find.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -14,6 +14,7 @@ from rmm.pylibrmm.stream cimport Stream
 from cython.operator import dereference
 
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["contains", "ends_with", "find", "rfind", "starts_with"]
 
@@ -22,7 +23,7 @@ cpdef Column find(
     ColumnOrScalar target,
     size_type start=0,
     size_type stop=-1,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Returns a column of character position values where the target string is
@@ -58,7 +59,8 @@ cpdef Column find(
         New integer column with character position values
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     if ColumnOrScalar is Column:
         with nogil:
@@ -66,7 +68,7 @@ cpdef Column find(
                 input.view(),
                 target.view(),
                 start,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif ColumnOrScalar is Scalar:
@@ -76,13 +78,13 @@ cpdef Column find(
                 dereference(<string_scalar*>(target.c_obj.get())),
                 start,
                 stop,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
         raise ValueError(f"Invalid target {target}")
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column rfind(
@@ -90,7 +92,7 @@ cpdef Column rfind(
     Scalar target,
     size_type start=0,
     size_type stop=-1,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -119,7 +121,8 @@ cpdef Column rfind(
         New integer column with character position values
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
         result = cpp_find.rfind(
@@ -127,16 +130,16 @@ cpdef Column rfind(
             dereference(<string_scalar*>(target.c_obj.get())),
             start,
             stop,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column contains(
     Column input,
     ColumnOrScalar target,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -169,14 +172,15 @@ cpdef Column contains(
         New boolean column with True for each string that contains the target
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     if ColumnOrScalar is Column:
         with nogil:
             result = cpp_find.contains(
                 input.view(),
                 target.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif ColumnOrScalar is Scalar:
@@ -184,19 +188,19 @@ cpdef Column contains(
             result = cpp_find.contains(
                 input.view(),
                 dereference(<string_scalar*>(target.c_obj.get())),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
         raise ValueError(f"Invalid target {target}")
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column starts_with(
     Column input,
     ColumnOrScalar target,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -229,7 +233,8 @@ cpdef Column starts_with(
         New boolean column with True for each string that starts with the target
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if ColumnOrScalar is Column:
@@ -237,7 +242,7 @@ cpdef Column starts_with(
             result = cpp_find.starts_with(
                 input.view(),
                 target.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif ColumnOrScalar is Scalar:
@@ -245,18 +250,18 @@ cpdef Column starts_with(
             result = cpp_find.starts_with(
                 input.view(),
                 dereference(<string_scalar*>(target.c_obj.get())),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
         raise ValueError(f"Invalid target {target}")
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef Column ends_with(
     Column input,
     ColumnOrScalar target,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -288,14 +293,15 @@ cpdef Column ends_with(
         New boolean column with True for each string that ends with the target
     """
     cdef unique_ptr[column] result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     if ColumnOrScalar is Column:
         with nogil:
             result = cpp_find.ends_with(
                 input.view(),
                 target.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif ColumnOrScalar is Scalar:
@@ -303,10 +309,10 @@ cpdef Column ends_with(
             result = cpp_find.ends_with(
                 input.view(),
                 dereference(<string_scalar*>(target.c_obj.get())),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
         raise ValueError(f"Invalid target {target}")
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pxd b/python/pylibcudf/pylibcudf/strings/find_multiple.pxd
index f6677607c5e..e01cb33fdb8 100644
--- a/python/pylibcudf/pylibcudf/strings/find_multiple.pxd
+++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pxd
@@ -1,21 +1,20 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.table cimport Table
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column find_multiple(
     Column input,
     Column targets,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
 cpdef Table contains_multiple(
     Column input,
     Column targets,
-    Stream stream=*,
+    object stream=*,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi
index 48de0eac0e1..76115cd7496 100644
--- a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi
+++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi
@@ -1,21 +1,21 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def find_multiple(
     input: Column,
     targets: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def contains_multiple(
     input: Column,
     targets: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx
index e18b178f803..ed5f0d78506 100644
--- a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx
+++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -11,13 +11,14 @@ from pylibcudf.table cimport Table
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["find_multiple", "contains_multiple"]
 
 cpdef Column find_multiple(
     Column input,
     Column targets,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -41,24 +42,25 @@ cpdef Column find_multiple(
         Lists column with character position values
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_find_multiple.find_multiple(
             input.view(),
             targets.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table contains_multiple(
     Column input,
     Column targets,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -82,15 +84,16 @@ cpdef Table contains_multiple(
         Columns of booleans
     """
     cdef unique_ptr[table] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_find_multiple.contains_multiple(
             input.view(),
             targets.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/strings/findall.pxd
index 2dc75fa6d34..ec7e01f7539 100644
--- a/python/pylibcudf/pylibcudf/strings/findall.pxd
+++ b/python/pylibcudf/pylibcudf/strings/findall.pxd
@@ -1,15 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.strings.regex_program cimport RegexProgram
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column find_re(
-    Column input, RegexProgram pattern, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, RegexProgram pattern, object stream=*, DeviceMemoryResource mr=*
 )
 cpdef Column findall(
-    Column input, RegexProgram pattern, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, RegexProgram pattern, object stream=*, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyi b/python/pylibcudf/pylibcudf/strings/findall.pyi
index 5677a99d325..f72e786cf1d 100644
--- a/python/pylibcudf/pylibcudf/strings/findall.pyi
+++ b/python/pylibcudf/pylibcudf/strings/findall.pyi
@@ -1,21 +1,21 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.strings.regex_program import RegexProgram
+from pylibcudf.utils import CudaStreamLike
 
 def find_re(
     input: Column,
     pattern: RegexProgram,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def findall(
     input: Column,
     pattern: RegexProgram,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx
index 881664faced..5647a791ef1 100644
--- a/python/pylibcudf/pylibcudf/strings/findall.pyx
+++ b/python/pylibcudf/pylibcudf/strings/findall.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -10,11 +10,12 @@ from pylibcudf.strings.regex_program cimport RegexProgram
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["findall", "find_re"]
 
 cpdef Column findall(
-    Column input, RegexProgram pattern, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, RegexProgram pattern, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns a lists column of strings for each matching occurrence using
@@ -37,22 +38,23 @@ cpdef Column findall(
         New lists column of strings
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_findall.findall(
             input.view(),
             pattern.c_obj.get()[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column find_re(
-    Column input, RegexProgram pattern, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, RegexProgram pattern, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Returns character positions where the pattern first matches
@@ -75,15 +77,16 @@ cpdef Column find_re(
         New column of integers
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_findall.find_re(
             input.view(),
             pattern.c_obj.get()[0],
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/strings/padding.pxd
index 1dfbbd9950f..61dcaf7cba9 100644
--- a/python/pylibcudf/pylibcudf/strings/padding.pxd
+++ b/python/pylibcudf/pylibcudf/strings/padding.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.string cimport string
@@ -6,7 +6,6 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.strings.side_type cimport side_type
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column pad(
@@ -14,14 +13,14 @@ cpdef Column pad(
     size_type width,
     side_type side,
     str fill_char,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column zfill(
-    Column input, size_type width, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, size_type width, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column zfill_by_widths(
-    Column input, Column widths, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Column widths, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyi b/python/pylibcudf/pylibcudf/strings/padding.pyi
index 26af5429acb..904b0022317 100644
--- a/python/pylibcudf/pylibcudf/strings/padding.pyi
+++ b/python/pylibcudf/pylibcudf/strings/padding.pyi
@@ -1,29 +1,29 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.strings.side_type import SideType
+from pylibcudf.utils import CudaStreamLike
 
 def pad(
     input: Column,
     width: int,
     side: SideType,
     fill_char: str,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def zfill(
     input: Column,
     width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def zfill_by_widths(
     input: Column,
     widths: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyx b/python/pylibcudf/pylibcudf/strings/padding.pyx
index 9409970b075..d8eb4f1da4a 100644
--- a/python/pylibcudf/pylibcudf/strings/padding.pyx
+++ b/python/pylibcudf/pylibcudf/strings/padding.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["pad", "zfill", "zfill_by_widths"]
 
@@ -18,7 +19,7 @@ cpdef Column pad(
     size_type width,
     side_type side,
     str fill_char,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -46,7 +47,8 @@ cpdef Column pad(
     """
     cdef unique_ptr[column] c_result
     cdef string c_fill_char = fill_char.encode("utf-8")
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -55,14 +57,14 @@ cpdef Column pad(
             width,
             side,
             c_fill_char,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column zfill(
-    Column input, size_type width, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, size_type width, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Add '0' as padding to the left of each string.
@@ -84,21 +86,22 @@ cpdef Column zfill(
         New column of strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_padding.zfill(
             input.view(),
             width,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column zfill_by_widths(
-    Column input, Column widths, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, Column widths, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Add '0' as padding to the left of each string.
@@ -120,15 +123,16 @@ cpdef Column zfill_by_widths(
         New column of strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_padding.zfill_by_widths(
             input.view(),
             widths.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/strings/repeat.pxd
index f1abe23ce59..60725aa688e 100644
--- a/python/pylibcudf/pylibcudf/strings/repeat.pxd
+++ b/python/pylibcudf/pylibcudf/strings/repeat.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused ColumnorSizeType:
     Column
@@ -13,6 +12,6 @@ ctypedef fused ColumnorSizeType:
 cpdef Column repeat_strings(
     Column input,
     ColumnorSizeType repeat_times,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyi b/python/pylibcudf/pylibcudf/strings/repeat.pyi
index 5b47213e956..fedb7dee76c 100644
--- a/python/pylibcudf/pylibcudf/strings/repeat.pyi
+++ b/python/pylibcudf/pylibcudf/strings/repeat.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def repeat_strings(
     input: Column,
     repeat_times: Column | int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyx b/python/pylibcudf/pylibcudf/strings/repeat.pyx
index 84a305bf866..7a9c5285d02 100644
--- a/python/pylibcudf/pylibcudf/strings/repeat.pyx
+++ b/python/pylibcudf/pylibcudf/strings/repeat.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -11,13 +11,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from ..utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["repeat_strings"]
 
 cpdef Column repeat_strings(
     Column input,
     ColumnorSizeType repeat_times,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -44,7 +45,8 @@ cpdef Column repeat_strings(
         New column containing the repeated strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if ColumnorSizeType is Column:
@@ -52,7 +54,7 @@ cpdef Column repeat_strings(
             c_result = cpp_repeat.repeat_strings(
                 input.view(),
                 repeat_times.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     elif ColumnorSizeType is size_type:
@@ -60,10 +62,10 @@ cpdef Column repeat_strings(
             c_result = cpp_repeat.repeat_strings(
                 input.view(),
                 repeat_times,
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
         raise ValueError("repeat_times must be size_type or integer")
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/strings/replace.pxd
index a486869aada..aea2296b5f9 100644
--- a/python/pylibcudf/pylibcudf/strings/replace.pxd
+++ b/python/pylibcudf/pylibcudf/strings/replace.pxd
@@ -1,11 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column replace(
@@ -13,7 +12,7 @@ cpdef Column replace(
     Scalar target,
     Scalar repl,
     size_type maxrepl=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 cpdef Column replace_multiple(
@@ -21,7 +20,7 @@ cpdef Column replace_multiple(
     Column target,
     Column repl,
     size_type maxrepl=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
 cpdef Column replace_slice(
@@ -29,6 +28,6 @@ cpdef Column replace_slice(
     Scalar repl=*,
     size_type start=*,
     size_type stop=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyi b/python/pylibcudf/pylibcudf/strings/replace.pyi
index 3e62a76d2bf..0e76eb402f7 100644
--- a/python/pylibcudf/pylibcudf/strings/replace.pyi
+++ b/python/pylibcudf/pylibcudf/strings/replace.pyi
@@ -1,18 +1,18 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def replace(
     input: Column,
     target: Scalar,
     repl: Scalar,
     maxrepl: int = -1,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def replace_multiple(
@@ -20,7 +20,7 @@ def replace_multiple(
     target: Column,
     repl: Column,
     maxrepl: int = -1,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def replace_slice(
@@ -28,6 +28,6 @@ def replace_slice(
     repl: Scalar | None = None,
     start: int = 0,
     stop: int = -1,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx
index e1d88fed464..ccd6c924441 100644
--- a/python/pylibcudf/pylibcudf/strings/replace.pyx
+++ b/python/pylibcudf/pylibcudf/strings/replace.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["replace", "replace_multiple", "replace_slice"]
 
@@ -27,7 +28,7 @@ cpdef Column replace(
     Scalar target,
     Scalar repl,
     size_type maxrepl=-1,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Replaces target string within each string with the specified replacement string.
@@ -60,7 +61,8 @@ cpdef Column replace(
 
     target_str = <string_scalar *>(target.c_obj.get())
     repl_str = <string_scalar *>(repl.c_obj.get())
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -69,11 +71,11 @@ cpdef Column replace(
             target_str[0],
             repl_str[0],
             maxrepl,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column replace_multiple(
@@ -81,7 +83,7 @@ cpdef Column replace_multiple(
     Column target,
     Column repl,
     size_type maxrepl=-1,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Replaces target string within each string with the specified replacement string.
@@ -109,7 +111,8 @@ cpdef Column replace_multiple(
         New string column with target replaced.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -117,11 +120,11 @@ cpdef Column replace_multiple(
             input.view(),
             target.view(),
             repl.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column replace_slice(
@@ -131,7 +134,7 @@ cpdef Column replace_slice(
     Scalar repl=None,
     size_type start=0,
     size_type stop=-1,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Replaces each string in the column with the provided repl string
@@ -162,12 +165,13 @@ cpdef Column replace_slice(
         New string column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if repl is None:
         repl = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr())
         )
 
     cdef const string_scalar* scalar_str = <string_scalar*>(repl.c_obj.get())
@@ -178,8 +182,8 @@ cpdef Column replace_slice(
             scalar_str[0],
             start,
             stop,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/strings/replace_re.pxd
index fc833a61045..0d360f8de6f 100644
--- a/python/pylibcudf/pylibcudf/strings/replace_re.pxd
+++ b/python/pylibcudf/pylibcudf/strings/replace_re.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
@@ -7,7 +7,6 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.strings.regex_flags cimport regex_flags
 from pylibcudf.strings.regex_program cimport RegexProgram
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused Replacement:
     Column
@@ -24,7 +23,7 @@ cpdef Column replace_re(
     Replacement replacement=*,
     size_type max_replace_count=*,
     regex_flags flags=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
 
@@ -32,6 +31,6 @@ cpdef Column replace_with_backrefs(
     Column input,
     RegexProgram prog,
     str replacement,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyi b/python/pylibcudf/pylibcudf/strings/replace_re.pyi
index 29f8ddfe925..64970928323 100644
--- a/python/pylibcudf/pylibcudf/strings/replace_re.pyi
+++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyi
@@ -1,15 +1,15 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from typing import overload
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.strings.regex_flags import RegexFlags
 from pylibcudf.strings.regex_program import RegexProgram
+from pylibcudf.utils import CudaStreamLike
 
 @overload
 def replace_re(
@@ -17,7 +17,7 @@ def replace_re(
     pattern: RegexProgram,
     replacement: Scalar,
     max_replace_count: int = -1,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 @overload
@@ -27,13 +27,13 @@ def replace_re(
     replacement: Column,
     max_replace_count: int = -1,
     flags: RegexFlags = RegexFlags.DEFAULT,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def replace_with_backrefs(
     input: Column,
     prog: RegexProgram,
     replacement: str,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx
index 1819dd0ba2b..60e9c4c1666 100644
--- a/python/pylibcudf/pylibcudf/strings/replace_re.pyx
+++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from cython.operator cimport dereference
 from libcpp.memory cimport unique_ptr
@@ -19,6 +19,7 @@ from pylibcudf.strings.regex_program cimport RegexProgram
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["replace_re", "replace_with_backrefs"]
 
@@ -28,7 +29,7 @@ cpdef Column replace_re(
     Replacement replacement=None,
     size_type max_replace_count=-1,
     regex_flags flags=regex_flags.DEFAULT,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -64,13 +65,14 @@ cpdef Column replace_re(
     """
     cdef unique_ptr[column] c_result
     cdef vector[string] c_patterns
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if Patterns is RegexProgram and Replacement is Scalar:
         if replacement is None:
             replacement = Scalar.from_libcudf(
-                cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+                cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr())
             )
         with nogil:
             c_result = move(
@@ -79,12 +81,12 @@ cpdef Column replace_re(
                     patterns.c_obj.get()[0],
                     dereference(<string_scalar*>(replacement.get())),
                     max_replace_count,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
 
-        return Column.from_libcudf(move(c_result), stream, mr)
+        return Column.from_libcudf(move(c_result), _stream, mr)
     elif Patterns is list and Replacement is Column:
         c_patterns.reserve(len(patterns))
         for pattern in patterns:
@@ -97,12 +99,12 @@ cpdef Column replace_re(
                     c_patterns,
                     replacement.view(),
                     flags,
-                    stream.view(),
+                    _cs,
                     mr.get_mr()
                 )
             )
 
-        return Column.from_libcudf(move(c_result), stream, mr)
+        return Column.from_libcudf(move(c_result), _stream, mr)
     else:
         raise TypeError("Must pass either a RegexProgram and a Scalar or a list")
 
@@ -111,7 +113,7 @@ cpdef Column replace_with_backrefs(
     Column input,
     RegexProgram prog,
     str replacement,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -137,7 +139,8 @@ cpdef Column replace_with_backrefs(
         New strings column.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     cdef string c_replacement = replacement.encode()
 
@@ -146,8 +149,8 @@ cpdef Column replace_with_backrefs(
             input.view(),
             prog.c_obj.get()[0],
             c_replacement,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/reverse.pyi b/python/pylibcudf/pylibcudf/strings/reverse.pyi
index 182f4768825..48c602e2d28 100644
--- a/python/pylibcudf/pylibcudf/strings/reverse.pyi
+++ b/python/pylibcudf/pylibcudf/strings/reverse.pyi
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def reverse(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/reverse.pyx b/python/pylibcudf/pylibcudf/strings/reverse.pyx
index 49792b5661b..f1d06248523 100644
--- a/python/pylibcudf/pylibcudf/strings/reverse.pyx
+++ b/python/pylibcudf/pylibcudf/strings/reverse.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -9,10 +9,11 @@ from pylibcudf.libcudf.strings cimport reverse as cpp_reverse
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["reverse"]
 
-cpdef Column reverse(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column reverse(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Reverses the characters within each string.
 
     Any null string entries return corresponding null output column entries.
@@ -32,9 +33,10 @@ cpdef Column reverse(Column input, Stream stream=None, DeviceMemoryResource mr=N
         New strings column
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = cpp_reverse.reverse(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_reverse.reverse(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/slice.pxd b/python/pylibcudf/pylibcudf/strings/slice.pxd
index 6bb5a8d3611..9612ead3108 100644
--- a/python/pylibcudf/pylibcudf/strings/slice.pxd
+++ b/python/pylibcudf/pylibcudf/strings/slice.pxd
@@ -1,10 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 ctypedef fused ColumnOrScalar:
     Column
@@ -15,6 +14,6 @@ cpdef Column slice_strings(
     ColumnOrScalar start=*,
     ColumnOrScalar stop=*,
     Scalar step=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyi b/python/pylibcudf/pylibcudf/strings/slice.pyi
index 73ee8c31b5b..ac2e4d12f1f 100644
--- a/python/pylibcudf/pylibcudf/strings/slice.pyi
+++ b/python/pylibcudf/pylibcudf/strings/slice.pyi
@@ -1,17 +1,17 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 def slice_strings(
     input: Column,
     start: Column | Scalar | None = None,
     stop: Column | Scalar | None = None,
     step: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx
index 2b5bbf2f621..b3ac2cd8bfe 100644
--- a/python/pylibcudf/pylibcudf/strings/slice.pyx
+++ b/python/pylibcudf/pylibcudf/strings/slice.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -18,6 +18,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from ..utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["slice_strings"]
 
@@ -26,7 +27,7 @@ cpdef Column slice_strings(
     ColumnOrScalar start=None,
     ColumnOrScalar stop=None,
     Scalar step=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Perform a slice operation on a strings column.
@@ -60,7 +61,8 @@ cpdef Column slice_strings(
     cdef numeric_scalar[size_type]* cpp_start
     cdef numeric_scalar[size_type]* cpp_stop
     cdef numeric_scalar[size_type]* cpp_step
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if input is None:
@@ -80,22 +82,22 @@ cpdef Column slice_strings(
                 input.view(),
                 start.view(),
                 stop.view(),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
 
     elif ColumnOrScalar is Scalar:
         if start is None:
             start = Scalar.from_libcudf(
-                cpp_make_fixed_width_scalar(0, stream.view(), mr.get_mr())
+                cpp_make_fixed_width_scalar(0, _stream.view().value(), mr.get_mr())
             )
         if stop is None:
             stop = Scalar.from_libcudf(
-                cpp_make_fixed_width_scalar(0, stream.view(), mr.get_mr())
+                cpp_make_fixed_width_scalar(0, _stream.view().value(), mr.get_mr())
             )
         if step is None:
             step = Scalar.from_libcudf(
-                cpp_make_fixed_width_scalar(1, stream.view(), mr.get_mr())
+                cpp_make_fixed_width_scalar(1, _stream.view().value(), mr.get_mr())
             )
 
         cpp_start = <numeric_scalar[size_type]*>start.c_obj.get()
@@ -108,10 +110,10 @@ cpdef Column slice_strings(
                 dereference(cpp_start),
                 dereference(cpp_stop),
                 dereference(cpp_step),
-                stream.view(),
+                _cs,
                 mr.get_mr()
             )
     else:
         raise ValueError("start, stop, and step must be either Column or Scalar")
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/strings/split/partition.pxd
index d8001682b32..e3da533c90c 100644
--- a/python/pylibcudf/pylibcudf/strings/split/partition.pxd
+++ b/python/pylibcudf/pylibcudf/strings/split/partition.pxd
@@ -1,17 +1,16 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.scalar cimport Scalar
 from pylibcudf.table cimport Table
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Table partition(
-    Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Table rpartition(
-    Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyi b/python/pylibcudf/pylibcudf/strings/split/partition.pyi
index d919b68153c..cef2d16aea6 100644
--- a/python/pylibcudf/pylibcudf/strings/split/partition.pyi
+++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyi
@@ -1,22 +1,22 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def partition(
     input: Column,
     delimiter: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def rpartition(
     input: Column,
     delimiter: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyx b/python/pylibcudf/pylibcudf/strings/split/partition.pyx
index 728d7b9975d..ce813c10bba 100644
--- a/python/pylibcudf/pylibcudf/strings/split/partition.pyx
+++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -16,13 +16,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from cython.operator import dereference
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["partition", "rpartition"]
 
 cpdef Table partition(
     Column input,
     Scalar delimiter=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -46,12 +47,13 @@ cpdef Table partition(
     """
     cdef unique_ptr[table] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if delimiter is None:
         delimiter = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr())
         )
 
     cdef const string_scalar* c_delimiter = <const string_scalar*>(
@@ -62,16 +64,16 @@ cpdef Table partition(
         c_result = cpp_partition.partition(
             input.view(),
             dereference(c_delimiter),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Table rpartition(
     Column input,
     Scalar delimiter=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -95,12 +97,13 @@ cpdef Table rpartition(
     """
     cdef unique_ptr[table] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if delimiter is None:
         delimiter = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr())
         )
 
     cdef const string_scalar* c_delimiter = <const string_scalar*>(
@@ -111,8 +114,8 @@ cpdef Table rpartition(
         c_result = cpp_partition.rpartition(
             input.view(),
             dereference(c_delimiter),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/strings/split/split.pxd
index 06b77154b18..2372a177944 100644
--- a/python/pylibcudf/pylibcudf/strings/split/split.pxd
+++ b/python/pylibcudf/pylibcudf/strings/split/split.pxd
@@ -7,50 +7,49 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.strings.regex_program cimport RegexProgram
 from pylibcudf.table cimport Table
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Table split(
-    Column strings_column, Scalar delimiter, size_type maxsplit, Stream stream=*,
+    Column strings_column, Scalar delimiter, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Table rsplit(
-    Column strings_column, Scalar delimiter, size_type maxsplit, Stream stream=*,
+    Column strings_column, Scalar delimiter, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column split_record(
-    Column strings, Scalar delimiter, size_type maxsplit, Stream stream=*,
+    Column strings, Scalar delimiter, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column rsplit_record(
-    Column strings, Scalar delimiter, size_type maxsplit, Stream stream=*,
+    Column strings, Scalar delimiter, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Table split_re(
-    Column input, RegexProgram prog, size_type maxsplit, Stream stream=*,
+    Column input, RegexProgram prog, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Table rsplit_re(
-    Column input, RegexProgram prog, size_type maxsplit, Stream stream=*,
+    Column input, RegexProgram prog, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column split_record_re(
-    Column input, RegexProgram prog, size_type maxsplit, Stream stream=*,
+    Column input, RegexProgram prog, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column rsplit_record_re(
-    Column input, RegexProgram prog, size_type maxsplit, Stream stream=*,
+    Column input, RegexProgram prog, size_type maxsplit, object stream = *,
     DeviceMemoryResource mr=*,
 )
 
 cpdef Column split_part(
-    Column input, Scalar delimiter, size_type index, Stream stream=*,
+    Column input, Scalar delimiter, size_type index, object stream = *,
     DeviceMemoryResource mr=*,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyi b/python/pylibcudf/pylibcudf/strings/split/split.pyi
index ae64e300b63..7a775bd960c 100644
--- a/python/pylibcudf/pylibcudf/strings/split/split.pyi
+++ b/python/pylibcudf/pylibcudf/strings/split/split.pyi
@@ -2,73 +2,73 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.strings.regex_program import RegexProgram
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def split(
     strings_column: Column,
     delimiter: Scalar,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def rsplit(
     strings_column: Column,
     delimiter: Scalar,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def split_record(
     strings: Column,
     delimiter: Scalar,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def rsplit_record(
     strings: Column,
     delimiter: Scalar,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def split_re(
     input: Column,
     prog: RegexProgram,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def rsplit_re(
     input: Column,
     prog: RegexProgram,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
 def split_record_re(
     input: Column,
     prog: RegexProgram,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def rsplit_record_re(
     input: Column,
     prog: RegexProgram,
     maxsplit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def split_part(
     input: Column,
     delimiter: Scalar,
     index: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyx b/python/pylibcudf/pylibcudf/strings/split/split.pyx
index 0635df87e13..52803b08eb0 100644
--- a/python/pylibcudf/pylibcudf/strings/split/split.pyx
+++ b/python/pylibcudf/pylibcudf/strings/split/split.pyx
@@ -16,6 +16,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from cython.operator import dereference
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "rsplit",
@@ -32,7 +33,7 @@ cpdef Table split(
     Column strings_column,
     Scalar delimiter,
     size_type maxsplit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -65,7 +66,8 @@ cpdef Table split(
     cdef const string_scalar* c_delimiter = <const string_scalar*>(
         delimiter.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -73,18 +75,18 @@ cpdef Table split(
             strings_column.view(),
             dereference(c_delimiter),
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table rsplit(
     Column strings_column,
     Scalar delimiter,
     size_type maxsplit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -117,7 +119,8 @@ cpdef Table rsplit(
     cdef const string_scalar* c_delimiter = <const string_scalar*>(
         delimiter.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -125,17 +128,17 @@ cpdef Table rsplit(
             strings_column.view(),
             dereference(c_delimiter),
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column split_record(
     Column strings,
     Scalar delimiter,
     size_type maxsplit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -164,7 +167,8 @@ cpdef Column split_record(
     cdef const string_scalar* c_delimiter = <const string_scalar*>(
         delimiter.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -172,18 +176,18 @@ cpdef Column split_record(
             strings.view(),
             dereference(c_delimiter),
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column rsplit_record(
     Column strings,
     Scalar delimiter,
     size_type maxsplit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -213,7 +217,8 @@ cpdef Column rsplit_record(
     cdef const string_scalar* c_delimiter = <const string_scalar*>(
         delimiter.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -221,18 +226,18 @@ cpdef Column rsplit_record(
             strings.view(),
             dereference(c_delimiter),
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Table split_re(
     Column input,
     RegexProgram prog,
     size_type maxsplit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -259,7 +264,8 @@ cpdef Table split_re(
         A table of columns of strings.
     """
     cdef unique_ptr[table] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -267,17 +273,17 @@ cpdef Table split_re(
             input.view(),
             prog.c_obj.get()[0],
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Table rsplit_re(
     Column input,
     RegexProgram prog,
     size_type maxsplit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -305,7 +311,8 @@ cpdef Table rsplit_re(
         A table of columns of strings.
     """
     cdef unique_ptr[table] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -313,17 +320,17 @@ cpdef Table rsplit_re(
             input.view(),
             prog.c_obj.get()[0],
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Table.from_libcudf(move(c_result), stream, mr)
+    return Table.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column split_record_re(
     Column input,
     RegexProgram prog,
     size_type maxsplit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -350,7 +357,8 @@ cpdef Column split_record_re(
         Lists column of strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -358,14 +366,14 @@ cpdef Column split_record_re(
             input.view(),
             prog.c_obj.get()[0],
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef Column rsplit_record_re(
-    Column input, RegexProgram prog, size_type maxsplit, Stream stream=None,
+    Column input, RegexProgram prog, size_type maxsplit, object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -392,7 +400,8 @@ cpdef Column rsplit_record_re(
         Lists column of strings.
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -400,22 +409,23 @@ cpdef Column rsplit_record_re(
             input.view(),
             prog.c_obj.get()[0],
             maxsplit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column split_part(
-    Column input, Scalar delimiter, size_type index, Stream stream=None,
+    Column input, Scalar delimiter, size_type index, object stream=None,
     DeviceMemoryResource mr=None,
 ):
     cdef unique_ptr[column] c_result
     cdef const string_scalar* c_delimiter = <const string_scalar*>(
         delimiter.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -423,8 +433,8 @@ cpdef Column split_part(
             input.view(),
             dereference(c_delimiter),
             index,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/strip.pxd b/python/pylibcudf/pylibcudf/strings/strip.pxd
index d3f41ce9a5c..a37ac40c523 100644
--- a/python/pylibcudf/pylibcudf/strings/strip.pxd
+++ b/python/pylibcudf/pylibcudf/strings/strip.pxd
@@ -1,17 +1,16 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.scalar cimport Scalar
 from pylibcudf.strings.side_type cimport side_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column strip(
     Column input,
     side_type side=*,
     Scalar to_strip=*,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyi b/python/pylibcudf/pylibcudf/strings/strip.pyi
index ecb80b632d7..786079769c7 100644
--- a/python/pylibcudf/pylibcudf/strings/strip.pyi
+++ b/python/pylibcudf/pylibcudf/strings/strip.pyi
@@ -1,17 +1,17 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.strings.side_type import SideType
+from pylibcudf.utils import CudaStreamLike
 
 def strip(
     input: Column,
     side: SideType = SideType.BOTH,
     to_strip: Scalar | None = None,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyx b/python/pylibcudf/pylibcudf/strings/strip.pyx
index 3b477fa83ad..607428b6f69 100644
--- a/python/pylibcudf/pylibcudf/strings/strip.pyx
+++ b/python/pylibcudf/pylibcudf/strings/strip.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from cython.operator cimport dereference
@@ -16,6 +16,7 @@ from pylibcudf.strings.side_type cimport side_type
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["strip"]
 
@@ -23,7 +24,7 @@ cpdef Column strip(
     Column input,
     side_type side=side_type.BOTH,
     Scalar to_strip=None,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Removes the specified characters from the beginning
@@ -47,12 +48,13 @@ cpdef Column strip(
     pylibcudf.Column
         New strings column.
     """
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     if to_strip is None:
         to_strip = Scalar.from_libcudf(
-            cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr())
+            cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr())
         )
 
     cdef unique_ptr[column] c_result
@@ -64,8 +66,8 @@ cpdef Column strip(
             input.view(),
             side,
             dereference(cpp_to_strip),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/strings/translate.pxd
index 2d74e2f4a2c..d6a80ddfd43 100644
--- a/python/pylibcudf/pylibcudf/strings/translate.pxd
+++ b/python/pylibcudf/pylibcudf/strings/translate.pxd
@@ -1,14 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.strings.translate cimport filter_type
 from pylibcudf.scalar cimport Scalar
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column translate(
-    Column input, dict chars_table, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, dict chars_table, object stream = *, DeviceMemoryResource mr=*
 )
 
 cpdef Column filter_characters(
@@ -16,6 +15,6 @@ cpdef Column filter_characters(
     dict characters_to_filter,
     filter_type keep_characters,
     Scalar replacement,
-    Stream stream=*,
+    object stream = *,
     DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyi b/python/pylibcudf/pylibcudf/strings/translate.pyi
index a01b786fd6f..9e7624e0b17 100644
--- a/python/pylibcudf/pylibcudf/strings/translate.pyi
+++ b/python/pylibcudf/pylibcudf/strings/translate.pyi
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from collections.abc import Mapping
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.utils import CudaStreamLike
 
 class FilterType(IntEnum):
     KEEP = ...
@@ -16,7 +16,7 @@ class FilterType(IntEnum):
 def translate(
     input: Column,
     chars_table: Mapping[int | str, int | str],
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def filter_characters(
@@ -24,6 +24,6 @@ def filter_characters(
     characters_to_filter: Mapping[int | str, int | str],
     keep_characters: FilterType,
     replacement: Scalar,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyx b/python/pylibcudf/pylibcudf/strings/translate.pyx
index 06c772330df..2a60ff881d4 100644
--- a/python/pylibcudf/pylibcudf/strings/translate.pyx
+++ b/python/pylibcudf/pylibcudf/strings/translate.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
@@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
 
 from cython.operator import dereference
+from cuda.bindings.cyruntime cimport cudaStream_t
 from pylibcudf.libcudf.strings.translate import \
     filter_type as FilterType  # no-cython-lint
 
@@ -43,7 +44,7 @@ cdef vector[pair[char_utf8, char_utf8]] _table_to_c_table(dict table):
 
 
 cpdef Column translate(
-    Column input, dict chars_table, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, dict chars_table, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Translates individual characters within each string.
@@ -69,17 +70,18 @@ cpdef Column translate(
     cdef vector[pair[char_utf8, char_utf8]] c_chars_table = _table_to_c_table(
         chars_table
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_translate.translate(
             input.view(),
             c_chars_table,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column filter_characters(
@@ -87,7 +89,7 @@ cpdef Column filter_characters(
     dict characters_to_filter,
     filter_type keep_characters,
     Scalar replacement,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """
@@ -124,7 +126,8 @@ cpdef Column filter_characters(
     cdef const string_scalar* c_replacement = <const string_scalar*>(
         replacement.c_obj.get()
     )
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -133,9 +136,9 @@ cpdef Column filter_characters(
             c_characters_to_filter,
             keep_characters,
             dereference(c_replacement),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 FilterType.__str__ = FilterType.__repr__
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/strings/wrap.pxd
index 62faaff36f0..ea74927498d 100644
--- a/python/pylibcudf/pylibcudf/strings/wrap.pxd
+++ b/python/pylibcudf/pylibcudf/strings/wrap.pxd
@@ -1,12 +1,11 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.types cimport size_type
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
-from rmm.pylibrmm.stream cimport Stream
 
 
 cpdef Column wrap(
-    Column input, size_type width, Stream stream=*, DeviceMemoryResource mr=*
+    Column input, size_type width, object stream = *, DeviceMemoryResource mr=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyi b/python/pylibcudf/pylibcudf/strings/wrap.pyi
index 00c939cc420..aa88b64a391 100644
--- a/python/pylibcudf/pylibcudf/strings/wrap.pyi
+++ b/python/pylibcudf/pylibcudf/strings/wrap.pyi
@@ -1,14 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
+from pylibcudf.utils import CudaStreamLike
 
 def wrap(
     input: Column,
     width: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyx b/python/pylibcudf/pylibcudf/strings/wrap.pyx
index 504c469debc..28bc310b5a4 100644
--- a/python/pylibcudf/pylibcudf/strings/wrap.pyx
+++ b/python/pylibcudf/pylibcudf/strings/wrap.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.memory cimport unique_ptr
@@ -10,11 +10,12 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.utils cimport _get_stream, _get_memory_resource
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["wrap"]
 
 cpdef Column wrap(
-    Column input, size_type width, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, size_type width, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Wraps strings onto multiple lines shorter than `width` by
@@ -41,15 +42,16 @@ cpdef Column wrap(
         Column of wrapped strings
     """
     cdef unique_ptr[column] c_result
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_wrap.wrap(
             input.view(),
             width,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
diff --git a/python/pylibcudf/pylibcudf/table.pxd b/python/pylibcudf/pylibcudf/table.pxd
index 4a4a963e0de..76c38dacf3f 100644
--- a/python/pylibcudf/pylibcudf/table.pxd
+++ b/python/pylibcudf/pylibcudf/table.pxd
@@ -4,7 +4,6 @@
 from libcpp.memory cimport unique_ptr
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 cdef class Table:
@@ -20,7 +19,7 @@ cdef class Table:
     @staticmethod
     cdef Table from_libcudf(
         unique_ptr[table] libcudf_tbl,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr
     )
 
@@ -31,8 +30,8 @@ cdef class Table:
     cdef Table from_table_view_of_arbitrary(
         const table_view& tv,
         object owner,
-        Stream stream,
+        object stream,
     )
 
     cpdef list columns(self)
-    cpdef Table copy(self, Stream stream=*, DeviceMemoryResource mr=*)
+    cpdef Table copy(self, object stream = *, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi
index 0f8de52b132..263bf813c75 100644
--- a/python/pylibcudf/pylibcudf/table.pyi
+++ b/python/pylibcudf/pylibcudf/table.pyi
@@ -4,11 +4,11 @@
 from typing import Any
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf._interop_helpers import ArrowLike, ColumnMetadata
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 class Table:
     def __init__(self, column: list[Column]): ...
@@ -18,22 +18,22 @@ class Table:
     def columns(self) -> list[Column]: ...
     def copy(
         self,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Table: ...
     def to_arrow(
         self,
         metadata: list[ColumnMetadata | str] | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
     ) -> ArrowLike: ...
     # Private methods below are included because polars is currently using them,
     # but we want to remove stubs for these private methods eventually
     def _to_schema(self, metadata: Any = None) -> Any: ...
-    def _to_host_array(self, stream: Stream) -> Any: ...
+    def _to_host_array(self, stream: CudaStreamLike) -> Any: ...
     @staticmethod
     def from_arrow(
         arrow_like: ArrowLike,
         dtype: DataType | None = None,
-        stream: Stream | None = None,
+        stream: CudaStreamLike | None = None,
         mr: DeviceMemoryResource | None = None,
     ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx
index 654cf9bb60b..6b62a5428f9 100644
--- a/python/pylibcudf/pylibcudf/table.pyx
+++ b/python/pylibcudf/pylibcudf/table.pyx
@@ -39,6 +39,7 @@ from pylibcudf._interop_helpers cimport (
     _metadata_to_libcudf,
 )
 from ._interop_helpers import ArrowLike, ColumnMetadata, _ObjectWithArrowMetadata
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 try:
     import pyarrow as pa
@@ -105,7 +106,7 @@ cdef class Table:
     def from_arrow(
         obj: ArrowLike,
         dtype: DataType | None = None,
-        Stream stream=None,
+        object stream=None,
         DeviceMemoryResource mr=None
     ) -> Table:
         """
@@ -154,7 +155,8 @@ cdef class Table:
         cdef _ArrowTableHolder result
         cdef unique_ptr[arrow_table] c_result
 
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
         mr = _get_memory_resource(mr)
 
         if hasattr(obj, "__arrow_c_device_array__"):
@@ -170,7 +172,7 @@ cdef class Table:
                 c_result = make_unique[arrow_table](
                     move(dereference(c_schema)),
                     move(dereference(c_array)),
-                    stream.view(),
+                    _cs,
                     result.mr.get_mr(),
                 )
             result.tbl.swap(c_result)
@@ -193,7 +195,7 @@ cdef class Table:
             with nogil:
                 c_result = make_unique[arrow_table](
                     move(dereference(c_stream)),
-                    stream.view(),
+                    _cs,
                     result.mr.get_mr(),
                 )
             result.tbl.swap(c_result)
@@ -233,7 +235,7 @@ cdef class Table:
     @staticmethod
     cdef Table from_libcudf(
         unique_ptr[table] libcudf_tbl,
-        Stream stream,
+        object stream,
         DeviceMemoryResource mr
     ):
         """Create a Table from a libcudf table.
@@ -275,7 +277,7 @@ cdef class Table:
     cdef Table from_table_view_of_arbitrary(
         const table_view& tv,
         object owner,
-        Stream stream,
+        object stream,
     ):
         """Create a Table from a libcudf table_view into an arbitrary owner.
 
@@ -292,8 +294,9 @@ cdef class Table:
         # For efficiency, prohibit calling this overload with a Table owner.
         assert not isinstance(owner, Table)
         cdef int i
+        cdef Stream _stream = <Stream>stream
         return Table([
-            Column.from_column_view_of_arbitrary(tv.column(i), owner, stream)
+            Column.from_column_view_of_arbitrary(tv.column(i), owner, _stream)
             for i in range(tv.num_columns())
         ])
 
@@ -315,7 +318,7 @@ cdef class Table:
         """The shape of this table"""
         return (self.num_rows(), self.num_columns())
 
-    cpdef Table copy(self, Stream stream=None, DeviceMemoryResource mr=None):
+    cpdef Table copy(self, object stream=None, DeviceMemoryResource mr=None):
         """Create a deep copy of the table.
 
         Parameters
@@ -330,9 +333,9 @@ cdef class Table:
         Table
             A new Table with deep copies of all columns.
         """
-        stream = _get_stream(stream)
+        cdef Stream _stream = _get_stream(stream)
         mr = _get_memory_resource(mr)
-        return Table([col.copy(stream, mr) for col in self._columns])
+        return Table([col.copy(_stream, mr) for col in self._columns])
 
     def _to_schema(self, metadata=None):
         """Create an Arrow schema from this table."""
@@ -356,11 +359,13 @@ cdef class Table:
 
         return PyCapsule_New(<void*>raw_schema_ptr, "arrow_schema", _release_schema)
 
-    def _to_host_array(self, Stream stream):
+    def _to_host_array(self, object stream):
         cdef ArrowArray* raw_host_array_ptr
+        cdef Stream _stream = _get_stream(stream)
+        cdef cudaStream_t _cs = _stream.view().value()
 
         with nogil:
-            raw_host_array_ptr = to_arrow_host_raw(self.view(), stream.view())
+            raw_host_array_ptr = to_arrow_host_raw(self.view(), _cs)
 
         return PyCapsule_New(<void*>raw_host_array_ptr, "arrow_array", _release_array)
 
diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd
index a92ffb3f27e..8333abd6df0 100644
--- a/python/pylibcudf/pylibcudf/transform.pxd
+++ b/python/pylibcudf/pylibcudf/transform.pxd
@@ -3,7 +3,6 @@
 from libcpp cimport bool
 from pylibcudf.libcudf.types cimport bitmask_type, data_type
 from pylibcudf.libcudf.types cimport null_aware, output_nullability
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
@@ -14,30 +13,30 @@ from .types cimport DataType
 
 
 cpdef tuple[gpumemoryview, int] nans_to_nulls(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column column_nans_to_nulls(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column compute_column(
-    Table input, Expression expr, Stream stream = *, DeviceMemoryResource mr = *
+    Table input, Expression expr, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column compute_column_jit(
-    Table input, Expression expr, Stream stream = *, DeviceMemoryResource mr = *
+    Table input, Expression expr, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef tuple[gpumemoryview, int] bools_to_mask(
-    Column input, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Column mask_to_bools(
     Py_ssize_t bitmask,
     int begin_bit,
     int end_bit,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
@@ -48,17 +47,17 @@ cpdef Column transform(
     bool is_ptx,
     null_aware is_null_aware,
     output_nullability null_policy,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
 
 cpdef tuple[Table, Column] encode(
-    Table input, Stream stream = *, DeviceMemoryResource mr = *
+    Table input, object stream = *, DeviceMemoryResource mr = *
 )
 
 cpdef Table one_hot_encode(
     Column input_column,
     Column categories,
-    Stream stream = *,
+    object stream = *,
     DeviceMemoryResource mr = *,
 )
diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi
index 2d2038f07a0..e979575f590 100644
--- a/python/pylibcudf/pylibcudf/transform.pyi
+++ b/python/pylibcudf/pylibcudf/transform.pyi
@@ -1,46 +1,46 @@
 # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.expressions import Expression
 from pylibcudf.gpumemoryview import gpumemoryview
 from pylibcudf.table import Table
 from pylibcudf.types import DataType, NullAware, OutputNullability
+from pylibcudf.utils import CudaStreamLike
 
 def nans_to_nulls(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[gpumemoryview, int]: ...
 def column_nans_to_nulls(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def compute_column(
     input: Table,
     expr: Expression,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def compute_column_jit(
     input: Table,
     expr: Expression,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def bools_to_mask(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[gpumemoryview, int]: ...
 def mask_to_bools(
     bitmask: int,
     begin_bit: int,
     end_bit: int,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def transform(
@@ -50,17 +50,17 @@ def transform(
     is_ptx: bool,
     null_aware: NullAware = NullAware.NO,
     null_policy: OutputNullability = OutputNullability.PRESERVE,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def encode(
     input: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> tuple[Table, Column]: ...
 def one_hot_encode(
     input: Column,
     categories: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx
index 3baf6c5306e..0025ed7d566 100644
--- a/python/pylibcudf/pylibcudf/transform.pyx
+++ b/python/pylibcudf/pylibcudf/transform.pyx
@@ -26,6 +26,7 @@ from .expressions cimport Expression
 from .gpumemoryview cimport gpumemoryview
 from .types cimport DataType, null_aware, output_nullability
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "bools_to_mask",
@@ -41,7 +42,7 @@ __all__ = [
 
 cpdef tuple[gpumemoryview, int] nans_to_nulls(
     Column input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a null mask preserving existing nulls and converting nans to null.
@@ -63,21 +64,26 @@ cpdef tuple[gpumemoryview, int] nans_to_nulls(
     """
     cdef pair[unique_ptr[device_buffer], size_type] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_transform.nans_to_nulls(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_transform.nans_to_nulls(
+            input.view(), _cs, mr.get_mr()
+        )
 
     return (
-        gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream, mr)),
+        gpumemoryview(
+            DeviceBuffer.c_from_unique_ptr(move(c_result.first), _stream, mr)
+        ),
         c_result.second
     )
 
 
 cpdef Column column_nans_to_nulls(
     Column input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a column with nans converted to nulls.
@@ -100,19 +106,20 @@ cpdef Column column_nans_to_nulls(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_transform.column_nans_to_nulls(
-            input.view(), stream.view(), mr.get_mr()
+            input.view(), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column compute_column(
-    Table input, Expression expr, Stream stream=None, DeviceMemoryResource mr=None
+    Table input, Expression expr, object stream=None, DeviceMemoryResource mr=None
 ):
     """Create a column by evaluating an expression on a table.
 
@@ -135,19 +142,20 @@ cpdef Column compute_column(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_transform.compute_column(
-            input.view(), dereference(expr.c_obj.get()), stream.view(), mr.get_mr()
+            input.view(), dereference(expr.c_obj.get()), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column compute_column_jit(
-    Table input, Expression expr, Stream stream=None, DeviceMemoryResource mr=None
+    Table input, Expression expr, object stream=None, DeviceMemoryResource mr=None
 ):
     """
     Create a column by evaluating an expression on a table
@@ -172,20 +180,21 @@ cpdef Column compute_column_jit(
     """
     cdef unique_ptr[column] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_transform.compute_column_jit(
-            input.view(), dereference(expr.c_obj.get()), stream.view(), mr.get_mr()
+            input.view(), dereference(expr.c_obj.get()), _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef tuple[gpumemoryview, int] bools_to_mask(
     Column input,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a bitmask from a column of boolean elements
@@ -206,14 +215,19 @@ cpdef tuple[gpumemoryview, int] bools_to_mask(
     """
     cdef pair[unique_ptr[device_buffer], size_type] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_transform.bools_to_mask(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_transform.bools_to_mask(
+            input.view(), _cs, mr.get_mr()
+        )
 
     return (
-        gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream, mr)),
+        gpumemoryview(
+            DeviceBuffer.c_from_unique_ptr(move(c_result.first), _stream, mr)
+        ),
         c_result.second
     )
 
@@ -222,7 +236,7 @@ cpdef Column mask_to_bools(
     Py_ssize_t bitmask,
     int begin_bit,
     int end_bit,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Creates a boolean column from given bitmask.
@@ -248,7 +262,8 @@ cpdef Column mask_to_bools(
     cdef unique_ptr[column] c_result
     cdef bitmask_type * bitmask_ptr = <bitmask_type*>bitmask
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
@@ -256,11 +271,11 @@ cpdef Column mask_to_bools(
             bitmask_ptr,
             begin_bit,
             end_bit,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 
 cpdef Column transform(
@@ -270,7 +285,7 @@ cpdef Column transform(
     bool is_ptx,
     null_aware is_null_aware,
     output_nullability null_policy,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Create a new column by applying a transform function against
@@ -312,7 +327,8 @@ cpdef Column transform(
     cdef output_nullability c_null_policy = null_policy
     cdef optional[void *] user_data
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     for input in inputs:
@@ -327,14 +343,14 @@ cpdef Column transform(
             user_data,
             c_is_null_aware,
             c_null_policy,
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
-    return Column.from_libcudf(move(c_result), stream, mr)
+    return Column.from_libcudf(move(c_result), _stream, mr)
 
 cpdef tuple[Table, Column] encode(
-    Table input, Stream stream=None, DeviceMemoryResource mr=None
+    Table input, object stream=None, DeviceMemoryResource mr=None
 ):
     """Encode the rows of the given table as integers.
 
@@ -355,21 +371,22 @@ cpdef tuple[Table, Column] encode(
     """
     cdef pair[unique_ptr[table], unique_ptr[column]] c_result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        c_result = cpp_transform.encode(input.view(), stream.view(), mr.get_mr())
+        c_result = cpp_transform.encode(input.view(), _cs, mr.get_mr())
 
     return (
-        Table.from_libcudf(move(c_result.first), stream, mr),
-        Column.from_libcudf(move(c_result.second), stream, mr)
+        Table.from_libcudf(move(c_result.first), _stream, mr),
+        Column.from_libcudf(move(c_result.second), _stream, mr)
     )
 
 cpdef Table one_hot_encode(
     Column input,
     Column categories,
-    Stream stream=None,
+    object stream=None,
     DeviceMemoryResource mr=None,
 ):
     """Encodes `input` by generating a new column
@@ -395,19 +412,20 @@ cpdef Table one_hot_encode(
     cdef pair[unique_ptr[column], table_view] c_result
     cdef Table owner_table
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_transform.one_hot_encode(
             input.view(),
             categories.view(),
-            stream.view(),
+            _cs,
             mr.get_mr()
         )
 
     owner_table = Table(
-        [Column.from_libcudf(move(c_result.first), stream, mr)]
+        [Column.from_libcudf(move(c_result.first), _stream, mr)]
         * c_result.second.num_columns()
     )
 
diff --git a/python/pylibcudf/pylibcudf/transpose.pxd b/python/pylibcudf/pylibcudf/transpose.pxd
index 6c432a62b5f..a63d52da9e1 100644
--- a/python/pylibcudf/pylibcudf/transpose.pxd
+++ b/python/pylibcudf/pylibcudf/transpose.pxd
@@ -1,9 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from .table cimport Table
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 
-cpdef Table transpose(Table input_table, Stream stream=*, DeviceMemoryResource mr=*)
+cpdef Table transpose(Table input_table, object stream = *, DeviceMemoryResource mr=*)
diff --git a/python/pylibcudf/pylibcudf/transpose.pyi b/python/pylibcudf/pylibcudf/transpose.pyi
index 4487e49feaf..fbf2d3fce2d 100644
--- a/python/pylibcudf/pylibcudf/transpose.pyi
+++ b/python/pylibcudf/pylibcudf/transpose.pyi
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.table import Table
+from pylibcudf.utils import CudaStreamLike
 
 def transpose(
     input_table: Table,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/transpose.pyx b/python/pylibcudf/pylibcudf/transpose.pyx
index e7cdbe503eb..e15aa45ce77 100644
--- a/python/pylibcudf/pylibcudf/transpose.pyx
+++ b/python/pylibcudf/pylibcudf/transpose.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
@@ -13,11 +13,12 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 from .column cimport Column
 from .table cimport Table
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = ["transpose"]
 
 cpdef Table transpose(
-    Table input_table, Stream stream=None, DeviceMemoryResource mr=None
+    Table input_table, object stream=None, DeviceMemoryResource mr=None
 ):
     """Transpose a Table.
 
@@ -39,16 +40,17 @@ cpdef Table transpose(
     """
     cdef pair[unique_ptr[column], table_view] c_result
     cdef Table owner_table
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         c_result = cpp_transpose.transpose(
-            input_table.view(), stream.view(), mr.get_mr()
+            input_table.view(), _cs, mr.get_mr()
         )
 
     owner_table = Table(
-        [Column.from_libcudf(move(c_result.first), stream, mr)] *
+        [Column.from_libcudf(move(c_result.first), _stream, mr)] *
         c_result.second.num_columns()
     )
 
diff --git a/python/pylibcudf/pylibcudf/unary.pxd b/python/pylibcudf/pylibcudf/unary.pxd
index 69ec06ecea6..44a4f796085 100644
--- a/python/pylibcudf/pylibcudf/unary.pxd
+++ b/python/pylibcudf/pylibcudf/unary.pxd
@@ -1,9 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
 from pylibcudf.libcudf.unary cimport unary_operator
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from .column cimport Column
@@ -11,19 +10,19 @@ from .types cimport DataType
 
 
 cpdef Column unary_operation(
-    Column input, unary_operator op, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, unary_operator op, object stream = *, DeviceMemoryResource mr = *
 )
 
-cpdef Column is_null(Column input, Stream stream = *, DeviceMemoryResource mr = *)
+cpdef Column is_null(Column input, object stream = *, DeviceMemoryResource mr = *)
 
-cpdef Column is_valid(Column input, Stream stream = *, DeviceMemoryResource mr = *)
+cpdef Column is_valid(Column input, object stream = *, DeviceMemoryResource mr = *)
 
 cpdef Column cast(
-    Column input, DataType data_type, Stream stream = *, DeviceMemoryResource mr = *
+    Column input, DataType data_type, object stream = *, DeviceMemoryResource mr = *
 )
 
-cpdef Column is_nan(Column input, Stream stream = *, DeviceMemoryResource mr = *)
+cpdef Column is_nan(Column input, object stream = *, DeviceMemoryResource mr = *)
 
-cpdef Column is_not_nan(Column input, Stream stream = *, DeviceMemoryResource mr = *)
+cpdef Column is_not_nan(Column input, object stream = *, DeviceMemoryResource mr = *)
 
 cpdef bool is_supported_cast(DataType from_, DataType to)
diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi
index 6a77f7998b9..dd3d42404e7 100644
--- a/python/pylibcudf/pylibcudf/unary.pyi
+++ b/python/pylibcudf/pylibcudf/unary.pyi
@@ -1,13 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from enum import IntEnum
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
-from rmm.pylibrmm.stream import Stream
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
+from pylibcudf.utils import CudaStreamLike
 
 class UnaryOperator(IntEnum):
     SIN = ...
@@ -38,33 +38,33 @@ class UnaryOperator(IntEnum):
 def unary_operation(
     input: Column,
     op: UnaryOperator,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_null(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_valid(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def cast(
     input: Column,
     data_type: DataType,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_nan(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_not_nan(
     input: Column,
-    stream: Stream | None = None,
+    stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
 def is_supported_cast(from_: DataType, to: DataType) -> bool: ...
diff --git a/python/pylibcudf/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx
index da5b08df685..e0614037012 100644
--- a/python/pylibcudf/pylibcudf/unary.pyx
+++ b/python/pylibcudf/pylibcudf/unary.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp cimport bool
@@ -16,6 +16,7 @@ from pylibcudf.libcudf.unary import \
 from .column cimport Column
 from .types cimport DataType
 from .utils cimport _get_stream, _get_memory_resource
+from cuda.bindings.cyruntime cimport cudaStream_t
 
 __all__ = [
     "UnaryOperator",
@@ -29,7 +30,7 @@ __all__ = [
 ]
 
 cpdef Column unary_operation(
-    Column input, unary_operator op, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, unary_operator op, object stream=None, DeviceMemoryResource mr=None
 ):
     """Perform a unary operation on a column.
 
@@ -53,16 +54,19 @@ cpdef Column unary_operation(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_unary.unary_operation(input.view(), op, stream.view(), mr.get_mr())
+        result = cpp_unary.unary_operation(
+            input.view(), op, _cs, mr.get_mr()
+        )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
-cpdef Column is_null(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_null(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Check whether elements of a column are null.
 
     For details, see :cpp:func:`is_null`.
@@ -83,16 +87,17 @@ cpdef Column is_null(Column input, Stream stream=None, DeviceMemoryResource mr=N
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_unary.is_null(input.view(), stream.view(), mr.get_mr())
+        result = cpp_unary.is_null(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
-cpdef Column is_valid(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_valid(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Check whether elements of a column are valid.
 
     For details, see :cpp:func:`is_valid`.
@@ -113,17 +118,18 @@ cpdef Column is_valid(Column input, Stream stream=None, DeviceMemoryResource mr=
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_unary.is_valid(input.view(), stream.view(), mr.get_mr())
+        result = cpp_unary.is_valid(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
 cpdef Column cast(
-    Column input, DataType data_type, Stream stream=None, DeviceMemoryResource mr=None
+    Column input, DataType data_type, object stream=None, DeviceMemoryResource mr=None
 ):
     """Cast a column to a different data type.
 
@@ -147,18 +153,19 @@ cpdef Column cast(
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
         result = cpp_unary.cast(
-            input.view(), data_type.c_obj, stream.view(), mr.get_mr()
+            input.view(), data_type.c_obj, _cs, mr.get_mr()
         )
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
-cpdef Column is_nan(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_nan(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Check whether elements of a column are nan.
 
     For details, see :cpp:func:`is_nan`.
@@ -179,16 +186,17 @@ cpdef Column is_nan(Column input, Stream stream=None, DeviceMemoryResource mr=No
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_unary.is_nan(input.view(), stream.view(), mr.get_mr())
+        result = cpp_unary.is_nan(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 
-cpdef Column is_not_nan(Column input, Stream stream=None, DeviceMemoryResource mr=None):
+cpdef Column is_not_nan(Column input, object stream=None, DeviceMemoryResource mr=None):
     """Check whether elements of a column are not nan.
 
     For details, see :cpp:func:`is_not_nan`.
@@ -209,13 +217,14 @@ cpdef Column is_not_nan(Column input, Stream stream=None, DeviceMemoryResource m
     """
     cdef unique_ptr[column] result
 
-    stream = _get_stream(stream)
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
     mr = _get_memory_resource(mr)
 
     with nogil:
-        result = cpp_unary.is_not_nan(input.view(), stream.view(), mr.get_mr())
+        result = cpp_unary.is_not_nan(input.view(), _cs, mr.get_mr())
 
-    return Column.from_libcudf(move(result), stream, mr)
+    return Column.from_libcudf(move(result), _stream, mr)
 
 cpdef bool is_supported_cast(DataType from_, DataType to):
     """Check if a cast between datatypes is supported.
diff --git a/python/pylibcudf/pylibcudf/utils.pxd b/python/pylibcudf/pylibcudf/utils.pxd
index b3d2928f398..feb82cea18f 100644
--- a/python/pylibcudf/pylibcudf/utils.pxd
+++ b/python/pylibcudf/pylibcudf/utils.pxd
@@ -1,12 +1,12 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from libcpp.functional cimport reference_wrapper
 from libcpp.vector cimport vector
 from pylibcudf.libcudf.scalar.scalar cimport scalar
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
+from rmm.pylibrmm.stream cimport Stream
 
 cdef vector[reference_wrapper[const scalar]] _as_vector(list source)
-cpdef Stream _get_stream(Stream stream = *)
+cpdef Stream _get_stream(object stream = *)
 cdef DeviceMemoryResource _get_memory_resource(DeviceMemoryResource mr = *)
diff --git a/python/pylibcudf/pylibcudf/utils.pyi b/python/pylibcudf/pylibcudf/utils.pyi
index 21f669898ba..cc3cb93e6c0 100644
--- a/python/pylibcudf/pylibcudf/utils.pyi
+++ b/python/pylibcudf/pylibcudf/utils.pyi
@@ -1,6 +1,13 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
+from typing import Protocol
+
 from rmm.pylibrmm.stream import Stream
 
-def _get_stream(stream: Stream | None = None) -> Stream: ...
+class HasCudaStream(Protocol):
+    def __cuda_stream__(self) -> tuple[int, int]: ...
+
+CudaStreamLike = Stream | HasCudaStream
+
+def _get_stream(stream: CudaStreamLike | None = None) -> Stream: ...
diff --git a/python/pylibcudf/pylibcudf/utils.pyx b/python/pylibcudf/pylibcudf/utils.pyx
index 70460e19481..314e62f7760 100644
--- a/python/pylibcudf/pylibcudf/utils.pyx
+++ b/python/pylibcudf/pylibcudf/utils.pyx
@@ -47,10 +47,12 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source):
     return c_scalars
 
 
-cpdef Stream _get_stream(Stream stream = None):
+cpdef Stream _get_stream(object stream = None):
     if stream is None:
         return CUDF_DEFAULT_STREAM
-    return stream
+    if isinstance(stream, Stream):
+        return <Stream>stream
+    return Stream(stream)  # Handles __cuda_stream__ protocol
 
 
 cdef DeviceMemoryResource _get_memory_resource(DeviceMemoryResource mr = None):
diff --git a/python/pylibcudf/tests/test_experimental.py b/python/pylibcudf/tests/test_experimental.py
index eaf06ff62ae..ed180e8db29 100644
--- a/python/pylibcudf/tests/test_experimental.py
+++ b/python/pylibcudf/tests/test_experimental.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 import pytest
 
@@ -21,6 +21,7 @@ def test_join_streams(streams: list[Stream], stream: Stream):
     plc.experimental.join_streams(streams, stream)
 
 
+@pytest.mark.uses_custom_stream
 def test_join_streams_type_error():
     """Test that join_streams raises appropriate errors for invalid inputs."""
     main_stream = Stream()
@@ -29,16 +30,10 @@ def test_join_streams_type_error():
     with pytest.raises(TypeError):
         plc.experimental.join_streams(None, main_stream)
 
-    # Test with non-Stream in list
-    with pytest.raises(
-        TypeError,
-        match="Cannot convert NoneType to rmm.pylibrmm.stream.Stream",
-    ):
-        plc.experimental.join_streams([None], main_stream)
-
-    # Test with non-Stream as main stream
-    with pytest.raises(
-        TypeError,
-        match="Cannot convert NoneType to rmm.pylibrmm.stream.Stream",
-    ):
-        plc.experimental.join_streams([Stream()], None)
+    # Protocol stream should be accepted
+    class _CudaStreamProto:
+        def __cuda_stream__(self):
+            return (0, 0)
+
+    plc.experimental.join_streams([_CudaStreamProto()], main_stream)
+    plc.experimental.join_streams([Stream()], _CudaStreamProto())
diff --git a/python/pylibcudf/tests/test_stream_protocol.py b/python/pylibcudf/tests/test_stream_protocol.py
new file mode 100644
index 00000000000..075c49bd0b3
--- /dev/null
+++ b/python/pylibcudf/tests/test_stream_protocol.py
@@ -0,0 +1,74 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+
+import pyarrow as pa
+import pytest
+
+from rmm.pylibrmm.stream import Stream
+
+import pylibcudf as plc
+
+
+class _CudaStreamProto:
+    """Minimal __cuda_stream__ protocol object for testing."""
+
+    def __cuda_stream__(self):
+        return (0, 0)
+
+
+def test_get_stream_none():
+    stream = plc.utils._get_stream(None)
+    assert isinstance(stream, Stream)
+
+
+def test_get_stream_stream_object():
+    stream = Stream()
+    result = plc.utils._get_stream(stream)
+    assert result is stream
+
+
+def test_get_stream_protocol_object():
+    proto = _CudaStreamProto()
+    result = plc.utils._get_stream(proto)
+    assert isinstance(result, Stream)
+
+
+@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()])
+def test_reduce_accepts_stream_protocol(stream):
+    arr = pa.array([1, 2, 3], type=pa.int32())
+    col = plc.Column.from_arrow(arr)
+    agg = plc.aggregation.sum()
+    dtype = plc.DataType.from_arrow(pa.int32())
+    result = plc.reduce.reduce(col, agg, dtype, stream=stream)
+    assert result.to_py() == 6
+
+
+@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()])
+def test_binary_operation_accepts_stream_protocol(stream):
+    lhs = plc.Column.from_arrow(pa.array([1, 2, 3], type=pa.int32()))
+    rhs = plc.Column.from_arrow(pa.array([4, 5, 6], type=pa.int32()))
+    dtype = plc.DataType.from_arrow(pa.int32())
+    result = plc.binaryop.binary_operation(
+        lhs,
+        rhs,
+        plc.binaryop.BinaryOperator.ADD,
+        dtype,
+        stream=stream,
+    )
+    expect = pa.array([5, 7, 9], type=pa.int32())
+    assert result.to_arrow().equals(expect)
+
+
+@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()])
+def test_gather_accepts_stream_protocol(stream):
+    table = plc.Table.from_arrow(pa.table({"a": [1, 2, 3], "b": [4, 5, 6]}))
+    indices = plc.Column.from_arrow(pa.array([2, 0], type=pa.int32()))
+    result = plc.copying.gather(
+        table,
+        indices,
+        plc.copying.OutOfBoundsPolicy.DONT_CHECK,
+        stream=stream,
+    )
+    expected = pa.table({"a": [3, 1], "b": [6, 4]})
+    got = result.to_arrow().rename_columns(expected.column_names)
+    assert got.cast(expected.schema).equals(expected)

From 10993fb0434c705e884fe6b1e7b1edabe30dce5e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 May 2026 16:41:46 -0700
Subject: [PATCH 06/12] Use `language: script` for cudf-polars-ir-signatures
 pre-commit hook (#22384)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `cudf-polars-ir-signatures` pre-commit hook uses `language: python` but is just a local script (`./ci/check_cudf_polars_ir.py`) that only depends on stdlib modules (`ast`, `argparse`, `sys`, `typing`) and has a `#!/usr/bin/env python3` shebang.

With `language: python`, pre-commit unnecessarily creates a virtualenv for this hook. `language: script` is the correct setting — it runs the entry point directly as an executable, relying on the shebang for interpreter selection, with no virtualenv overhead.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cudf/pull/22384
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1fb05425bd3..a51294a8f26 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -136,7 +136,7 @@ repos:
         name: cudf-polars-ir-signatures
         description: 'Validate cudf-polars IR.do_evaluate signatures.'
         entry: ./ci/check_cudf_polars_ir.py
-        language: python
+        language: script
         files: ^python/cudf_polars/cudf_polars/(dsl/ir|experimental/(shuffle|io|sort))\.py$
         pass_filenames: true
         verbose: true

From 572437becfffc1ce802849c56218a3c137b917d5 Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Wed, 6 May 2026 18:14:35 -0700
Subject: [PATCH 07/12] Fix potential errors in Parquet page header decode
 (#22274)

This PR fixes a potential infinite loop in the Parquet page header count/decode kernels in case of malformed input.

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Paul Mattione (https://github.com/pmattione-nvidia)

URL: https://github.com/rapidsai/cudf/pull/22274
---
 cpp/src/io/parquet/page_hdr.cu               | 132 ++++++++++---------
 cpp/src/io/parquet/parquet_gpu.hpp           |   2 +
 cpp/src/io/parquet/reader_impl_preprocess.cu |   8 +-
 3 files changed, 81 insertions(+), 61 deletions(-)

diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu
index 83724dd71e2..8e7a6223447 100644
--- a/cpp/src/io/parquet/page_hdr.cu
+++ b/cpp/src/io/parquet/page_hdr.cu
@@ -110,8 +110,8 @@ inline __device__ int32_t get_i32(byte_stream_s* bs)
  */
 __device__ void skip_struct_field(byte_stream_s* bs, int field_type)
 {
-  int struct_depth = 0;
-  int rep_cnt      = 0;
+  uint32_t struct_depth = 0;
+  uint32_t rep_cnt      = 0;
 
   do {
     if (rep_cnt != 0) {
@@ -138,7 +138,7 @@ __device__ void skip_struct_field(byte_stream_s* bs, int field_type)
       case FieldType::LIST:
       case FieldType::SET: {  // NOTE: skipping a list of lists is not handled
         auto const c = getb(bs);
-        int n        = c >> 4;
+        uint32_t n   = c >> 4;
         if (n == 0xf) { n = get_u32(bs); }
         field_type = c & 0xf;
         if (static_cast<FieldType>(field_type) == FieldType::STRUCT) {
@@ -543,7 +543,6 @@ void __launch_bounds__(decode_page_headers_block_size)
   auto const block = cg::this_thread_block();
   auto const warp  = cg::tiled_partition<cudf::detail::warp_size>(block);
 
-  auto const lane_id = warp.thread_rank();
   auto const warp_id = warp.meta_group_rank();
   auto const chunk_idx =
     static_cast<cudf::size_type>((cg::this_grid().block_rank() * num_warps_per_block) + warp_id);
@@ -554,20 +553,20 @@ void __launch_bounds__(decode_page_headers_block_size)
 
   auto const bs = &bs_g[warp_id];
 
-  if (lane_id == 0) {
+  cg::invoke_one(warp, [&] {
     if (chunk_idx < num_chunks) { bs->ck = chunks[chunk_idx]; }
     error[warp_id] = 0;
-  }
+  });
   block.sync();
 
   if (chunk_idx < num_chunks) {
-    if (lane_id == 0) {
+    cg::invoke_one(warp, [&] {
       bs->base = bs->cur      = bs->ck.compressed_data;
       bs->end                 = bs->base + bs->ck.compressed_size;
       bs->page.chunk_idx      = chunk_idx;
       bs->page.src_col_schema = bs->ck.src_col_schema;
       zero_out_page_header_info(bs);
-    }
+    });
     size_t const num_values        = bs->ck.num_values;
     size_t values_found            = 0;
     uint32_t data_page_count       = 0;
@@ -580,7 +579,7 @@ void __launch_bounds__(decode_page_headers_block_size)
     while (values_found < num_values and bs->cur < bs->end) {
       int index_out = -1;
 
-      if (lane_id == 0) {
+      cg::invoke_one(warp, [&] {
         // this computation is only valid for flat schemas. for nested schemas,
         // they will be recomputed in the preprocess step by examining repetition and
         // definition levels
@@ -593,7 +592,7 @@ void __launch_bounds__(decode_page_headers_block_size)
         bs->page.num_nulls                         = 0;
         bs->page.lvl_bytes[level_type::DEFINITION] = 0;
         bs->page.lvl_bytes[level_type::REPETITION] = 0;
-        if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size >= 0) {
+        if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size > 0) {
           if (not is_supported_encoding(bs->page.encoding)) {
             error[warp_id] |=
               static_cast<kernel_error::value_type>(decode_error::UNSUPPORTED_ENCODING);
@@ -641,11 +640,13 @@ void __launch_bounds__(decode_page_headers_block_size)
           bs->cur = bs->end;
         }
         if (index_out >= 0 and index_out < max_num_pages) { page_info[index_out] = bs->page; }
-      }
+      });
       values_found = shuffle(values_found);
       warp.sync();
     }
-    if (lane_id == 0 and error[warp_id] != 0) { set_error(error[warp_id], error_code); }
+    cg::invoke_one(warp, [&] {
+      if (error[warp_id] != 0) { set_error(error[warp_id], error_code); }
+    });
   }
 }
 
@@ -664,7 +665,6 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size)
   auto const block = cg::this_thread_block();
   auto const warp  = cg::tiled_partition<cudf::detail::warp_size>(block);
 
-  auto const lane_id = warp.thread_rank();
   auto const warp_id = warp.meta_group_rank();
   auto const chunk_idx =
     static_cast<cudf::size_type>((cg::this_grid().block_rank() * num_warps_per_block) + warp_id);
@@ -675,25 +675,25 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size)
 
   auto const bs = &bs_g[warp_id];
 
-  if (lane_id == 0) {
+  cg::invoke_one(warp, [&] {
     if (chunk_idx < num_chunks) { bs->ck = chunks[chunk_idx]; }
     error[warp_id] = 0;
-  }
+  });
   block.sync();
 
   if (chunk_idx < num_chunks) {
-    if (lane_id == 0) {
+    cg::invoke_one(warp, [&] {
       bs->base = bs->cur = bs->ck.compressed_data;
       bs->end            = bs->base + bs->ck.compressed_size;
-    }
+    });
     size_t const num_values        = bs->ck.num_values;
     size_t values_found            = 0;
     uint32_t data_page_count       = 0;
     uint32_t dictionary_page_count = 0;
     warp.sync();
     while (values_found < num_values and bs->cur < bs->end) {
-      if (lane_id == 0) {
-        if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size >= 0) {
+      cg::invoke_one(warp, [&] {
+        if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size > 0) {
           if (not is_supported_encoding(bs->page.encoding)) {
             error[warp_id] |=
               static_cast<kernel_error::value_type>(decode_error::UNSUPPORTED_ENCODING);
@@ -724,15 +724,15 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size)
             static_cast<kernel_error::value_type>(decode_error::INVALID_PAGE_HEADER);
           bs->cur = bs->end;
         }
-      }
+      });
       values_found = shuffle(values_found);
       warp.sync();
     }
-    if (lane_id == 0) {
+    cg::invoke_one(warp, [&] {
       chunks[chunk_idx].num_data_pages = data_page_count;
       chunks[chunk_idx].num_dict_pages = dictionary_page_count;
       if (error[warp_id] != 0) { set_error(error[warp_id], error_code); }
-    }
+    });
   }
 }
 
@@ -784,8 +784,9 @@ struct decode_page_headers_with_pgidx_fn {
     // bs.page.chunk_row not computed here and will be filled in later by
     // `fill_in_page_info()`.
 
-    if (not parse_page_header_fn{}(&bs) or bs.page.compressed_page_size < 0) {
-      set_error(static_cast<kernel_error::value_type>(decode_error::UNSUPPORTED_ENCODING),
+    // Parsed page must be valid and not empty
+    if (not parse_page_header_fn{}(&bs) or bs.page.compressed_page_size <= 0) {
+      set_error(static_cast<kernel_error::value_type>(decode_error::INVALID_PAGE_HEADER),
                 error_code);
       return;
     }
@@ -834,54 +835,64 @@ struct decode_page_headers_with_pgidx_fn {
  * @param[in] num_chunks Number of column chunks
  */
 CUDF_KERNEL void __launch_bounds__(build_string_dict_index_block_size)
-  build_string_dictionary_index_kernel(ColumnChunkDesc* chunks, int32_t num_chunks)
+  build_string_dictionary_index_kernel(ColumnChunkDesc* chunks,
+                                       int32_t num_chunks,
+                                       kernel_error::pointer error_code)
 {
   auto constexpr num_warps_per_block = build_string_dict_index_block_size / cudf::detail::warp_size;
   __shared__ ColumnChunkDesc chunk_g[num_warps_per_block];
 
-  auto const block  = cg::this_thread_block();
-  auto const warp   = cg::tiled_partition<cudf::detail::warp_size>(block);
-  int const lane_id = warp.thread_rank();
-  int const chunk   = (cg::this_grid().block_rank() * num_warps_per_block) + warp.meta_group_rank();
+  auto const block = cg::this_thread_block();
+  auto const warp  = cg::tiled_partition<cudf::detail::warp_size>(block);
+  int const chunk  = (cg::this_grid().block_rank() * num_warps_per_block) + warp.meta_group_rank();
   ColumnChunkDesc* const ck = &chunk_g[warp.meta_group_rank()];
-  if (chunk < num_chunks and lane_id == 0) *ck = chunks[chunk];
+  cg::invoke_one(warp, [&] {
+    if (chunk < num_chunks) { *ck = chunks[chunk]; }
+  });
   block.sync();
 
   if (chunk >= num_chunks) { return; }
-  if (!lane_id && ck->num_dict_pages > 0 && ck->str_dict_index) {
-    // Data type to describe a string
-    string_index_pair* dict_index = ck->str_dict_index;
-    uint8_t const* dict           = ck->dict_page->page_data;
-    int dict_size                 = ck->dict_page->uncompressed_page_size;
-    int num_entries               = ck->dict_page->num_input_values;
-    int pos = 0, cur = 0;
-    for (int i = 0; i < num_entries; i++) {
-      int len = 0;
-      if (ck->physical_type == Type::FIXED_LEN_BYTE_ARRAY) {
-        if (cur + ck->type_length <= dict_size) {
-          len = ck->type_length;
-          pos = cur;
-          cur += len;
-        } else {
-          cur = dict_size;
-        }
-      } else {
-        if (cur + 4 <= dict_size) {
-          len =
-            dict[cur + 0] | (dict[cur + 1] << 8) | (dict[cur + 2] << 16) | (dict[cur + 3] << 24);
-          if (len >= 0 && cur + 4 + len <= dict_size) {
-            pos = cur + 4;
-            cur = pos + len;
+  cg::invoke_one(warp, [&] {
+    if (ck->num_dict_pages > 0 && ck->str_dict_index) {
+      // Data type to describe a string
+      string_index_pair* dict_index = ck->str_dict_index;
+      uint8_t const* dict           = ck->dict_page->page_data;
+      int const dict_size           = ck->dict_page->uncompressed_page_size;
+      int32_t const num_entries     = ck->dict_page->num_input_values;
+      if (num_entries < 0 or dict_size < 0) {
+        set_error(static_cast<kernel_error::value_type>(decode_error::INVALID_DICT_WIDTH),
+                  error_code);
+        return;
+      }
+      int pos = 0, cur = 0;
+      for (int i = 0; i < num_entries; i++) {
+        int len = 0;
+        if (ck->physical_type == Type::FIXED_LEN_BYTE_ARRAY) {
+          if (cur + ck->type_length <= dict_size) {
+            len = ck->type_length;
+            pos = cur;
+            cur += len;
           } else {
             cur = dict_size;
           }
+        } else {
+          if (cur + 4 <= dict_size) {
+            len =
+              dict[cur + 0] | (dict[cur + 1] << 8) | (dict[cur + 2] << 16) | (dict[cur + 3] << 24);
+            if (len >= 0 && cur + 4 + len <= dict_size) {
+              pos = cur + 4;
+              cur = pos + len;
+            } else {
+              cur = dict_size;
+            }
+          }
         }
+        // TODO: Could store 8 entries in shared mem, then do a single warp-wide store
+        dict_index[i].first  = reinterpret_cast<char const*>(dict + pos);
+        dict_index[i].second = len;
       }
-      // TODO: Could store 8 entries in shared mem, then do a single warp-wide store
-      dict_index[i].first  = reinterpret_cast<char const*>(dict + pos);
-      dict_index[i].second = len;
     }
-  }
+  });
 }
 
 }  // namespace
@@ -942,6 +953,7 @@ void decode_page_headers_with_pgidx(cudf::device_span<ColumnChunkDesc const> chu
 
 void build_string_dictionary_index(ColumnChunkDesc* chunks,
                                    int32_t num_chunks,
+                                   kernel_error::pointer error_code,
                                    rmm::cuda_stream_view stream)
 {
   static_assert(
@@ -954,8 +966,8 @@ void build_string_dictionary_index(ColumnChunkDesc* chunks,
   dim3 dim_block(build_string_dict_index_block_size, 1);
   dim3 dim_grid(num_blocks, 1);
 
-  build_string_dictionary_index_kernel<<<dim_grid, dim_block, 0, stream.value()>>>(chunks,
-                                                                                   num_chunks);
+  build_string_dictionary_index_kernel<<<dim_grid, dim_block, 0, stream.value()>>>(
+    chunks, num_chunks, error_code);
 }
 
 }  // namespace cudf::io::parquet::detail
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index 680d11959a1..7d07f39aa38 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -735,10 +735,12 @@ void decode_page_headers_with_pgidx(cudf::device_span<ColumnChunkDesc const> chu
  *
  * @param[in] chunks List of column chunks
  * @param[in] num_chunks Number of column chunks
+ * @param[out] error_code Pointer to the error code for kernel failures
  * @param[in] stream CUDA stream to use
  */
 void build_string_dictionary_index(ColumnChunkDesc* chunks,
                                    int32_t num_chunks,
+                                   kernel_error::pointer error_code,
                                    rmm::cuda_stream_view stream);
 
 /**
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu
index 07db8ff0c23..8ebb8879d7e 100644
--- a/cpp/src/io/parquet/reader_impl_preprocess.cu
+++ b/cpp/src/io/parquet/reader_impl_preprocess.cu
@@ -94,8 +94,14 @@ void reader_impl::build_string_dict_indices()
     set_str_dict_index_ptr{pass.str_dict_index.data(), str_dict_index_offsets, pass.chunks});
 
   // compute the indices
-  build_string_dictionary_index(pass.chunks.device_ptr(), pass.chunks.size(), _stream);
+  kernel_error error_code(_stream);
+  build_string_dictionary_index(
+    pass.chunks.device_ptr(), pass.chunks.size(), error_code.data(), _stream);
   pass.chunks.device_to_host(_stream);
+  auto const error = error_code.value_sync(_stream);
+  CUDF_EXPECTS(
+    error == 0,
+    "Parquet dictionary index construction failed with code(s) " + kernel_error::to_string(error));
 }
 
 void reader_impl::allocate_nesting_info()

From 563021f45e9f11acec6e242b644e3b024ac1e3c3 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 6 May 2026 18:55:29 -0700
Subject: [PATCH 08/12] Make RapidsMPF the default runtime for cudf_polars
 streaming executor (#22281)

closes https://github.com/rapidsai/cudf/issues/21466
closes https://github.com/rapidsai/cudf/issues/21767

Waiting for https://github.com/rapidsai/cudf/pull/22212

* Makes rapidsmpf a required dependency of cudf_polars
* Removes the following "experimental" `StreamingExecutor` options, along with their associated code paths:
    * `StreamingExecutor.runtime`
    * `StreamingExecutor.shuffle_method`
    * `StreamingExecutor.unique_fraction`
    * `StreamingExecutor.groupby_n_ary`
    * `StreamingExecutor.rapidsmpf_spill`
* Removes the task runtime and associated tests
* Some tests were modified to exercise only one specific test configuration so that they pass for now, because of https://github.com/rapidsai/cudf/issues/22346. We plan to revisit this once rapidsmpf becomes the default.

Ops-Bot-Merge-Barrier: true

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Bradley Dice (https://github.com/bdice)
  - Matthew Murray (https://github.com/Matt711)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/22281
---
 .devcontainer/Dockerfile                      |   2 +
 .devcontainer/README.md                       |   8 +
 .../cuda12.9-conda/devcontainer.json          |   5 +-
 .devcontainer/cuda12.9-pip/devcontainer.json  |   7 +-
 .../cuda13.1-conda/devcontainer.json          |   5 +-
 .devcontainer/cuda13.1-pip/devcontainer.json  |   7 +-
 .github/workflows/pr.yaml                     |   9 +-
 ci/test_cudf_polars_experimental.sh           |   2 +-
 ci/test_cudf_polars_polars_tests.sh           |   2 +-
 .../all_cuda-129_arch-aarch64.yaml            |   1 +
 .../all_cuda-129_arch-x86_64.yaml             |   1 +
 .../all_cuda-131_arch-aarch64.yaml            |   1 +
 .../all_cuda-131_arch-x86_64.yaml             |   1 +
 conda/recipes/cudf-polars/recipe.yaml         |   1 +
 dependencies.yaml                             |  56 ++-
 docs/cudf/source/cudf_polars/api.md           |   1 -
 .../cudf/source/cudf_polars/engine_options.md |   2 +-
 python/cudf_polars/cudf_polars/callback.py    |   5 +-
 python/cudf_polars/cudf_polars/dsl/expr.py    |   2 -
 .../cudf_polars/dsl/expressions/base.py       |   8 +-
 python/cudf_polars/cudf_polars/dsl/ir.py      |  10 +-
 .../cudf_polars/experimental/base.py          |  13 +-
 .../benchmarks/utils_new_frontends.py         |  27 +-
 .../cudf_polars/experimental/dispatch.py      |  39 +-
 .../cudf_polars/experimental/distinct.py      |  77 +---
 .../cudf_polars/experimental/explain.py       |   5 +-
 .../cudf_polars/experimental/expressions.py   |  21 +-
 .../cudf_polars/experimental/groupby.py       |  63 +--
 .../cudf_polars/experimental/io.py            | 232 +---------
 .../cudf_polars/experimental/join.py          | 164 +------
 .../cudf_polars/experimental/parallel.py      | 125 +-----
 .../experimental/rapidsmpf/core.py            |   4 -
 .../experimental/rapidsmpf/frontend/core.py   |   2 +-
 .../experimental/rapidsmpf/frontend/dask.py   |   2 -
 .../rapidsmpf/frontend/options.py             |  18 -
 .../experimental/rapidsmpf/frontend/ray.py    |   6 -
 .../experimental/rapidsmpf/frontend/spmd.py   |  21 +-
 .../cudf_polars/experimental/repartition.py   |  43 +-
 .../cudf_polars/experimental/scheduler.py     | 153 -------
 .../cudf_polars/experimental/shuffle.py       | 279 +-----------
 .../cudf_polars/experimental/sort.py          | 402 +-----------------
 .../cudf_polars/experimental/utils.py         |  47 +-
 .../cudf_polars/testing/asserts.py            |   5 +-
 .../cudf_polars/testing/inject_gpu_engine.py  |   3 +-
 .../cudf_polars/cudf_polars/utils/config.py   | 214 ++--------
 .../cudf_polars/utils/cuda_stream.py          |   5 -
 python/cudf_polars/pyproject.toml             |   2 +-
 python/cudf_polars/tests/conftest.py          |  30 +-
 .../tests/experimental/test_dask.py           |   2 -
 .../tests/experimental/test_explain.py        |   4 +-
 .../tests/experimental/test_groupby.py        |   5 +-
 .../tests/experimental/test_hstack.py         |   2 -
 .../tests/experimental/test_options.py        |   5 -
 .../tests/experimental/test_parallel.py       |  41 --
 .../tests/experimental/test_ray.py            |   4 +-
 .../tests/experimental/test_sort.py           |   4 -
 .../tests/experimental/test_spmd.py           |   3 +-
 .../tests/experimental/test_unique.py         |  35 +-
 python/cudf_polars/tests/test_config.py       | 175 ++------
 python/cudf_polars/tests/test_scan.py         |   2 +-
 python/cudf_polars/tests/test_sink.py         |   1 +
 python/cudf_polars/tests/test_tracing.py      |  14 +-
 .../tests/testing/test_engine_utils.py        |   5 -
 63 files changed, 259 insertions(+), 2181 deletions(-)
 delete mode 100644 python/cudf_polars/cudf_polars/experimental/scheduler.py

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index b4b2ecb69e0..57ccf6302c5 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -5,9 +5,11 @@ ARG PYTHON_PACKAGE_MANAGER=conda
 
 FROM ${BASE} as pip-base
 
+# libnuma-dev is required for pip devcontainers for cucascade from rapidsmpf
 RUN apt update -y \
  && DEBIAN_FRONTEND=noninteractive apt install -y \
     librdkafka-dev \
+    libnuma-dev \
  && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
 
 ENV DEFAULT_VIRTUAL_ENV=rapids
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
index 91ee7ef85f7..cc5fac22fde 100644
--- a/.devcontainer/README.md
+++ b/.devcontainer/README.md
@@ -20,6 +20,7 @@ This container is a turnkey development environment for building and testing the
 By default, the following directories are bind-mounted into the devcontainer:
 
 * `${repo}:/home/coder/cudf`
+* `${repo}/../rapidsmpf:/home/coder/rapidsmpf`
 * `${repo}/../.aws:/home/coder/.aws`
 * `${repo}/../.local:/home/coder/.local`
 * `${repo}/../.cache:/home/coder/.cache`
@@ -28,6 +29,13 @@ By default, the following directories are bind-mounted into the devcontainer:
 
 This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.
 
+The [rapidsmpf](https://github.com/rapidsai/rapidsmpf) repository is a required dependency of `cudf_polars` (that also requires `libcudf`) and must be cloned as a sibling directory to the cudf repo before launching the devcontainer:
+
+```
+# from the parent directory of your cudf clone
+git clone https://github.com/rapidsai/rapidsmpf.git
+```
+
 ## Launch a Dev Container
 
 To launch a devcontainer from VSCode, open the cuDF repo and select the "Reopen in Container" button in the bottom right:<br/><img src="https://user-images.githubusercontent.com/178183/221771999-97ab29d5-e718-4e5f-b32f-2cdd51bba25c.png"/>
diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json
index 9d672bdbde8..272007e7c95 100644
--- a/.devcontainer/cuda12.9-conda/devcontainer.json
+++ b/.devcontainer/cuda12.9-conda/devcontainer.json
@@ -49,7 +49,7 @@
   "initializeCommand": [
     "/bin/bash",
     "-c",
-    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.9-envs}"
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.9-envs} ${localWorkspaceFolder}/../rapidsmpf"
   ],
   "postAttachCommand": [
     "/bin/bash",
@@ -63,7 +63,8 @@
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.9-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.9-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json
index f99cc4ce5dc..5012dcfa979 100644
--- a/.devcontainer/cuda12.9-pip/devcontainer.json
+++ b/.devcontainer/cuda12.9-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "12.9",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:26.06-cpp-cuda12.9"
+      "BASE": "rapidsai/devcontainers:26.06-cpp-cuda12.9-ucx1.19.0-openmpi5.0.10"
     }
   },
   "runArgs": [
@@ -27,7 +27,7 @@
   "initializeCommand": [
     "/bin/bash",
     "-c",
-    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs}"
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs} ${localWorkspaceFolder}/../rapidsmpf"
   ],
   "postAttachCommand": [
     "/bin/bash",
@@ -40,7 +40,8 @@
     "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.devcontainer/cuda13.1-conda/devcontainer.json b/.devcontainer/cuda13.1-conda/devcontainer.json
index a73953b1989..785302c3c1d 100644
--- a/.devcontainer/cuda13.1-conda/devcontainer.json
+++ b/.devcontainer/cuda13.1-conda/devcontainer.json
@@ -49,7 +49,7 @@
   "initializeCommand": [
     "/bin/bash",
     "-c",
-    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.1-envs}"
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.1-envs} ${localWorkspaceFolder}/../rapidsmpf"
   ],
   "postAttachCommand": [
     "/bin/bash",
@@ -63,7 +63,8 @@
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.1-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.1-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.devcontainer/cuda13.1-pip/devcontainer.json b/.devcontainer/cuda13.1-pip/devcontainer.json
index 8596ff6b503..730b1c1e8ca 100644
--- a/.devcontainer/cuda13.1-pip/devcontainer.json
+++ b/.devcontainer/cuda13.1-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "13.1",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:26.06-cpp-cuda13.1"
+      "BASE": "rapidsai/devcontainers:26.06-cpp-cuda13.1-ucx1.19.0-openmpi5.0.10"
     }
   },
   "runArgs": [
@@ -27,7 +27,7 @@
   "initializeCommand": [
     "/bin/bash",
     "-c",
-    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs}"
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs} ${localWorkspaceFolder}/../rapidsmpf"
   ],
   "postAttachCommand": [
     "/bin/bash",
@@ -40,7 +40,8 @@
     "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
     "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
-    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent"
   ],
   "customizations": {
     "vscode": {
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 4e6935c3822..41a4c734f21 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -550,9 +550,16 @@ jobs:
         SCCACHE_DIST_MAX_RETRIES=inf
         SCCACHE_SERVER_LOG=sccache=debug
         SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false
+      # clone-rapidsmpf is run so that the rapidsmpf wheels from the cudf_polars dependency are not used
+      # librapidsmpf-cu13 wheels bring in a hardcoded libnuma-dev cmake target: https://github.com/NVIDIA/cuCascade/issues/118
+      # -DBUILD_TESTS=OFF to match rapidsmpf https://github.com/rapidsai/rapidsmpf/blob/main/.github/workflows/pr.yaml#L351 (leads to compilation errors)
+      # -DCUDF_BUILD_TESTUTIL=OFF to avoid IMPORTED_GLOBAL promotion errors when cuCascade's find_package(cudf) loads cudf-config.cmake from a CPM subdirectory
       build_command: |
         sccache --zero-stats;
-        build-all -j0 -DBUILD_BENCHMARKS=ON --verbose 2>&1 | tee telemetry-artifacts/build.log;
+        clone-rapidsmpf -j$(nproc) -v -q --branch "$(cat ~/cudf/RAPIDS_BRANCH)" --clone-upstream --depth 1 --single-branch --shallow-submodules;
+        if [ "$PYTHON_PACKAGE_MANAGER" = "pip" ]; then rapids-make-pip-env --force; elif [ "$PYTHON_PACKAGE_MANAGER" = "conda" ]; then rapids-make-conda-env --force; fi;
+        rapids-generate-scripts;
+        build-all -j0 -DBUILD_BENCHMARKS=OFF -DBUILD_NUMA_SUPPORT=OFF -DBUILD_TESTS=OFF -DCUDF_BUILD_TESTUTIL=OFF --verbose 2>&1 | tee telemetry-artifacts/build.log;
         sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt;
   unit-tests-cudf-pandas:
     needs: [wheel-build-cudf, changed-files]
diff --git a/ci/test_cudf_polars_experimental.sh b/ci/test_cudf_polars_experimental.sh
index 02eab86c0dd..aa3abd66254 100755
--- a/ci/test_cudf_polars_experimental.sh
+++ b/ci/test_cudf_polars_experimental.sh
@@ -28,7 +28,7 @@ rapids-pip-retry install \
     -v \
     --prefer-binary \
     --constraint "${PIP_CONSTRAINT}" \
-    "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental,rapidsmpf]" \
+    "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental]" \
     "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
     "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)"
 
diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh
index 802110b18ac..4e19464a895 100755
--- a/ci/test_cudf_polars_polars_tests.sh
+++ b/ci/test_cudf_polars_polars_tests.sh
@@ -27,7 +27,7 @@ rapids-logger "Install libcudf, pylibcudf and cudf_polars"
 rapids-pip-retry install \
     -v \
     --constraint "${PIP_CONSTRAINT}" \
-    "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,rapidsmpf]" \
+    "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \
     "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
     "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)"
 
diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml
index 35939809d53..3bf006400bc 100644
--- a/conda/environments/all_cuda-129_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -85,6 +85,7 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0
 - rapids-dask-dependency==26.6.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
+- rapidsmpf==26.6.*,>=0.0.0a0
 - rich
 - rmm==26.6.*,>=0.0.0a0
 - s3fs>=2022.3.0
diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml
index d88d82e4999..d206963fc36 100644
--- a/conda/environments/all_cuda-129_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -85,6 +85,7 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0
 - rapids-dask-dependency==26.6.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
+- rapidsmpf==26.6.*,>=0.0.0a0
 - rich
 - rmm==26.6.*,>=0.0.0a0
 - s3fs>=2022.3.0
diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml
index f423c19b51d..c73cea89f29 100644
--- a/conda/environments/all_cuda-131_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-131_arch-aarch64.yaml
@@ -85,6 +85,7 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0
 - rapids-dask-dependency==26.6.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
+- rapidsmpf==26.6.*,>=0.0.0a0
 - rich
 - rmm==26.6.*,>=0.0.0a0
 - s3fs>=2022.3.0
diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml
index 3c9bffa4738..f4d88866690 100644
--- a/conda/environments/all_cuda-131_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-131_arch-x86_64.yaml
@@ -85,6 +85,7 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0
 - rapids-dask-dependency==26.6.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
+- rapidsmpf==26.6.*,>=0.0.0a0
 - rich
 - rmm==26.6.*,>=0.0.0a0
 - s3fs>=2022.3.0
diff --git a/conda/recipes/cudf-polars/recipe.yaml b/conda/recipes/cudf-polars/recipe.yaml
index e3a21aa1afd..52ac74c7c8b 100644
--- a/conda/recipes/cudf-polars/recipe.yaml
+++ b/conda/recipes/cudf-polars/recipe.yaml
@@ -36,6 +36,7 @@ requirements:
     - nvidia-ml-py>=12
     - python
     - pylibcudf =${{ version }}
+    - rapidsmpf =${{ minor_version }}
     - polars>=1.30,<1.39
     - packaging
     - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
diff --git a/dependencies.yaml b/dependencies.yaml
index 9728b3ea667..b1eb276befb 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -8,6 +8,51 @@ files:
     matrix:
       cuda: ["12.9", "13.1"]
       arch: [x86_64, aarch64]
+    includes:
+      - build_base
+      - build_all
+      - build_cpp
+      - build_python_common
+      - clang
+      - cuda
+      - cuda_version
+      - cudf_polars_trace
+      - depends_on_cupy
+      - depends_on_cuda_python
+      - depends_on_dask_cuda
+      - depends_on_libkvikio
+      - depends_on_librmm
+      - depends_on_libnvcomp
+      - depends_on_numba_cuda
+      - depends_on_rapids_logger
+      - depends_on_rapidsmpf
+      - depends_on_rmm
+      - develop
+      - docs
+      - iwyu
+      - notebooks
+      - numpy_run
+      - py_version
+      - pyarrow_run
+      - rapids_build_skbuild
+      - rapids_build_setuptools
+      - run_common
+      - run_cudf
+      - run_cudf_polars
+      - run_pylibcudf
+      - run_dask_cudf
+      - run_custreamz
+      - test_cpp
+      - test_python_common
+      - test_python_cudf
+      - test_python_cudf_common
+      - test_python_pylibcudf
+      - test_python_cudf_pandas
+      - test_python_cudf_polars
+      - test_python_s3
+  devcontainers:
+    output: none
+    # Same as "all", excluding depends_on_rapidsmpf (which is built from source)
     includes:
       - build_base
       - build_all
@@ -328,6 +373,7 @@ files:
       table: project
     includes:
       - run_cudf_polars
+      - depends_on_rapidsmpf
       - depends_on_pylibcudf
       - depends_on_cuda_python
   py_run_cudf_polars_experimental:
@@ -338,15 +384,6 @@ files:
       key: experimental
     includes:
       - run_cudf_polars_experimental
-  py_run_cudf_polars_rapidsmpf:
-    output: pyproject
-    pyproject_dir: python/cudf_polars
-    extras:
-      table: project.optional-dependencies
-      key: rapidsmpf
-    includes:
-      - depends_on_rapidsmpf
-      - pyarrow_run
   py_test_cudf_polars:
     output: pyproject
     pyproject_dir: python/cudf_polars
@@ -360,7 +397,6 @@ files:
       - test_python_common
       - test_python_cudf_polars
       - cudf_polars_trace
-      - depends_on_rapidsmpf
   py_trace_cudf_polars:
     output: pyproject
     pyproject_dir: python/cudf_polars
diff --git a/docs/cudf/source/cudf_polars/api.md b/docs/cudf/source/cudf_polars/api.md
index 741b2f6f758..823954a3b08 100644
--- a/docs/cudf/source/cudf_polars/api.md
+++ b/docs/cudf/source/cudf_polars/api.md
@@ -13,7 +13,6 @@ For the most part, the public API of `cudf-polars` is the polars API.
       ExecutorType,
       InMemoryExecutor,
       ParquetOptions,
-      ShuffleMethod,
       StreamingExecutor,
       StreamingFallbackMode,
 ```
diff --git a/docs/cudf/source/cudf_polars/engine_options.md b/docs/cudf/source/cudf_polars/engine_options.md
index 67e601467d9..ba6085275b8 100644
--- a/docs/cudf/source/cudf_polars/engine_options.md
+++ b/docs/cudf/source/cudf_polars/engine_options.md
@@ -52,7 +52,7 @@ For example, the environment variable
 `max_rows_per_partition` to use if it isn't overridden through
 `executor_options`.
 
-For boolean options, like `rapidsmpf_spill`, the values `{"1", "true", "yes", "y"}`
+For boolean options, like `sink_to_directory`, the values `{"1", "true", "yes", "y"}`
 are considered `True` and `{"0", "false", "no", "n"}` are considered `False`.
 
 See [Configuration Reference](#cudf-polars-api) for a full list of options, and
diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py
index fb915784f96..acd0452ae1b 100644
--- a/python/cudf_polars/cudf_polars/callback.py
+++ b/python/cudf_polars/cudf_polars/callback.py
@@ -159,13 +159,12 @@ def set_memory_resource(
     """
     previous = rmm.mr.get_current_device_resource()
     if mr is None:
-        # Use cuda async by default with the rapidsmpf runtime.
+        # Use cuda async by default with the streaming executor.
         if (
             memory_resource_config is None
             and executor.name == "streaming"
-            and executor.runtime == "rapidsmpf"
             and (device_size := get_total_device_memory()) is not None
-        ):  # pragma: no cover; Requires rapidsmpf runtime.
+        ):  # pragma: no cover
             memory_resource_config = MemoryResourceConfig(
                 qualname="rmm.mr.CudaAsyncMemoryResource",
                 options={
diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 5dd8898bde2..b21485ac41e 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -15,7 +15,6 @@
 
 from cudf_polars.dsl.expressions.aggregation import Agg
 from cudf_polars.dsl.expressions.base import (
-    AggInfo,
     Col,
     ColRef,
     ErrorExpr,
@@ -37,7 +36,6 @@
 
 __all__ = [
     "Agg",
-    "AggInfo",
     "BinOp",
     "BooleanFunction",
     "Cast",
diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/base.py b/python/cudf_polars/cudf_polars/dsl/expressions/base.py
index 3336ea3fd7c..b97be71b771 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/base.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/base.py
@@ -8,7 +8,7 @@
 
 import enum
 from enum import IntEnum
-from typing import TYPE_CHECKING, Any, ClassVar, NamedTuple
+from typing import TYPE_CHECKING, Any, ClassVar
 
 import pylibcudf as plc
 
@@ -20,11 +20,7 @@
 
     from cudf_polars.containers import Column, DataFrame, DataType
 
-__all__ = ["AggInfo", "Col", "ColRef", "ExecutionContext", "Expr", "NamedExpr"]
-
-
-class AggInfo(NamedTuple):
-    requests: list[tuple[Expr | None, plc.aggregation.Aggregation, Expr]]
+__all__ = ["Col", "ColRef", "ExecutionContext", "Expr", "NamedExpr"]
 
 
 class ExecutionContext(IntEnum):
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index bee30183e1c..1c48f70bb11 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -819,11 +819,13 @@ def read_csv_header(
                 # TODO: Nested column names
                 names = chunk.column_names(include_children=False)
                 concatenated_columns = chunk.tbl.columns()
-                while reader.has_next():
+                while reader.has_next():  # pragma: no cover
                     columns = reader.read_chunk().tbl.columns()
                     # Discard columns while concatenating to reduce memory footprint.
                     # Reverse order to avoid O(n^2) list popping cost.
-                    for i in range(len(concatenated_columns) - 1, -1, -1):
+                    for i in range(  # pragma: no cover
+                        len(concatenated_columns) - 1, -1, -1
+                    ):
                         concatenated_columns[i] = plc.concatenate.concatenate(
                             [concatenated_columns[i], columns.pop()], stream=stream
                         )
@@ -840,7 +842,7 @@ def read_csv_header(
                     num_rows=num_rows,
                 )
                 if include_file_paths is not None:
-                    df = Scan.add_file_paths(
+                    df = Scan.add_file_paths(  # pragma: no cover
                         include_file_paths, paths, chunk.num_rows_per_source, df
                     )
             else:
@@ -1164,7 +1166,7 @@ def _write_parquet(
             | plc.io.parquet.ParquetWriterOptionsBuilder
         )
 
-        if (
+        if (  # pragma: no cover
             parquet_options.chunked
             and parquet_options.n_output_chunks != 1
             and df.table.num_rows() != 0
diff --git a/python/cudf_polars/cudf_polars/experimental/base.py b/python/cudf_polars/cudf_polars/experimental/base.py
index 73ed9b3dbe1..80ff0dfacbd 100644
--- a/python/cudf_polars/cudf_polars/experimental/base.py
+++ b/python/cudf_polars/cudf_polars/experimental/base.py
@@ -11,11 +11,10 @@
 from cudf_polars.dsl.traversal import traversal
 
 if TYPE_CHECKING:
-    from collections.abc import Generator, Iterator
+    from collections.abc import Generator
 
     from cudf_polars.dsl.expr import NamedExpr
     from cudf_polars.dsl.ir import IR
-    from cudf_polars.dsl.nodebase import Node
 
 
 class PartitionInfo:
@@ -40,22 +39,12 @@ def __init__(
         self.partitioned_on = partitioned_on
         self.io_plan = io_plan
 
-    def keys(self, node: Node) -> Iterator[tuple[str, int]]:
-        """Return the partitioned keys for a given node."""
-        name = get_key_name(node)
-        yield from ((name, i) for i in range(self.count))
-
     def __rich_repr__(self) -> Generator[Any, None, None]:
         """Formatting for rich.pretty.pprint."""
         yield "count", self.count
         yield "partitioned_on", self.partitioned_on
 
 
-def get_key_name(node: Node) -> str:
-    """Generate the key name for a Node."""
-    return f"{type(node).__name__.lower()}-{hash(node)}"
-
-
 class SerializedDataSourceInfo(TypedDict):
     """The serialized form of DataSourceInfo."""
 
diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py
index d514d4c44e9..74386993737 100644
--- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py
+++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py
@@ -636,26 +636,8 @@ def get_executor_options(
     executor_options: dict[str, Any] = (
         run_config.streaming_options.to_executor_options()
     )
-    executor_options["runtime"] = "rapidsmpf"
     executor_options["max_io_threads"] = run_config.max_io_threads
 
-    # PDSHQueries: inject unique_fraction when dynamic planning is explicitly disabled
-    if (
-        benchmark
-        and benchmark.__name__ == "PDSHQueries"
-        and run_config.executor == "streaming"
-        and run_config.streaming_options.dynamic_planning is None
-    ):
-        executor_options.setdefault(
-            "unique_fraction",
-            {
-                "c_custkey": 0.05,
-                "l_orderkey": 1.0,
-                "l_partkey": 0.1,
-                "o_custkey": 0.25,
-            },
-        )
-
     return executor_options
 
 
@@ -1110,8 +1092,7 @@ def run_polars_spmd(
     from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine
 
     executor_options = get_executor_options(run_config, benchmark=benchmark)
-    # "runtime" and "cluster" are reserved — SPMDEngine sets them
-    executor_options.pop("runtime", None)
+    # "cluster" is reserved — SPMDEngine sets it
     executor_options.pop("cluster", None)
     engine_options = {
         **run_config.streaming_options.to_engine_options(),
@@ -1168,8 +1149,7 @@ def run_polars_ray(
     from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine
 
     executor_options = get_executor_options(run_config, benchmark=benchmark)
-    # "runtime", "cluster" are reserved — RayEngine sets them
-    executor_options.pop("runtime", None)
+    # "cluster" is reserved — RayEngine sets it
     executor_options.pop("cluster", None)
     engine_options: dict[str, Any] = {
         **run_config.streaming_options.to_engine_options(),
@@ -1218,8 +1198,7 @@ def run_polars_dask(
     from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine
 
     executor_options = get_executor_options(run_config, benchmark=benchmark)
-    # "runtime", "cluster" are reserved — DaskEngine sets them
-    executor_options.pop("runtime", None)
+    # "cluster" is reserved — DaskEngine sets it
     executor_options.pop("cluster", None)
     engine_options: dict[str, Any] = {
         **run_config.streaming_options.to_engine_options(),
diff --git a/python/cudf_polars/cudf_polars/experimental/dispatch.py b/python/cudf_polars/cudf_polars/experimental/dispatch.py
index 3ac67b6af46..9ff0cc3156b 100644
--- a/python/cudf_polars/cudf_polars/experimental/dispatch.py
+++ b/python/cudf_polars/cudf_polars/experimental/dispatch.py
@@ -5,7 +5,7 @@
 from __future__ import annotations
 
 from functools import singledispatch
-from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict
+from typing import TYPE_CHECKING, TypeAlias, TypedDict
 
 from cudf_polars.typing import GenericTransformer
 
@@ -13,7 +13,7 @@
     from collections.abc import MutableMapping
 
     from cudf_polars.dsl import ir
-    from cudf_polars.dsl.ir import IR, IRExecutionContext
+    from cudf_polars.dsl.ir import IR
     from cudf_polars.experimental.base import (
         PartitionInfo,
         StatsCollector,
@@ -72,38 +72,3 @@ def lower_ir_node(
     lower_ir_graph
     """
     raise AssertionError(f"Unhandled type {type(ir)}")  # pragma: no cover
-
-
-@singledispatch
-def generate_ir_tasks(
-    ir: IR,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    """
-    Generate a task graph for evaluation of an IR node.
-
-    Parameters
-    ----------
-    ir
-        IR node to generate tasks for.
-    partition_info
-        Partitioning information, obtained from :func:`lower_ir_graph`.
-    context
-        Runtime context for IR node execution.
-
-    Returns
-    -------
-    mapping
-        A (partial) dask task graph for the evaluation of an ir node.
-
-    Notes
-    -----
-    Task generation should only produce the tasks for the current node,
-    referring to child tasks by name.
-
-    See Also
-    --------
-    task_graph
-    """
-    raise AssertionError(f"Unhandled type {type(ir)}")  # pragma: no cover
diff --git a/python/cudf_polars/cudf_polars/experimental/distinct.py b/python/cudf_polars/cudf_polars/experimental/distinct.py
index 9ae148f77d3..564fe570919 100644
--- a/python/cudf_polars/cudf_polars/experimental/distinct.py
+++ b/python/cudf_polars/cudf_polars/experimental/distinct.py
@@ -17,8 +17,6 @@
 from cudf_polars.experimental.shuffle import Shuffle
 from cudf_polars.experimental.utils import (
     _dynamic_planning_on,
-    _fallback_inform,
-    _get_unique_fractions,
     _lower_ir_fallback,
 )
 
@@ -35,8 +33,6 @@ def lower_distinct(
     child: IR,
     partition_info: MutableMapping[IR, PartitionInfo],
     config_options: ConfigOptions[StreamingExecutor],
-    *,
-    unique_fraction: float | None = None,
 ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
     """
     Lower a Distinct IR into partition-wise stages.
@@ -56,9 +52,6 @@ def lower_distinct(
         associated partitioning information.
     config_options
         GPUEngine configuration options.
-    unique_fraction
-        Fraction of unique values to total values. Used for algorithm selection.
-        A value of `1.0` means the column is unique.
 
     Returns
     -------
@@ -68,69 +61,24 @@ def lower_distinct(
         A mapping from unique nodes in the new graph to associated
         partitioning information.
     """
-    subset: frozenset[str] = ir.subset or frozenset(ir.schema)
-    distinct_keys = tuple(
-        NamedExpr(name, Col(ir.schema[name], name))
-        for name in ir.schema
-        if name in subset
-    )
-
     child_count = partition_info[child].count
-    shuffled = partition_info[child].partitioned_on == distinct_keys
 
-    # Check for ordering requirements (shuffle is not stable)
-    require_tree_reduction = ir.stable or ir.keep in (
-        plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST,
-        plc.stream_compaction.DuplicateKeepOption.KEEP_LAST,
-    )
-
-    output_count = 1
-    n_ary = 32  # Arbitrary default (for now)
+    n_ary = 32
     if ir.zlice is not None and ir.zlice[1] is not None:
-        # Head/tail slice operation has been pushed into Distinct
-        # (caller ensures only simple slices reach here)
         n_ary = max(1_000_000 // ir.zlice[1], 2)
-    elif unique_fraction is not None:
-        # Use unique_fraction to determine partitioning
-        n_ary = min(max(int(1.0 / unique_fraction), 2), child_count)
-        output_count = max(int(unique_fraction * child_count), 1)
-
-    if output_count > 1 and require_tree_reduction:
-        # Need to reduce down to a single partition even
-        # if the unique_fraction is large.
-        output_count = 1
-        _fallback_inform(
-            "Unsupported unique options for multiple partitions.",
-            config_options,
-        )
 
     # Partition-wise unique
     count = child_count
     new_node: IR = ir.reconstruct([child])
     partition_info[new_node] = PartitionInfo(count=count)
 
-    if shuffled or output_count == 1:
-        # Tree reduction
-        while count > output_count:
-            new_node = Repartition(new_node.schema, new_node)
-            count = max(math.ceil(count / n_ary), output_count)
-            partition_info[new_node] = PartitionInfo(count=count)
-            new_node = ir.reconstruct([new_node])
-            partition_info[new_node] = PartitionInfo(count=count)
-    else:
-        # Shuffle
-        new_node = Shuffle(
-            new_node.schema,
-            distinct_keys,
-            config_options.executor.shuffle_method,
-            new_node,
-        )
-        partition_info[new_node] = PartitionInfo(count=output_count)
+    # Tree reduction
+    while count > 1:
+        new_node = Repartition(new_node.schema, new_node)
+        count = max(math.ceil(count / n_ary), 1)
+        partition_info[new_node] = PartitionInfo(count=count)
         new_node = ir.reconstruct([new_node])
-        partition_info[new_node] = PartitionInfo(
-            count=output_count,
-            partitioned_on=distinct_keys,
-        )
+        partition_info[new_node] = PartitionInfo(count=count)
 
     return new_node, partition_info
 
@@ -172,7 +120,6 @@ def _(
             child = Shuffle(
                 child.schema,
                 distinct_keys,
-                config_options.executor.shuffle_method,
                 child,
             )
             partition_info[child] = PartitionInfo(
@@ -202,19 +149,9 @@ def _(
         )
         return dynamic_node, partition_info
 
-    # Non-dynamic planning: use unique_fraction heuristics
-    unique_fraction_dict = _get_unique_fractions(
-        tuple(subset),
-        config_options.executor.unique_fraction,
-    )
-    unique_fraction = (
-        max(unique_fraction_dict.values()) if unique_fraction_dict else None
-    )
-
     return lower_distinct(
         ir,
         child,
         partition_info,
         config_options,
-        unique_fraction=unique_fraction,
     )
diff --git a/python/cudf_polars/cudf_polars/experimental/explain.py b/python/cudf_polars/cudf_polars/experimental/explain.py
index 82f023b229c..d50d9fae0ae 100644
--- a/python/cudf_polars/cudf_polars/experimental/explain.py
+++ b/python/cudf_polars/cudf_polars/experimental/explain.py
@@ -288,10 +288,7 @@ def _(ir: GroupBy) -> dict[str, Serializable]:
 
 @_serialize_properties.register
 def _(ir: Shuffle) -> dict[str, Serializable]:
-    return {
-        "keys": [ne.name for ne in ir.keys],
-        "shuffle_method": ir.shuffle_method.value,
-    }
+    return {"keys": [ne.name for ne in ir.keys]}
 
 
 @_serialize_properties.register
diff --git a/python/cudf_polars/cudf_polars/experimental/expressions.py b/python/cudf_polars/cudf_polars/experimental/expressions.py
index d2a0070d009..d6df4cae8f9 100644
--- a/python/cudf_polars/cudf_polars/experimental/expressions.py
+++ b/python/cudf_polars/cudf_polars/experimental/expressions.py
@@ -41,22 +41,18 @@
 
 from cudf_polars.containers import DataType
 from cudf_polars.dsl.expressions.aggregation import Agg
-from cudf_polars.dsl.expressions.base import Col, ExecutionContext, Expr, NamedExpr
+from cudf_polars.dsl.expressions.base import Col, ExecutionContext, NamedExpr
 from cudf_polars.dsl.expressions.binaryop import BinOp
 from cudf_polars.dsl.expressions.literal import Literal
 from cudf_polars.dsl.expressions.ternary import Ternary
 from cudf_polars.dsl.expressions.unary import Cast, Len, UnaryFunction
-from cudf_polars.dsl.ir import IR, Distinct, Empty, HConcat, Select
+from cudf_polars.dsl.ir import Distinct, Empty, HConcat, Select
 from cudf_polars.dsl.traversal import (
     CachingVisitor,
 )
 from cudf_polars.experimental.base import PartitionInfo
 from cudf_polars.experimental.repartition import Repartition
-from cudf_polars.experimental.utils import (
-    _dynamic_planning_on,
-    _get_unique_fractions,
-    _leaf_column_names,
-)
+from cudf_polars.experimental.utils import _dynamic_planning_on
 
 if TYPE_CHECKING:
     from collections.abc import Generator, MutableMapping, Sequence
@@ -197,15 +193,6 @@ def _decompose_unique(
     )
     (column,) = columns
 
-    unique_fraction_dict = _get_unique_fractions(
-        _leaf_column_names(child),
-        config_options.executor.unique_fraction,
-    )
-
-    unique_fraction = (
-        max(unique_fraction_dict.values()) if unique_fraction_dict else None
-    )
-
     input_ir, partition_info = lower_distinct(
         Distinct(
             {column.name: column.dtype},
@@ -218,7 +205,6 @@ def _decompose_unique(
         input_ir,
         partition_info,
         config_options,
-        unique_fraction=unique_fraction,
     )
 
     return column, input_ir, partition_info
@@ -344,7 +330,6 @@ def _decompose_agg_node(
             input_ir = Shuffle(
                 input_ir.schema,
                 shuffle_on,
-                config_options.executor.shuffle_method,
                 input_ir,
             )
             partition_info[input_ir] = PartitionInfo(
diff --git a/python/cudf_polars/cudf_polars/experimental/groupby.py b/python/cudf_polars/cudf_polars/experimental/groupby.py
index 898dfdbf03f..6a17b56bfc5 100644
--- a/python/cudf_polars/cudf_polars/experimental/groupby.py
+++ b/python/cudf_polars/cudf_polars/experimental/groupby.py
@@ -36,7 +36,6 @@
 from cudf_polars.experimental.shuffle import Shuffle
 from cudf_polars.experimental.utils import (
     _dynamic_planning_on,
-    _get_unique_fractions,
     _lower_ir_fallback,
 )
 
@@ -390,7 +389,6 @@ def _(
 
     # Check if we are dealing with any high-cardinality columns
     post_aggregation_count = 1  # Default tree reduction
-    groupby_key_columns = [ne.name for ne in ir.keys]
     shuffled = partition_info[child].partitioned_on == ir.keys
     child_count = partition_info[child].count
 
@@ -421,7 +419,6 @@ def _(
         child = Shuffle(
             child.schema,
             ir.keys,
-            config_options.executor.shuffle_method,
             child,
         )
         partition_info[child] = PartitionInfo(
@@ -441,14 +438,6 @@ def _(
         )
         return dynamic_node, partition_info
 
-    if unique_fraction_dict := _get_unique_fractions(
-        groupby_key_columns,
-        config_options.executor.unique_fraction,
-    ):
-        # Use unique_fraction to determine output partitioning
-        unique_fraction = max(unique_fraction_dict.values())
-        post_aggregation_count = max(int(unique_fraction * child_count), 1)
-
     # Partition-wise groupby operation
     pwise_schema = {k.name: k.value.dtype for k in ir.keys} | {
         k.name: k.value.dtype for k in piecewise_exprs
@@ -465,46 +454,28 @@ def _(
     partition_info[gb_pwise] = PartitionInfo(count=child_count)
     grouped_keys = tuple(NamedExpr(k.name, Col(k.value.dtype, k.name)) for k in ir.keys)
 
-    # Reduction
-    gb_inter: GroupBy | Repartition | Shuffle
+    # N-ary tree reduction
+    gb_inter: GroupBy | Repartition
     reduction_schema = {k.name: k.value.dtype for k in grouped_keys} | {
         k.name: k.value.dtype for k in reduction_exprs
     }
-    if not shuffled and post_aggregation_count > 1:
-        # Shuffle reduction
-        if ir.maintain_order:  # pragma: no cover
-            return _lower_ir_fallback(
-                ir,
-                rec,
-                msg="maintain_order not supported for multiple output partitions.",
+    n_ary = 32
+    count = child_count
+    gb_inter = gb_pwise
+    while count > post_aggregation_count:
+        gb_inter = Repartition(gb_inter.schema, gb_inter)
+        count = max(math.ceil(count / n_ary), post_aggregation_count)
+        partition_info[gb_inter] = PartitionInfo(count=count)
+        if count > post_aggregation_count:
+            gb_inter = GroupBy(
+                reduction_schema,
+                grouped_keys,
+                reduction_exprs,
+                ir.maintain_order,
+                None,
+                gb_inter,
             )
-
-        gb_inter = Shuffle(
-            gb_pwise.schema,
-            grouped_keys,
-            config_options.executor.shuffle_method,
-            gb_pwise,
-        )
-        partition_info[gb_inter] = PartitionInfo(count=post_aggregation_count)
-    else:
-        # N-ary tree reduction
-        n_ary = config_options.executor.groupby_n_ary
-        count = child_count
-        gb_inter = gb_pwise
-        while count > post_aggregation_count:
-            gb_inter = Repartition(gb_inter.schema, gb_inter)
-            count = max(math.ceil(count / n_ary), post_aggregation_count)
             partition_info[gb_inter] = PartitionInfo(count=count)
-            if count > post_aggregation_count:
-                gb_inter = GroupBy(
-                    reduction_schema,
-                    grouped_keys,
-                    reduction_exprs,
-                    ir.maintain_order,
-                    None,
-                    gb_inter,
-                )
-                partition_info[gb_inter] = PartitionInfo(count=count)
 
     # Final aggregation
     gb_reduce = GroupBy(
diff --git a/python/cudf_polars/cudf_polars/experimental/io.py b/python/cudf_polars/cudf_polars/experimental/io.py
index f45baa054dd..2cea0274ee6 100644
--- a/python/cudf_polars/cudf_polars/experimental/io.py
+++ b/python/cudf_polars/cudf_polars/experimental/io.py
@@ -4,13 +4,11 @@
 
 from __future__ import annotations
 
-import dataclasses
 import functools
 import itertools
 import math
 import statistics
 from collections import defaultdict
-from functools import partial
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, overload
 
@@ -24,16 +22,14 @@
     Empty,
     Scan,
     Sink,
-    Union,
 )
 from cudf_polars.experimental.base import (
     IOPartitionFlavor,
     IOPartitionPlan,
     PartitionInfo,
     SerializedDataSourceInfo,
-    get_key_name,
 )
-from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node
+from cudf_polars.experimental.dispatch import lower_ir_node
 from cudf_polars.utils.config import Cluster
 from cudf_polars.utils.cuda_stream import get_cuda_stream
 from cudf_polars.utils.versions import POLARS_VERSION_LT_137
@@ -62,36 +58,9 @@
 def _(
     ir: DataFrameScan, rec: LowerIRTransformer
 ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
-    config_options = rec.state["config_options"]
+    from cudf_polars.experimental.rapidsmpf.io import lower_dataframescan_rapidsmpf
 
-    # RapidsMPF runtime: Use rapidsmpf-specific lowering
-    if (
-        config_options.executor.runtime == "rapidsmpf"
-    ):  # pragma: no cover; Requires rapidsmpf runtime
-        from cudf_polars.experimental.rapidsmpf.io import lower_dataframescan_rapidsmpf
-
-        return lower_dataframescan_rapidsmpf(ir, rec)
-
-    rows_per_partition = config_options.executor.max_rows_per_partition
-    nrows = max(ir.df.shape()[0], 1)
-    count = math.ceil(nrows / rows_per_partition)
-
-    if count > 1:
-        length = math.ceil(nrows / count)
-        slices = [
-            DataFrameScan(
-                ir.schema,
-                ir.df.slice(offset, length),
-                ir.projection,
-            )
-            for offset in range(0, nrows, length)
-        ]
-        new_node = Union(ir.schema, None, *slices)
-        return new_node, {slice: PartitionInfo(count=1) for slice in slices} | {
-            new_node: PartitionInfo(count=count)
-        }
-
-    return ir, {ir: PartitionInfo(count=1)}
+    return lower_dataframescan_rapidsmpf(ir, rec)
 
 
 def scan_partition_plan(
@@ -285,84 +254,9 @@ def _(
 def _(
     ir: Scan, rec: LowerIRTransformer
 ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
-    partition_info: MutableMapping[IR, PartitionInfo]
-    config_options = rec.state["config_options"]
-
-    # RapidsMPF runtime: Use rapidsmpf-specific lowering
-    if (
-        config_options.executor.name == "streaming"
-        and config_options.executor.runtime == "rapidsmpf"
-    ):  # pragma: no cover; Requires rapidsmpf runtime
-        from cudf_polars.experimental.rapidsmpf.io import lower_scan_rapidsmpf
+    from cudf_polars.experimental.rapidsmpf.io import lower_scan_rapidsmpf
 
-        return lower_scan_rapidsmpf(ir, rec)
-
-    if (
-        ir.typ in ("csv", "parquet", "ndjson")
-        and ir.n_rows == -1
-        and ir.skip_rows == 0
-        and ir.row_index is None
-    ):
-        plan = scan_partition_plan(ir, rec.state["stats"], config_options)
-        paths = list(ir.paths)
-        if plan.flavor == IOPartitionFlavor.SPLIT_FILES:
-            # Disable chunked reader when splitting files
-            parquet_options = dataclasses.replace(
-                config_options.parquet_options,
-                chunked=False,
-            )
-
-            slices: list[SplitScan] = []
-            for path in paths:
-                base_scan = Scan(
-                    ir.schema,
-                    ir.typ,
-                    ir.reader_options,
-                    ir.cloud_options,
-                    [path],
-                    ir.with_columns,
-                    ir.skip_rows,
-                    ir.n_rows,
-                    ir.row_index,
-                    ir.include_file_paths,
-                    ir.predicate,
-                    parquet_options,
-                )
-                slices.extend(
-                    SplitScan(
-                        ir.schema, base_scan, sindex, plan.factor, parquet_options
-                    )
-                    for sindex in range(plan.factor)
-                )
-            new_node = Union(ir.schema, None, *slices)
-            partition_info = {slice: PartitionInfo(count=1) for slice in slices} | {
-                new_node: PartitionInfo(count=len(slices))
-            }
-        else:
-            groups: list[Scan] = [
-                Scan(
-                    ir.schema,
-                    ir.typ,
-                    ir.reader_options,
-                    ir.cloud_options,
-                    paths[i : i + plan.factor],
-                    ir.with_columns,
-                    ir.skip_rows,
-                    ir.n_rows,
-                    ir.row_index,
-                    ir.include_file_paths,
-                    ir.predicate,
-                    config_options.parquet_options,
-                )
-                for i in range(0, len(paths), plan.factor)
-            ]
-            new_node = Union(ir.schema, None, *groups)
-            partition_info = {group: PartitionInfo(count=1) for group in groups} | {
-                new_node: PartitionInfo(count=len(groups))
-            }
-        return new_node, partition_info
-
-    return ir, {ir: PartitionInfo(count=1)}  # pragma: no cover
+    return lower_scan_rapidsmpf(ir, rec)
 
 
 class StreamingSink(IR):
@@ -441,22 +335,6 @@ def _prepare_sink_directory(path: str) -> None:
     Path(path).mkdir(parents=True, exist_ok=True)
 
 
-def _sink_to_directory(
-    schema: Schema,
-    kind: str,
-    path: str,
-    parquet_options: ParquetOptions,
-    options: dict[str, Any],
-    df: DataFrame,
-    ready: None,
-    context: IRExecutionContext,
-) -> DataFrame:
-    """Sink a partition to a new file."""
-    return Sink.do_evaluate(
-        schema, kind, path, parquet_options, options, df, context=context
-    )
-
-
 def _sink_to_parquet_file(
     path: str,
     options: dict[str, Any],
@@ -545,106 +423,6 @@ def _sink_to_file(
     return True
 
 
-def _finalize_file_sink(
-    kind: str,
-    writer_state: Any,
-    df: DataFrame,
-) -> DataFrame:
-    """Finalize the file sink by closing the writer."""
-    if kind == "Parquet" and writer_state is not None:
-        writer_state.close([])
-    return df.slice((0, 0))
-
-
-def _file_sink_graph(
-    ir: StreamingSink,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    """Sink to a single file."""
-    name = get_key_name(ir)
-    count = partition_info[ir].count
-    child_name = get_key_name(ir.children[0])
-    sink = ir.sink
-    if count == 1:
-        return {
-            (name, 0): (
-                partial(sink.do_evaluate, context=context),
-                *sink._non_child_args,
-                (child_name, 0),
-            )
-        }
-
-    sink_name = get_key_name(sink)
-    graph: MutableMapping[Any, Any] = {
-        (sink_name, i): (
-            _sink_to_file,
-            sink.kind,
-            sink.path,
-            sink.options,
-            None if i == 0 else (sink_name, i - 1),  # Writer state
-            (child_name, i),
-        )
-        for i in range(count)
-    }
-
-    # Finalize task closes the writer after all chunks are written
-    graph[(sink_name, "finalize")] = (
-        _finalize_file_sink,
-        sink.kind,
-        (sink_name, count - 1),  # Writer state from last task
-        (child_name, count - 1),  # Last source df for creating empty result
-    )
-
-    # Make sure final tasks point to finalize task
-    graph.update({(name, i): (sink_name, "finalize") for i in range(count)})
-    return graph
-
-
-def _directory_sink_graph(
-    ir: StreamingSink,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    """Sink to a directory of files."""
-    name = get_key_name(ir)
-    count = partition_info[ir].count
-    child_name = get_key_name(ir.children[0])
-    sink = ir.sink
-
-    setup_name = f"setup-{name}"
-    suffix = sink.kind.lower()
-    width = math.ceil(math.log10(count))
-    graph: MutableMapping[Any, Any] = {
-        (name, i): (
-            _sink_to_directory,
-            sink.schema,
-            sink.kind,
-            f"{sink.path}/part.{str(i).zfill(width)}.{suffix}",
-            sink.parquet_options,
-            sink.options,
-            (child_name, i),
-            setup_name,
-            context,
-        )
-        for i in range(count)
-    }
-    graph[setup_name] = (_prepare_sink_directory, sink.path)
-    return graph
-
-
-@generate_ir_tasks.register(StreamingSink)
-def _(
-    ir: StreamingSink,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    if ir.sink_to_directory:
-        return _directory_sink_graph(ir, partition_info, context=context)
-    else:
-        return _file_sink_graph(ir, partition_info, context=context)
-
-
 class ParquetMetadata:
     """
     Parquet metadata container.
diff --git a/python/cudf_polars/cudf_polars/experimental/join.py b/python/cudf_polars/cudf_polars/experimental/join.py
index 47d0ad90d8e..cd5c514b45a 100644
--- a/python/cudf_polars/cudf_polars/experimental/join.py
+++ b/python/cudf_polars/cudf_polars/experimental/join.py
@@ -5,16 +5,15 @@
 from __future__ import annotations
 
 import operator
-from functools import partial, reduce
-from typing import TYPE_CHECKING, Any
+from functools import reduce
+from typing import TYPE_CHECKING
 
 from cudf_polars.dsl.ir import ConditionalJoin, Join, Slice
-from cudf_polars.experimental.base import PartitionInfo, get_key_name
-from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node
+from cudf_polars.experimental.base import PartitionInfo
+from cudf_polars.experimental.dispatch import lower_ir_node
 from cudf_polars.experimental.repartition import Repartition
-from cudf_polars.experimental.shuffle import Shuffle, _hash_partition_dataframe
+from cudf_polars.experimental.shuffle import Shuffle
 from cudf_polars.experimental.utils import (
-    _concat,
     _dynamic_planning_on,
     _fallback_inform,
     _lower_ir_fallback,
@@ -24,16 +23,14 @@
     from collections.abc import MutableMapping
 
     from cudf_polars.dsl.expr import NamedExpr
-    from cudf_polars.dsl.ir import IR, IRExecutionContext
+    from cudf_polars.dsl.ir import IR
     from cudf_polars.experimental.parallel import LowerIRTransformer
-    from cudf_polars.utils.config import ShuffleMethod
 
 
 def _maybe_shuffle_frame(
     frame: IR,
     on: tuple[NamedExpr, ...],
     partition_info: MutableMapping[IR, PartitionInfo],
-    shuffle_method: ShuffleMethod,
     output_count: int,
 ) -> IR:
     # Shuffle `frame` if it isn't already shuffled.
@@ -48,7 +45,6 @@ def _maybe_shuffle_frame(
         frame = Shuffle(
             frame.schema,
             on,
-            shuffle_method,
             frame,
         )
         partition_info[frame] = PartitionInfo(
@@ -64,21 +60,18 @@ def _make_hash_join(
     partition_info: MutableMapping[IR, PartitionInfo],
     left: IR,
     right: IR,
-    shuffle_method: ShuffleMethod,
 ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
     # Shuffle left and right dataframes (if necessary)
     left = _maybe_shuffle_frame(
         left,
         ir.left_on,
         partition_info,
-        shuffle_method,
         output_count,
     )
     right = _maybe_shuffle_frame(
         right,
         ir.right_on,
         partition_info,
-        shuffle_method,
         output_count,
     )
     # Always reconstruct in case children contain Cache nodes
@@ -146,45 +139,7 @@ def _make_bcast_join(
     partition_info: MutableMapping[IR, PartitionInfo],
     left: IR,
     right: IR,
-    shuffle_method: ShuffleMethod,
-    *,
-    streaming_runtime: str,
 ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
-    if ir.options[0] != "Inner":
-        left_count = partition_info[left].count
-        right_count = partition_info[right].count
-
-        # Shuffle the smaller table (if necessary) - Notes:
-        # - We need to shuffle the smaller table if
-        #   (1) we are not doing an "inner" join,
-        #   and (2) the small table contains multiple
-        #   partitions.
-        # - We cannot simply join a large-table partition
-        #   to each small-table partition, and then
-        #   concatenate the partial-join results, because
-        #   a non-"inner" join does NOT commute with
-        #   concatenation.
-        # - In some cases, we can perform the partial joins
-        #   sequentially. However, we are starting with a
-        #   catch-all algorithm that works for all cases.
-        if streaming_runtime == "tasks":
-            if left_count >= right_count:
-                right = _maybe_shuffle_frame(
-                    right,
-                    ir.right_on,
-                    partition_info,
-                    shuffle_method,
-                    right_count,
-                )
-            else:
-                left = _maybe_shuffle_frame(
-                    left,
-                    ir.left_on,
-                    partition_info,
-                    shuffle_method,
-                    left_count,
-                )
-
     new_node = ir.reconstruct([left, right])
     partition_info[new_node] = PartitionInfo(count=output_count)
     return new_node, partition_info
@@ -301,8 +256,6 @@ def _(
             partition_info,
             left,
             right,
-            config_options.executor.shuffle_method,
-            streaming_runtime=config_options.executor.runtime,
         )
     else:
         # Create a hash join
@@ -312,109 +265,4 @@ def _(
             partition_info,
             left,
             right,
-            config_options.executor.shuffle_method,
         )
-
-
-@generate_ir_tasks.register(Join)
-def _(
-    ir: Join,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    left, right = ir.children
-    output_count = partition_info[ir].count
-
-    left_partitioned = (
-        partition_info[left].partitioned_on == ir.left_on
-        and partition_info[left].count == output_count
-    )
-    right_partitioned = (
-        partition_info[right].partitioned_on == ir.right_on
-        and partition_info[right].count == output_count
-    )
-
-    if output_count == 1 or (left_partitioned and right_partitioned):
-        # Partition-wise join
-        left_name = get_key_name(left)
-        right_name = get_key_name(right)
-        return {
-            key: (
-                partial(ir.do_evaluate, context=context),
-                *ir._non_child_args,
-                (left_name, i),
-                (right_name, i),
-            )
-            for i, key in enumerate(partition_info[ir].keys(ir))
-        }
-    else:
-        # Broadcast join
-        left_parts = partition_info[left]
-        right_parts = partition_info[right]
-        if left_parts.count >= right_parts.count:
-            small_side = "Right"
-            small_name = get_key_name(right)
-            small_size = partition_info[right].count
-            large_name = get_key_name(left)
-            large_on = ir.left_on
-        else:
-            small_side = "Left"
-            small_name = get_key_name(left)
-            small_size = partition_info[left].count
-            large_name = get_key_name(right)
-            large_on = ir.right_on
-
-        graph: MutableMapping[Any, Any] = {}
-
-        out_name = get_key_name(ir)
-        out_size = partition_info[ir].count
-        split_name = f"split-{out_name}"
-        getit_name = f"getit-{out_name}"
-        inter_name = f"inter-{out_name}"
-
-        # Split each large partition if we have
-        # multiple small partitions (unless this
-        # is an inner join)
-        split_large = ir.options[0] != "Inner" and small_size > 1
-
-        for part_out in range(out_size):
-            if split_large:
-                graph[(split_name, part_out)] = (
-                    _hash_partition_dataframe,
-                    (large_name, part_out),
-                    part_out,
-                    small_size,
-                    None,
-                    large_on,
-                )
-
-            _concat_list = []
-            for j in range(small_size):
-                left_key: tuple[str, int] | tuple[str, int, int]
-                if split_large:
-                    left_key = (getit_name, part_out, j)
-                    graph[left_key] = (operator.getitem, (split_name, part_out), j)
-                else:
-                    left_key = (large_name, part_out)
-                join_children = [left_key, (small_name, j)]
-                if small_side == "Left":
-                    join_children.reverse()
-
-                inter_key = (inter_name, part_out, j)
-                graph[(inter_name, part_out, j)] = (
-                    partial(ir.do_evaluate, context=context),
-                    ir.left_on,
-                    ir.right_on,
-                    ir.options,
-                    *join_children,
-                )
-                _concat_list.append(inter_key)
-            if len(_concat_list) == 1:
-                graph[(out_name, part_out)] = graph.pop(_concat_list[0])
-            else:
-                graph[(out_name, part_out)] = (
-                    partial(_concat, context=context),
-                    *_concat_list,
-                )
-
-        return graph
diff --git a/python/cudf_polars/cudf_polars/experimental/parallel.py b/python/cudf_polars/cudf_polars/experimental/parallel.py
index f77e923bce0..ab5d3b5bd90 100644
--- a/python/cudf_polars/cudf_polars/experimental/parallel.py
+++ b/python/cudf_polars/cudf_polars/experimental/parallel.py
@@ -4,10 +4,9 @@
 
 from __future__ import annotations
 
-import itertools
 import operator
 from functools import partial, reduce
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 import polars as pl
 
@@ -26,7 +25,6 @@
     Filter,
     HConcat,
     HStack,
-    IRExecutionContext,
     MapFunction,
     Projection,
     Select,
@@ -35,16 +33,11 @@
 )
 from cudf_polars.dsl.traversal import CachingVisitor, traversal
 from cudf_polars.dsl.utils.naming import unique_names
-from cudf_polars.experimental.base import PartitionInfo, get_key_name
-from cudf_polars.experimental.dispatch import (
-    generate_ir_tasks,
-    lower_ir_node,
-)
+from cudf_polars.experimental.base import PartitionInfo
+from cudf_polars.experimental.dispatch import lower_ir_node
 from cudf_polars.experimental.io import _clear_source_info_cache
 from cudf_polars.experimental.repartition import Repartition
-from cudf_polars.experimental.statistics import collect_statistics
 from cudf_polars.experimental.utils import (
-    _concat,
     _contains_over,
     _dynamic_planning_on,
     _lower_ir_fallback,
@@ -52,7 +45,6 @@
 
 if TYPE_CHECKING:
     from collections.abc import MutableMapping
-    from typing import Any
 
     from cudf_polars.experimental.base import StatsCollector
     from cudf_polars.experimental.dispatch import LowerIRTransformer, State
@@ -109,63 +101,6 @@ def lower_ir_graph(
     return mapper(ir)
 
 
-def task_graph(
-    ir: IR,
-    partition_info: MutableMapping[IR, PartitionInfo],
-) -> tuple[MutableMapping[Any, Any], str | tuple[str, int]]:
-    """
-    Construct a task graph for evaluation of an IR graph.
-
-    Parameters
-    ----------
-    ir
-        Root of the graph to rewrite.
-    partition_info
-        A mapping from all unique IR nodes to the
-        associated partitioning information.
-
-    Returns
-    -------
-    graph
-        A task graph for the entire IR graph with root `ir`,
-        in dict-of-tuples form consumed by
-        :func:`~cudf_polars.experimental.scheduler.synchronous_scheduler`.
-
-    Notes
-    -----
-    This function traverses the unique nodes of the
-    graph with root `ir`, and extracts the tasks for
-    each node with :func:`generate_ir_tasks`.
-
-    See Also
-    --------
-    generate_ir_tasks
-    """
-    context = IRExecutionContext()
-    graph = reduce(
-        operator.or_,
-        (
-            generate_ir_tasks(node, partition_info, context=context)
-            for node in traversal([ir])
-        ),
-    )
-
-    key_name = get_key_name(ir)
-    partition_count = partition_info[ir].count
-
-    key: str | tuple[str, int]
-    if partition_count > 1:
-        graph[key_name] = (
-            partial(_concat, context=context),
-            *partition_info[ir].keys(ir),
-        )
-        key = key_name
-    else:
-        key = (key_name, 0)
-
-    return graph, key
-
-
 def evaluate_rapidsmpf(
     ir: IR,
     config_options: ConfigOptions[StreamingExecutor],
@@ -211,44 +146,7 @@ def evaluate_streaming(
     # Clear source info cache in case data was overwritten
     _clear_source_info_cache()
 
-    if (
-        config_options.executor.runtime == "rapidsmpf"
-    ):  # pragma: no cover; rapidsmpf runtime not tested in CI yet
-        # Using the RapidsMPF streaming runtime.
-        return evaluate_rapidsmpf(ir, config_options)
-    else:
-        # Using the default task engine.
-        from cudf_polars.experimental.scheduler import synchronous_scheduler
-
-        stats = collect_statistics(ir, config_options)
-        ir, partition_info = lower_ir_graph(ir, config_options, stats)
-
-        graph, key = task_graph(ir, partition_info)
-
-        return synchronous_scheduler(graph, key).to_polars()
-
-
-@generate_ir_tasks.register(IR)
-def _(
-    ir: IR,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    # Generate pointwise (embarrassingly-parallel) tasks by default
-    child_names = [get_key_name(c) for c in ir.children]
-    bcast_child = [partition_info[c].count == 1 for c in ir.children]
-
-    return {
-        key: (
-            partial(ir.do_evaluate, context=context),
-            *ir._non_child_args,
-            *[
-                (child_name, 0 if bcast_child[j] else i)
-                for j, child_name in enumerate(child_names)
-            ],
-        )
-        for i, key in enumerate(partition_info[ir].keys(ir))
-    }
+    return evaluate_rapidsmpf(ir, config_options)
 
 
 @lower_ir_node.register(Union)
@@ -278,21 +176,6 @@ def _(
     return new_node, partition_info
 
 
-@generate_ir_tasks.register(Union)
-def _(
-    ir: Union,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    key_name = get_key_name(ir)
-    partition = itertools.count()
-    return {
-        (key_name, next(partition)): child_key
-        for child in ir.children
-        for child_key in partition_info[child].keys(child)
-    }
-
-
 @lower_ir_node.register(MapFunction)
 def _(
     ir: MapFunction, rec: LowerIRTransformer
diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py
index 478c0a33beb..97168f0b02d 100644
--- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py
+++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py
@@ -99,8 +99,6 @@ def evaluate_logical_plan(
     -------
     The output DataFrame and metadata collector.
     """
-    assert config_options.executor.runtime == "rapidsmpf", "Runtime must be rapidsmpf"
-
     query_id = uuid.uuid4()
 
     with cudf_polars.dsl.tracing.bound_contextvars(
@@ -202,8 +200,6 @@ def evaluate_pipeline(
     -------
     The output DataFrame and metadata collector.
     """
-    assert config_options.executor.runtime == "rapidsmpf", "Runtime must be rapidsmpf"
-
     _original_mr: Any = None
     use_stream_pool = False
     if rmpf_context is not None:
diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py
index 7bc8dabddec..26ad95198f6 100644
--- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py
+++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py
@@ -436,7 +436,7 @@ def execute_ir_on_rank(
 
 
 _RESERVED_EXECUTOR_KEYS: frozenset[str] = frozenset(
-    {"runtime", "cluster", "spmd_context", "ray_context", "dask_context"}
+    {"cluster", "spmd_context", "ray_context", "dask_context"}
 )
 _RESERVED_ENGINE_KEYS: frozenset[str] = frozenset({"memory_resource", "executor"})
 
diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py
index 49810e998fd..b4300346132 100644
--- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py
+++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py
@@ -688,7 +688,6 @@ def __init__(
             nranks=nranks,
             executor_options={
                 **executor_options,
-                "runtime": "rapidsmpf",
                 "cluster": "dask",
                 "dask_context": dask_ctx,
             },
@@ -736,7 +735,6 @@ def _reset(
             nranks=self._nranks,
             executor_options={
                 **executor_options,
-                "runtime": "rapidsmpf",
                 "cluster": "dask",
                 "dask_context": ctx,
             },
diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py
index d8464aa7426..c7650bff513 100644
--- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py
+++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py
@@ -234,11 +234,6 @@ class StreamingOptions:
         Env: ``CUDF_POLARS__EXECUTOR__DYNAMIC_PLANNING``.
         Default: enabled.
         Category: executor.
-    unique_fraction
-        Per-column uniqueness estimate (0-1). Defaults to ``1.0``.
-        Env: ``CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION``.
-        Default: ``{}``.
-        Category: executor.
     sink_to_directory
         Whether multi-partition sink operations should write to a directory
         rather than a single file. The ``spmd``/``ray``/``dask`` engines
@@ -332,9 +327,6 @@ class StreamingOptions:
     dynamic_planning: dict[str, Any] | DynamicPlanningOptions | None | Unspecified = (
         _opt("executor")
     )
-    unique_fraction: dict[str, float] | Unspecified = _opt(
-        "executor", "CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION", json.loads
-    )
     sink_to_directory: bool | Unspecified = _opt(
         "executor", "CUDF_POLARS__EXECUTOR__SINK_TO_DIRECTORY", parse_boolean
     )
@@ -515,7 +507,6 @@ def _get(attr: str) -> Any:
             broadcast_join_limit=_get("broadcast_join_limit"),
             target_partition_size=target_partition_size,
             dynamic_planning=dynamic_planning,
-            unique_fraction=_get("unique_fraction"),
             raise_on_fail=_get("raise_on_fail"),
             parquet_options=_get("parquet_options"),
             memory_resource_config=_get("memory_resource_config"),
@@ -711,15 +702,6 @@ def _add_cli_args(parser: argparse.ArgumentParser) -> None:
                 Enable dynamic planning. Use --no-dynamic-planning to disable.
                 Env: CUDF_POLARS__EXECUTOR__DYNAMIC_PLANNING. Built-in default: enabled."""),
         )
-        g.add_argument(
-            "--unique-fraction",
-            dest="unique_fraction",
-            default=None,
-            type=json.loads,
-            help=textwrap.dedent("""\
-                Per-column uniqueness estimate as a JSON object (e.g. '{"col": 0.5}').
-                Env: CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION. Built-in default: {}."""),
-        )
         g.add_argument(
             "--stream-policy",
             dest="stream_policy",
diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py
index 1ba92de3e49..efbb1db9ad4 100644
--- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py
+++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py
@@ -90,13 +90,9 @@ def evaluate_pipeline_ray_mode(
 
     Raises
     ------
-    RuntimeError
-        If the configured executor runtime is not ``"rapidsmpf"``.
     RuntimeError
         If ``config_options.executor.ray_context`` is not set.
     """
-    if config_options.executor.runtime != "rapidsmpf":
-        raise RuntimeError("Runtime must be rapidsmpf")
     if config_options.executor.ray_context is None:
         raise RuntimeError("ray_context must be set when cluster='ray'")
     rank_actors = config_options.executor.ray_context.rank_actors
@@ -586,7 +582,6 @@ def __init__(
                 nranks=nranks,
                 executor_options={
                     **executor_options,
-                    "runtime": "rapidsmpf",
                     "cluster": "ray",
                     "ray_context": RayContext(rank_actors),
                 },
@@ -641,7 +636,6 @@ def _reset(
             nranks=len(self._rank_actors),
             executor_options={
                 **executor_options,
-                "runtime": "rapidsmpf",
                 "cluster": "ray",
                 "ray_context": RayContext(self._rank_actors),
             },
diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py
index 65e3eb8b1e7..7e1bde808cd 100644
--- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py
+++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py
@@ -23,12 +23,11 @@
 from rapidsmpf.statistics import Statistics
 from rapidsmpf.streaming.core.context import Context
 
-import polars as pl
-
 import pylibcudf as plc
 import rmm.mr
 from pylibcudf.contiguous_split import pack
 
+from cudf_polars.containers import DataFrame, DataType
 from cudf_polars.experimental.rapidsmpf.collectives.common import reserve_op_id
 from cudf_polars.experimental.rapidsmpf.frontend.core import (
     ClusterInfo,
@@ -53,6 +52,8 @@
     from rapidsmpf.config import Options
     from rapidsmpf.streaming.cudf.channel_metadata import ChannelMetadata
 
+    import polars as pl
+
     from cudf_polars.dsl.ir import IR
     from cudf_polars.experimental.parallel import ConfigOptions
     from cudf_polars.experimental.rapidsmpf.frontend.core import T
@@ -98,8 +99,6 @@ def evaluate_pipeline_spmd_mode(
     The concatenated output DataFrame and, if ``collect_metadata`` is
     True, the list of channel metadata objects; otherwise ``None``.
     """
-    if config_options.executor.runtime != "rapidsmpf":
-        raise RuntimeError("Runtime must be rapidsmpf")
     if config_options.executor.spmd_context is None:
         raise RuntimeError("spmd_context must be set for SPMD mode")
     comm = config_options.executor.spmd_context.comm
@@ -155,8 +154,9 @@ def allgather_polars_dataframe(
     ctx = engine.context
     stream = ctx.get_stream_from_pool()
     col_names = local_df.columns
+    dtypes = [DataType(dtype) for dtype in local_df.dtypes]
 
-    plc_table = plc.Table.from_arrow(local_df.to_arrow())
+    plc_table = plc.Table.from_arrow(local_df, stream=stream)
 
     packed_data = PackedData.from_cudf_packed_columns(
         pack(plc_table, stream),
@@ -176,9 +176,12 @@ def allgather_polars_dataframe(
     plc_result = unpack_and_concat(results, stream, ctx.br())
 
     # pylibcudf Table -> pl.DataFrame (restore column names)
-    ret = pl.from_arrow(plc_result.to_arrow(col_names))
-    assert isinstance(ret, pl.DataFrame)
-    return ret
+    return DataFrame.from_table(
+        plc_result,
+        col_names,
+        dtypes,
+        stream,
+    ).to_polars()
 
 
 class SPMDEngine(StreamingEngine):
@@ -389,7 +392,6 @@ def __init__(
                 nranks=comm.nranks,
                 executor_options={
                     **executor_options,
-                    "runtime": "rapidsmpf",
                     "cluster": "spmd",
                     "spmd_context": SPMDContext(
                         comm=comm, context=ctx, py_executor=self._py_executor
@@ -494,7 +496,6 @@ def _reset(
             nranks=self._comm.nranks,
             executor_options={
                 **executor_options,
-                "runtime": "rapidsmpf",
                 "cluster": "spmd",
                 "spmd_context": SPMDContext(
                     comm=self._comm,
diff --git a/python/cudf_polars/cudf_polars/experimental/repartition.py b/python/cudf_polars/cudf_polars/experimental/repartition.py
index 92d89a5f44c..84c39d930ca 100644
--- a/python/cudf_polars/cudf_polars/experimental/repartition.py
+++ b/python/cudf_polars/cudf_polars/experimental/repartition.py
@@ -4,20 +4,11 @@
 
 from __future__ import annotations
 
-import itertools
-from functools import partial
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 from cudf_polars.dsl.ir import IR
-from cudf_polars.experimental.base import get_key_name
-from cudf_polars.experimental.dispatch import generate_ir_tasks
-from cudf_polars.experimental.utils import _concat
 
 if TYPE_CHECKING:
-    from collections.abc import MutableMapping
-
-    from cudf_polars.dsl.ir import IRExecutionContext
-    from cudf_polars.experimental.parallel import PartitionInfo
     from cudf_polars.typing import Schema
 
 
@@ -43,35 +34,3 @@ def __init__(self, schema: Schema, df: IR):
         self.schema = schema
         self._non_child_args = ()
         self.children = (df,)
-
-
-@generate_ir_tasks.register(Repartition)
-def _(
-    ir: Repartition,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    # Repartition an IR node.
-    # Only supports rapartitioning to fewer (for now).
-
-    (child,) = ir.children
-    count_in = partition_info[child].count
-    count_out = partition_info[ir].count
-
-    if count_out > count_in:  # pragma: no cover
-        raise NotImplementedError(
-            f"Repartition {count_in} -> {count_out} not supported."
-        )
-
-    key_name = get_key_name(ir)
-    n, remainder = divmod(count_in, count_out)
-    # Spread remainder evenly over the partitions.
-    offsets = [0, *itertools.accumulate(n + (i < remainder) for i in range(count_out))]
-    child_keys = tuple(partition_info[child].keys(child))
-    return {
-        (key_name, i): (
-            partial(_concat, context=context),
-            *child_keys[offsets[i] : offsets[i + 1]],
-        )
-        for i in range(count_out)
-    }
diff --git a/python/cudf_polars/cudf_polars/experimental/scheduler.py b/python/cudf_polars/cudf_polars/experimental/scheduler.py
deleted file mode 100644
index 97eae6ab378..00000000000
--- a/python/cudf_polars/cudf_polars/experimental/scheduler.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
-# SPDX-License-Identifier: Apache-2.0
-"""Synchronous task scheduler."""
-
-from __future__ import annotations
-
-from collections import Counter
-from collections.abc import MutableMapping
-from itertools import chain
-from typing import TYPE_CHECKING, Any, TypeVar, Unpack
-
-if TYPE_CHECKING:
-    from collections.abc import Mapping
-    from typing import TypeAlias
-
-
-Key: TypeAlias = str | tuple[str, Unpack[tuple[int, ...]]]
-Graph: TypeAlias = MutableMapping[Key, Any]
-T_ = TypeVar("T_")
-
-
-# NOTE: This is a slimmed-down version of the single-threaded
-# (synchronous) scheduler in `dask.core`.
-#
-# Key Differences:
-# * We do not allow a task to contain a list of key names.
-#   Keys must be distinct elements of the task.
-# * We do not support nested tasks.
-
-
-def istask(x: Any) -> bool:
-    """Check if x is a callable task."""
-    return isinstance(x, tuple) and bool(x) and callable(x[0])
-
-
-def is_hashable(x: Any) -> bool:
-    """Check if x is hashable."""
-    try:
-        hash(x)
-    except BaseException:
-        return False
-    else:
-        return True
-
-
-def _execute_task(arg: Any, cache: Mapping) -> Any:
-    """Execute a compute task."""
-    if istask(arg):
-        return arg[0](*(_execute_task(a, cache) for a in arg[1:]))
-    elif is_hashable(arg):
-        return cache.get(arg, arg)
-    else:
-        return arg
-
-
-def required_keys(key: Key, graph: Graph) -> list[Key]:
-    """
-    Return the dependencies to extract a key from the graph.
-
-    Parameters
-    ----------
-    key
-        Root key we want to extract.
-    graph
-        The full task graph.
-
-    Returns
-    -------
-    List of other keys needed to extract ``key``.
-    """
-    maybe_task = graph[key]
-    return [
-        k
-        for k in (
-            maybe_task[1:]
-            if istask(maybe_task)
-            else [maybe_task]  # maybe_task might be a key
-        )
-        if is_hashable(k) and k in graph
-    ]
-
-
-def toposort(graph: Graph, dependencies: Mapping[Key, list[Key]]) -> list[Key]:
-    """Return a list of task keys sorted in topological order."""
-    # Stack-based depth-first search traversal. This is based on Tarjan's
-    # algorithm for strongly-connected components
-    # (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
-    ordered: list[Key] = []
-    completed: set[Key] = set()
-
-    for key in graph:
-        if key in completed:
-            continue
-        nodes = [key]
-        while nodes:
-            # Keep current node on the stack until all descendants are visited
-            current = nodes[-1]
-            if current in completed:  # pragma: no cover
-                # Already fully traversed descendants of current
-                nodes.pop()
-                continue
-
-            # Add direct descendants of current to nodes stack
-            next_nodes = set(dependencies[current]) - completed
-            if next_nodes:
-                nodes.extend(next_nodes)
-            else:
-                # Current has no more descendants to explore
-                ordered.append(current)
-                completed.add(current)
-                nodes.pop()
-
-    return ordered
-
-
-def synchronous_scheduler(
-    graph: Graph,
-    key: Key,
-    *,
-    cache: MutableMapping | None = None,
-) -> Any:
-    """
-    Execute the task graph for a given key.
-
-    Parameters
-    ----------
-    graph
-        The task graph to execute.
-    key
-        The final output key to extract from the graph.
-    cache
-        Intermediate-data cache.
-
-    Returns
-    -------
-    Executed task-graph result for ``key``.
-    """
-    if key not in graph:  # pragma: no cover
-        raise KeyError(f"{key} is not a key in the graph")
-    if cache is None:
-        cache = {}
-
-    dependencies = {k: required_keys(k, graph) for k in graph}
-    refcount = Counter(chain.from_iterable(dependencies.values()))
-
-    for k in toposort(graph, dependencies):
-        cache[k] = _execute_task(graph[k], cache)
-        for dep in dependencies[k]:
-            refcount[dep] -= 1
-            if refcount[dep] == 0 and dep != key:
-                del cache[dep]
-
-    return cache[key]
diff --git a/python/cudf_polars/cudf_polars/experimental/shuffle.py b/python/cudf_polars/cudf_polars/experimental/shuffle.py
index 8e24dd83fe6..9381126775f 100644
--- a/python/cudf_polars/cudf_polars/experimental/shuffle.py
+++ b/python/cudf_polars/cudf_polars/experimental/shuffle.py
@@ -4,112 +4,22 @@
 
 from __future__ import annotations
 
-import operator
-from functools import partial
-from typing import TYPE_CHECKING, Any, Concatenate, TypeVar, TypedDict
+from typing import TYPE_CHECKING
 
-import pylibcudf as plc
-from rmm.pylibrmm.stream import DEFAULT_STREAM
-
-from cudf_polars.containers import DataFrame
-from cudf_polars.dsl.expr import Col
 from cudf_polars.dsl.ir import IR
-from cudf_polars.dsl.tracing import log_do_evaluate, nvtx_annotate_cudf_polars
-from cudf_polars.experimental.base import get_key_name
-from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node
-from cudf_polars.experimental.utils import _concat, _dynamic_planning_on
-from cudf_polars.utils.cuda_stream import get_dask_cuda_stream
+from cudf_polars.dsl.tracing import log_do_evaluate
+from cudf_polars.experimental.dispatch import lower_ir_node
+from cudf_polars.experimental.utils import _dynamic_planning_on
 
 if TYPE_CHECKING:
-    from collections.abc import Callable, MutableMapping, Sequence
+    from collections.abc import MutableMapping
 
-    from cudf_polars.containers import DataType
+    from cudf_polars.containers import DataFrame
     from cudf_polars.dsl.expr import NamedExpr
     from cudf_polars.dsl.ir import IRExecutionContext
     from cudf_polars.experimental.dispatch import LowerIRTransformer
     from cudf_polars.experimental.parallel import PartitionInfo
     from cudf_polars.typing import Schema
-    from cudf_polars.utils.config import ShuffleMethod
-
-
-# Supported shuffle methods
-_SHUFFLE_METHODS = ("rapidsmpf", "tasks")
-
-
-class ShuffleOptions(TypedDict):
-    """RapidsMPF shuffling options."""
-
-    on: Sequence[str]
-    column_names: Sequence[str]
-    dtypes: Sequence[DataType]
-
-
-# Experimental rapidsmpf shuffler integration
-class RMPFIntegration:  # pragma: no cover
-    """cuDF-Polars protocol for rapidsmpf shuffler."""
-
-    @staticmethod
-    @nvtx_annotate_cudf_polars(message="RMPFIntegration.insert_partition")
-    def insert_partition(
-        df: DataFrame,
-        partition_id: int,  # Not currently used
-        partition_count: int,
-        shuffler: Any,
-        options: ShuffleOptions,
-        *other: Any,
-    ) -> None:
-        """Add cudf-polars DataFrame chunks to an RMP shuffler."""
-        from rapidsmpf.integrations.cudf.partition import partition_and_pack
-        from rapidsmpf.integrations.single import get_worker_context
-
-        context = get_worker_context()
-
-        on = options["on"]
-        assert not other, f"Unexpected arguments: {other}"
-        columns_to_hash = tuple(df.column_names.index(val) for val in on)
-        packed_inputs = partition_and_pack(
-            df.table,
-            columns_to_hash=columns_to_hash,
-            num_partitions=partition_count,
-            br=context.br,
-            stream=DEFAULT_STREAM,
-        )
-
-        shuffler.insert_chunks(packed_inputs)
-
-    @staticmethod
-    @nvtx_annotate_cudf_polars(message="RMPFIntegration.extract_partition")
-    def extract_partition(
-        partition_id: int,
-        shuffler: Any,
-        options: ShuffleOptions,
-    ) -> DataFrame:
-        """Extract a finished partition from the RMP shuffler."""
-        from rapidsmpf.integrations.cudf.partition import (
-            unpack_and_concat,
-            unspill_partitions,
-        )
-        from rapidsmpf.integrations.single import get_worker_context
-
-        context = get_worker_context()
-
-        shuffler.wait()
-        column_names = options["column_names"]
-        dtypes = options["dtypes"]
-        return DataFrame.from_table(
-            unpack_and_concat(
-                unspill_partitions(
-                    shuffler.extract(partition_id),
-                    br=context.br,
-                    allow_overbooking=True,
-                ),
-                br=context.br,
-                stream=DEFAULT_STREAM,
-            ),
-            column_names,
-            dtypes,
-            get_dask_cuda_stream(),
-        )
 
 
 class Shuffle(IR):
@@ -118,29 +28,27 @@ class Shuffle(IR):
 
     Notes
     -----
-    Only hash-based partitioning is supported (for now).  See
-    `ShuffleSorted` for sorting-based shuffling.
+    Only hash-based partitioning is supported (for now).
     """
 
-    __slots__ = ("keys", "shuffle_method")
-    _non_child = ("schema", "keys", "shuffle_method")
-    _n_non_child_args = 3
+    __slots__ = ("keys",)
+    _non_child = (
+        "schema",
+        "keys",
+    )
+    _n_non_child_args = 2
     keys: tuple[NamedExpr, ...]
     """Keys to shuffle on."""
-    shuffle_method: ShuffleMethod
-    """Shuffle method to use."""
 
     def __init__(
         self,
         schema: Schema,
         keys: tuple[NamedExpr, ...],
-        shuffle_method: ShuffleMethod,
         df: IR,
     ):
         self.schema = schema
         self.keys = keys
-        self.shuffle_method = shuffle_method
-        self._non_child_args = (schema, keys, shuffle_method)
+        self._non_child_args = (schema, keys)
         self.children = (df,)
 
     # the type-ignore is for
@@ -153,7 +61,6 @@ def do_evaluate(
         cls,
         schema: Schema,
         keys: tuple[NamedExpr, ...],
-        shuffle_method: ShuffleMethod,
         df: DataFrame,
         *,
         context: IRExecutionContext,
@@ -163,120 +70,6 @@ def do_evaluate(
         return df
 
 
-@nvtx_annotate_cudf_polars(message="Shuffle")
-def _hash_partition_dataframe(
-    df: DataFrame,
-    partition_id: int,  # Used only by sorted shuffling
-    partition_count: int,
-    options: MutableMapping[str, Any] | None,  # No options required
-    on: tuple[NamedExpr, ...],
-) -> dict[int, DataFrame]:
-    """
-    Partition an input DataFrame for hash-based shuffling.
-
-    Parameters
-    ----------
-    df
-        DataFrame to partition.
-    partition_id
-        Partition index (unused for hash partitioning).
-    partition_count
-        Total number of output partitions.
-    options
-        Options (unused for hash partitioning).
-    on
-        Expressions used for the hash partitioning.
-
-    Returns
-    -------
-    A dictionary mapping between int partition indices and
-    DataFrame fragments.
-    """
-    assert not options, f"Expected no options, got: {options}"
-
-    if df.num_rows == 0:
-        # Fast path for empty DataFrame
-        return dict.fromkeys(range(partition_count), df)
-
-    # Hash the specified keys to calculate the output
-    # partition for each row
-    partition_map = plc.binaryop.binary_operation(
-        plc.hashing.murmurhash3_x86_32(
-            DataFrame([expr.evaluate(df) for expr in on], stream=df.stream).table,
-            stream=df.stream,
-        ),
-        plc.Scalar.from_py(
-            partition_count, plc.DataType(plc.TypeId.UINT32), stream=df.stream
-        ),
-        plc.binaryop.BinaryOperator.PYMOD,
-        plc.types.DataType(plc.types.TypeId.UINT32),
-        stream=df.stream,
-    )
-
-    # Apply partitioning
-    t, offsets = plc.partitioning.partition(
-        df.table,
-        partition_map,
-        partition_count,
-        stream=df.stream,
-    )
-    splits = offsets[1:-1]
-
-    # Split and return the partitioned result
-    return {
-        i: DataFrame.from_table(
-            split,
-            df.column_names,
-            df.dtypes,
-            df.stream,
-        )
-        for i, split in enumerate(plc.copying.split(t, splits, stream=df.stream))
-    }
-
-
-# When dropping Python 3.10, can use _simple_shuffle_graph[OPT_T](...)
-OPT_T = TypeVar("OPT_T")
-
-
-def _simple_shuffle_graph(
-    name_in: str,
-    name_out: str,
-    count_in: int,
-    count_out: int,
-    _partition_dataframe_func: Callable[
-        Concatenate[DataFrame, int, int, OPT_T, ...],
-        MutableMapping[int, DataFrame],
-    ],
-    options: OPT_T,
-    *other: Any,
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    """Make a simple all-to-all shuffle graph."""
-    split_name = f"split-{name_out}"
-    inter_name = f"inter-{name_out}"
-
-    graph: MutableMapping[Any, Any] = {}
-    for part_out in range(count_out):
-        _concat_list = []
-        for part_in in range(count_in):
-            graph[(split_name, part_in)] = (
-                _partition_dataframe_func,
-                (name_in, part_in),
-                part_in,
-                count_out,
-                options,
-                *other,
-            )
-            _concat_list.append((inter_name, part_out, part_in))
-            graph[_concat_list[-1]] = (
-                operator.getitem,
-                (split_name, part_in),
-                part_out,
-            )
-        graph[(name_out, part_out)] = (partial(_concat, context=context), *_concat_list)
-    return graph
-
-
 @lower_ir_node.register(Shuffle)
 def _(
     ir: Shuffle, rec: LowerIRTransformer
@@ -306,47 +99,3 @@ def _(
         partitioned_on=ir.keys,
     )
     return new_node, pi
-
-
-@generate_ir_tasks.register(Shuffle)
-def _(
-    ir: Shuffle,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    # Extract "shuffle_method" configuration
-    shuffle_method = ir.shuffle_method
-
-    # Try using rapidsmpf shuffler if we have "simple" shuffle
-    # keys, and the "shuffle_method" config is set to "rapidsmpf-single".
-    _keys: list[Col]
-    if shuffle_method == "rapidsmpf-single" and len(
-        _keys := [ne.value for ne in ir.keys if isinstance(ne.value, Col)]
-    ) == len(ir.keys):  # pragma: no cover
-        from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph
-
-        shuffle_on = [k.name for k in _keys]
-
-        return rapidsmpf_shuffle_graph(
-            get_key_name(ir.children[0]),
-            get_key_name(ir),
-            partition_info[ir.children[0]].count,
-            partition_info[ir].count,
-            RMPFIntegration,
-            {
-                "on": shuffle_on,
-                "column_names": list(ir.schema.keys()),
-                "dtypes": list(ir.schema.values()),
-            },
-        )
-
-    # Simple task-based fall-back
-    return partial(_simple_shuffle_graph, context=context)(
-        get_key_name(ir.children[0]),
-        get_key_name(ir),
-        partition_info[ir.children[0]].count,
-        partition_info[ir].count,
-        _hash_partition_dataframe,
-        None,
-        ir.keys,
-    )
diff --git a/python/cudf_polars/cudf_polars/experimental/sort.py b/python/cudf_polars/cudf_polars/experimental/sort.py
index 6800fb4ab74..fa610324c2d 100644
--- a/python/cudf_polars/cudf_polars/experimental/sort.py
+++ b/python/cudf_polars/cudf_polars/experimental/sort.py
@@ -4,47 +4,30 @@
 
 from __future__ import annotations
 
-from functools import partial
-from typing import TYPE_CHECKING, Any, TypedDict
+from typing import TYPE_CHECKING
 
 import polars as pl
 
 import pylibcudf as plc
-from rmm.pylibrmm.stream import DEFAULT_STREAM
 
 from cudf_polars.containers import Column, DataFrame, DataType
 from cudf_polars.dsl.expr import Col
-from cudf_polars.dsl.ir import IR, Slice, Sort
+from cudf_polars.dsl.ir import Slice, Sort
 from cudf_polars.dsl.traversal import traversal
 from cudf_polars.dsl.utils.naming import unique_names
-from cudf_polars.experimental.base import PartitionInfo, get_key_name
-from cudf_polars.experimental.dispatch import (
-    generate_ir_tasks,
-    lower_ir_node,
-)
-from cudf_polars.experimental.repartition import Repartition
-from cudf_polars.experimental.shuffle import _simple_shuffle_graph
+from cudf_polars.experimental.dispatch import lower_ir_node
 from cudf_polars.experimental.utils import (
-    _concat,
-    _fallback_inform,
     _lower_ir_fallback,
 )
-from cudf_polars.utils.config import ShuffleMethod
-from cudf_polars.utils.cuda_stream import (
-    get_dask_cuda_stream,
-    get_joined_cuda_stream,
-    join_cuda_streams,
-)
 
 if TYPE_CHECKING:
     from collections.abc import MutableMapping, Sequence
 
     from rmm.pylibrmm.stream import Stream
 
-    from cudf_polars.dsl.expr import NamedExpr
-    from cudf_polars.dsl.ir import IRExecutionContext
+    from cudf_polars.dsl.ir import IR
+    from cudf_polars.experimental.base import PartitionInfo
     from cudf_polars.experimental.dispatch import LowerIRTransformer
-    from cudf_polars.typing import Schema
 
 
 def find_sort_splits(
@@ -251,248 +234,6 @@ def _get_final_sort_boundaries(
     )
 
 
-def _sort_boundaries_graph(
-    name_in: str,
-    by: Sequence[str],
-    column_order: Sequence[plc.types.Order],
-    null_order: Sequence[plc.types.NullOrder],
-    count: int,
-    context: IRExecutionContext,
-) -> tuple[str, MutableMapping[Any, Any]]:
-    """Graph to get the boundaries from all partitions."""
-    local_boundaries_name = f"sort-boundaries_local-{name_in}"
-    concat_boundaries_name = f"sort-boundaries-concat-{name_in}"
-    global_boundaries_name = f"sort-boundaries-{name_in}"
-    graph: MutableMapping[Any, Any] = {}
-
-    _concat_list = []
-    for part_id in range(count):
-        graph[(local_boundaries_name, part_id)] = (
-            _select_local_split_candidates,
-            (name_in, part_id),
-            by,
-            count,
-            part_id,
-        )
-        _concat_list.append((local_boundaries_name, part_id))
-
-    graph[concat_boundaries_name] = (partial(_concat, context=context), *_concat_list)
-    graph[global_boundaries_name] = (
-        _get_final_sort_boundaries,
-        concat_boundaries_name,
-        column_order,
-        null_order,
-        count,
-    )
-    return global_boundaries_name, graph
-
-
-class SortedShuffleOptions(TypedDict):
-    """RapidsMPF shuffling options."""
-
-    by: Sequence[str]
-    order: Sequence[plc.types.Order]
-    null_order: Sequence[plc.types.NullOrder]
-    column_names: Sequence[str]
-    column_dtypes: Sequence[DataType]
-
-
-# Experimental rapidsmpf shuffler integration
-class RMPFIntegrationSortedShuffle:  # pragma: no cover
-    """cuDF-Polars protocol for rapidsmpf shuffler."""
-
-    @staticmethod
-    def insert_partition(
-        df: DataFrame,
-        partition_id: int,
-        partition_count: int,
-        shuffler: Any,
-        options: SortedShuffleOptions,
-        sort_boundaries: DataFrame,
-    ) -> None:
-        """Add cudf-polars DataFrame chunks to an RMP shuffler."""
-        from rapidsmpf.integrations.cudf.partition import split_and_pack
-        from rapidsmpf.integrations.single import get_worker_context
-
-        context = get_worker_context()
-
-        by = options["by"]
-        data_streams = [
-            df.stream,
-            sort_boundaries.stream,
-        ]
-        stream = get_joined_cuda_stream(get_dask_cuda_stream, upstreams=data_streams)
-
-        splits = find_sort_splits(
-            df.select(by).table,
-            sort_boundaries.table,
-            partition_id,
-            options["order"],
-            options["null_order"],
-            stream=stream,
-        )
-        packed_inputs = split_and_pack(
-            df.table,
-            splits=splits,
-            br=context.br,
-            stream=stream,
-        )
-        # TODO: figure out handoff with rapidsmpf
-        # https://github.com/rapidsai/cudf/issues/20337
-        shuffler.insert_chunks(packed_inputs)
-
-        join_cuda_streams(downstreams=data_streams, upstreams=[stream])
-
-    @staticmethod
-    def extract_partition(
-        partition_id: int,
-        shuffler: Any,
-        options: SortedShuffleOptions,
-    ) -> DataFrame:
-        """Extract a finished partition from the RMP shuffler."""
-        from rapidsmpf.integrations.cudf.partition import (
-            unpack_and_concat,
-            unspill_partitions,
-        )
-        from rapidsmpf.integrations.single import get_worker_context
-
-        context = get_worker_context()
-
-        shuffler.wait()
-        column_names = options["column_names"]
-        column_dtypes = options["column_dtypes"]
-
-        stream = DEFAULT_STREAM
-
-        # TODO: When sorting, this step should finalize with a merge (unless we
-        # require stability, as cudf merge is not stable).
-        # TODO: figure out handoff with rapidsmpf
-        # https://github.com/rapidsai/cudf/issues/20337
-        return DataFrame.from_table(
-            unpack_and_concat(
-                unspill_partitions(
-                    shuffler.extract(partition_id),
-                    br=context.br,
-                    allow_overbooking=True,
-                ),
-                br=context.br,
-                stream=stream,
-            ),
-            column_names,
-            column_dtypes,
-            stream=stream,
-        )
-
-
-def _sort_partition_dataframe(
-    df: DataFrame,
-    partition_id: int,  # Not currently used
-    partition_count: int,
-    options: MutableMapping[str, Any],
-    sort_boundaries: DataFrame,
-) -> MutableMapping[int, DataFrame]:
-    """
-    Partition a sorted DataFrame for shuffling.
-
-    Parameters
-    ----------
-    df
-        The DataFrame to partition.
-    partition_id
-        The partition id of the current partition.
-    partition_count
-        The total number of partitions.
-    options
-        The sort options ``(by, order, null_order)``.
-    sort_boundaries
-        The global sort boundary candidates used to decide where to split.
-    """
-    if df.num_rows == 0:  # pragma: no cover
-        # Fast path for empty DataFrame
-        return dict.fromkeys(range(partition_count), df)
-
-    stream = get_joined_cuda_stream(
-        get_dask_cuda_stream, upstreams=(df.stream, sort_boundaries.stream)
-    )
-
-    splits = find_sort_splits(
-        df.select(options["by"]).table,
-        sort_boundaries.table,
-        partition_id,
-        options["order"],
-        options["null_order"],
-        stream=stream,
-    )
-
-    # Split and return the partitioned result
-    return {
-        i: DataFrame.from_table(
-            split,
-            df.column_names,
-            df.dtypes,
-            stream=df.stream,
-        )
-        for i, split in enumerate(plc.copying.split(df.table, splits, stream=stream))
-    }
-
-
-class ShuffleSorted(IR):
-    """
-    Shuffle already locally sorted multi-partition data.
-
-    Shuffling is performed by extracting sort boundary candidates from all partitions,
-    sharing them all-to-all and then exchanging data accordingly.
-    The sorting information is required to be passed in identically to the already
-    performed local sort and as of now the final result needs to be sorted again to
-    merge the partitions.
-    """
-
-    __slots__ = ("by", "null_order", "order", "shuffle_method")
-    _non_child = ("schema", "by", "order", "null_order", "shuffle_method")
-    _n_non_child_args = 5
-    by: tuple[NamedExpr, ...]
-    """Keys by which the data was sorted."""
-    order: tuple[plc.types.Order, ...]
-    """Sort order if sorted."""
-    null_order: tuple[plc.types.NullOrder, ...]
-    """Null precedence if sorted."""
-    shuffle_method: ShuffleMethod
-    """Shuffle method to use."""
-
-    def __init__(
-        self,
-        schema: Schema,
-        by: tuple[NamedExpr, ...],
-        order: tuple[plc.types.Order, ...],
-        null_order: tuple[plc.types.NullOrder, ...],
-        shuffle_method: ShuffleMethod,
-        df: IR,
-    ):
-        self.schema = schema
-        self.by = by
-        self.order = order
-        self.null_order = null_order
-        self.shuffle_method = shuffle_method
-        self._non_child_args = (schema, by, order, null_order, shuffle_method)
-        self.children = (df,)
-
-    @classmethod
-    def do_evaluate(
-        cls,
-        schema: Schema,
-        by: tuple[NamedExpr, ...],
-        order: tuple[plc.types.Order, ...],
-        null_order: tuple[plc.types.NullOrder, ...],
-        shuffle_method: ShuffleMethod,
-        df: DataFrame,
-        *,
-        context: IRExecutionContext,
-    ) -> DataFrame:  # pragma: no cover
-        """Evaluate and return a dataframe."""
-        # Single-partition ShuffleSorted evaluation is a no-op
-        return df
-
-
 def _has_simple_zlice(zlice: tuple[int, int | None] | None) -> bool:
     """Check if a zlice is a simple top-k/bottom-k operation."""
     if zlice is None:
@@ -517,26 +258,7 @@ def _(
             msg="sort currently only supports column names as `by` keys.",
         )
 
-    config_options = rec.state["config_options"]
-    executor = config_options.executor
-    runtime = executor.runtime
-
-    # Special handling for slicing
-    # (May be a top- or bottom-k operation)
-    simple_zlice = _has_simple_zlice(ir.zlice)
-    if simple_zlice and runtime == "tasks":
-        from cudf_polars.experimental.parallel import _lower_ir_pwise
-
-        new_node, partition_info = _lower_ir_pwise(ir, rec)
-        if partition_info[new_node].count > 1:
-            # Collapse down to single partition
-            inter = Repartition(new_node.schema, new_node)
-            partition_info[inter] = PartitionInfo(count=1)
-            # Sort reduced partition
-            new_node = ir.reconstruct([inter])
-            partition_info[new_node] = PartitionInfo(count=1)
-        return new_node, partition_info
-    elif ir.zlice is not None and not simple_zlice:
+    if ir.zlice is not None and not _has_simple_zlice(ir.zlice):
         # Pull "complex" slices out of the Sort node altogether.
         return rec(
             Slice(
@@ -557,112 +279,6 @@ def _(
     # Extract child partitioning
     child, partition_info = rec(ir.children[0])
 
-    # The "rapidsmpf" runtime uses the sort_actor to handle everything else
-    if runtime == "rapidsmpf":
-        sort_node = ir.reconstruct([child])
-        partition_info[sort_node] = partition_info[child]
-        return sort_node, partition_info
-
-    # TODO: Remove everything below here when "tasks" is removed.
-
-    # Avoid rapidsmpf shuffle with maintain_order=True (for now)
-    shuffle_method = (
-        ShuffleMethod("tasks") if ir.stable else config_options.executor.shuffle_method
-    )
-    if (
-        shuffle_method != config_options.executor.shuffle_method
-    ):  # pragma: no cover; Requires rapidsmpf
-        _fallback_inform(
-            f"shuffle_method={config_options.executor.shuffle_method} does not support maintain_order=True. "
-            f"Falling back to shuffle_method={shuffle_method}.",
-            config_options,
-        )
-
-    if partition_info[child].count == 1:
-        single_part_node = ir.reconstruct([child])
-        partition_info[single_part_node] = partition_info[child]
-        return single_part_node, partition_info
-
-    local_sort_node = ir.reconstruct([child])
-    partition_info[local_sort_node] = partition_info[child]
-
-    shuffle = ShuffleSorted(
-        ir.schema,
-        ir.by,
-        ir.order,
-        ir.null_order,
-        shuffle_method,
-        local_sort_node,
-    )
-    partition_info[shuffle] = partition_info[child]
-
-    # We sort again locally.
-    assert ir.zlice is None  # zlice handling would be incorrect without adjustment
-    final_sort_node = ir.reconstruct([shuffle])
-    partition_info[final_sort_node] = partition_info[shuffle]
-
-    return final_sort_node, partition_info
-
-
-@generate_ir_tasks.register(ShuffleSorted)
-def _(
-    ir: ShuffleSorted,
-    partition_info: MutableMapping[IR, PartitionInfo],
-    context: IRExecutionContext,
-) -> MutableMapping[Any, Any]:
-    by = [ne.value.name for ne in ir.by if isinstance(ne.value, Col)]
-    if len(by) != len(ir.by):  # pragma: no cover
-        # We should not reach here as this is checked in the lower_ir_node
-        raise NotImplementedError("Sorting columns must be column names.")
-
-    (child,) = ir.children
-
-    sort_boundaries_name, graph = _sort_boundaries_graph(
-        get_key_name(child),
-        by,
-        ir.order,
-        ir.null_order,
-        partition_info[child].count,
-        context,
-    )
-
-    options = {
-        "by": by,
-        "order": ir.order,
-        "null_order": ir.null_order,
-        "column_names": list(ir.schema.keys()),
-        "column_dtypes": list(ir.schema.values()),
-    }
-
-    # Try using rapidsmpf shuffler if we have "simple" shuffle
-    # keys, and the "shuffle_method" config is set to "rapidsmpf-single".
-    shuffle_method = ir.shuffle_method
-    if shuffle_method == "rapidsmpf-single":  # pragma: no cover
-        from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph
-
-        graph.update(
-            rapidsmpf_shuffle_graph(
-                get_key_name(child),
-                get_key_name(ir),
-                partition_info[child].count,
-                partition_info[ir].count,
-                RMPFIntegrationSortedShuffle,
-                options,
-                sort_boundaries_name,
-            )
-        )
-        return graph
-
-    # Simple task-based fall-back
-    graph.update(
-        partial(_simple_shuffle_graph, context=context)(
-            get_key_name(child),
-            get_key_name(ir),
-            partition_info[child].count,
-            partition_info[ir].count,
-            _sort_partition_dataframe,
-            options,
-            sort_boundaries_name,
-        )
-    )
-    return graph
+    sort_node = ir.reconstruct([child])
+    partition_info[sort_node] = partition_info[child]
+    return sort_node, partition_info
diff --git a/python/cudf_polars/cudf_polars/experimental/utils.py b/python/cudf_polars/cudf_polars/experimental/utils.py
index 24ce606d41b..848a4d44759 100644
--- a/python/cudf_polars/cudf_polars/experimental/utils.py
+++ b/python/cudf_polars/cudf_polars/experimental/utils.py
@@ -10,7 +10,7 @@
 from itertools import chain
 from typing import TYPE_CHECKING
 
-from cudf_polars.dsl.expr import Col, Expr, GroupedWindow, UnaryFunction
+from cudf_polars.dsl.expr import Col, GroupedWindow, UnaryFunction
 from cudf_polars.dsl.ir import Union
 from cudf_polars.dsl.traversal import traversal
 from cudf_polars.experimental.base import PartitionInfo
@@ -49,11 +49,8 @@ def _fallback_inform(
 
 
 def _dynamic_planning_on(config_options: ConfigOptions[StreamingExecutor]) -> bool:
-    """Check if dynamic planning is enabled for rapidsmpf runtime."""
-    return (
-        config_options.executor.runtime == "rapidsmpf"
-        and config_options.executor.dynamic_planning is not None
-    )
+    """Check if dynamic planning is enabled."""
+    return config_options.executor.dynamic_planning is not None
 
 
 def _lower_ir_fallback(
@@ -68,9 +65,6 @@ def _lower_ir_fallback(
     from cudf_polars.experimental.repartition import Repartition
     from cudf_polars.experimental.select import _inline_hstack_false
 
-    config_options = rec.state["config_options"]
-    rapidsmpf_engine = config_options.executor.runtime == "rapidsmpf"
-
     # Make sure we avoid mixed-length columns in intermediate TableChunks.
     ir = _inline_hstack_false(ir)
 
@@ -82,13 +76,10 @@ def _lower_ir_fallback(
     children = []
     inform = False
     for c in lowered_children:
-        child = c
-        if multi_partitioned := partition_info[c].count > 1:
+        if partition_info[c].count > 1:
             inform = True
-        if multi_partitioned or rapidsmpf_engine:
-            # Fall-back logic
-            child = Repartition(child.schema, child)
-            partition_info[child] = PartitionInfo(count=1)
+        child = Repartition(c.schema, c)
+        partition_info[child] = PartitionInfo(count=1)
         children.append(child)
 
     if inform and msg:
@@ -114,32 +105,6 @@ def _leaf_column_names(expr: Expr) -> tuple[str, ...]:
         return ()
 
 
-def _get_unique_fractions(
-    column_names: Sequence[str],
-    user_unique_fractions: dict[str, float],
-) -> dict[str, float]:
-    """
-    Return unique-fraction statistics subset.
-
-    Parameters
-    ----------
-    column_names
-        The column names to get unique-fractions for.
-    user_unique_fractions
-        The user-provided unique-fraction dictionary.
-
-    Returns
-    -------
-    unique_fractions
-        The final unique-fraction dictionary filtered to column_names.
-    """
-    return {
-        c: max(min(f, 1.0), 0.00001)
-        for c, f in user_unique_fractions.items()
-        if c in column_names
-    }
-
-
 def _contains_over(exprs: Sequence[Expr]) -> bool:
     """Return True if any expression contains a window expression."""
     return any(isinstance(e, GroupedWindow) for e in traversal(exprs))
diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py
index 9f0953cd4df..5611f8c3e70 100644
--- a/python/cudf_polars/cudf_polars/testing/asserts.py
+++ b/python/cudf_polars/cudf_polars/testing/asserts.py
@@ -30,7 +30,6 @@
 # Will be overriden by `conftest.py` with the value from the `--executor`
 # and `--cluster` command-line arguments
 DEFAULT_EXECUTOR = "in-memory"
-DEFAULT_RUNTIME = "tasks"
 DEFAULT_CLUSTER = "single"
 
 
@@ -200,7 +199,6 @@ def get_default_engine(
     executor = executor or DEFAULT_EXECUTOR
     if executor == "streaming":
         executor_options["cluster"] = DEFAULT_CLUSTER
-        executor_options["runtime"] = DEFAULT_RUNTIME
 
     return GPUEngine(
         raise_on_fail=True,
@@ -290,7 +288,8 @@ def assert_collect_raises(
         if polars_except != ():
             raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")
 
-    engine = GPUEngine(raise_on_fail=True)
+    # TODO: https://github.com/rapidsai/cudf/issues/22346
+    engine = GPUEngine(executor="in-memory", raise_on_fail=True)
     try:
         lazydf.collect(**final_cudf_collect_kwargs, engine=engine)  # type: ignore[misc, call-overload]
     except cudf_except:
diff --git a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py
index 6fe2de4d154..7cfb62c414e 100644
--- a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py
+++ b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py
@@ -30,6 +30,7 @@ def pytest_addoption(parser: pytest.Parser) -> None:
         choices=("in-memory", "spmd"),
         help="Which GPU engine variant to inject globally.",
     )
+    # TODO: We never run with --inject-gpu-engine-blocksize in ci/run_cudf_polars_polars_tests.sh. Remove?
     group.addoption(
         "--inject-gpu-engine-blocksize",
         action="store",
@@ -134,6 +135,7 @@ def pytest_report_header(config: pytest.Config) -> str:
     return f"injected GPU engine: {cls.__module__}.{cls.__name__}"
 
 
+# TODO: This is just Mapping[str, str]?
 EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
     "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
     "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning",
@@ -305,7 +307,6 @@ def pytest_report_header(config: pytest.Config) -> str:
 
 # Generally skip for:
 # 1) Tests that are too slow with --inject-gpu-engine-blocksize=small due to many small partitions for large data
-# 2) Tests that fail during cudf_polars execution and segfaults later due to https://github.com/rapidsai/cudf/issues/22138
 STREAMING_ENGINE_TESTS_TO_SKIP: Mapping[str, str] = {
     "tests/unit/operations/aggregation/test_aggregations.py::test_boolean_aggs": "float difference in std/var in the unit of least precision",
     "tests/benchmark/test_group_by.py::test_groupby_h2oai_q1": "Too slow with --inject-gpu-engine-blocksize=small",
diff --git a/python/cudf_polars/cudf_polars/utils/config.py b/python/cudf_polars/cudf_polars/utils/config.py
index a6bbd73929b..7b5fb5c940c 100644
--- a/python/cudf_polars/cudf_polars/utils/config.py
+++ b/python/cudf_polars/cudf_polars/utils/config.py
@@ -55,9 +55,7 @@
     "InMemoryExecutor",
     "ParquetOptions",
     "RayContext",
-    "Runtime",
     "SPMDContext",
-    "ShuffleMethod",
     "StreamingExecutor",
     "StreamingFallbackMode",
 ]
@@ -112,15 +110,6 @@ def get_total_device_memory() -> int | None:
         return None
 
 
-@functools.cache
-def rapidsmpf_single_available() -> bool:  # pragma: no cover
-    """Query whether rapidsmpf is available as a single-process shuffle method."""
-    try:
-        return importlib.util.find_spec("rapidsmpf.integrations.single") is not None
-    except (ImportError, ValueError):
-        return False
-
-
 class StreamingFallbackMode(enum.StrEnum):
     """
     How the streaming executor handles operations that don't support multiple partitions.
@@ -138,20 +127,6 @@ class StreamingFallbackMode(enum.StrEnum):
     SILENT = "silent"
 
 
-class Runtime(enum.StrEnum):
-    """
-    The runtime to use for the streaming executor.
-
-    * ``Runtime.TASKS`` : Use the task-based runtime.
-      This is the default runtime.
-    * ``Runtime.RAPIDSMPF`` : Use the coroutine-based streaming runtime (rapidsmpf).
-      This runtime is experimental.
-    """
-
-    TASKS = "tasks"
-    RAPIDSMPF = "rapidsmpf"
-
-
 class Cluster(enum.StrEnum):
     """
     The cluster configuration for the streaming executor.
@@ -172,27 +147,6 @@ class Cluster(enum.StrEnum):
     DASK = "dask"
 
 
-class ShuffleMethod(enum.StrEnum):
-    """
-    The method to use for shuffling data between workers with the streaming executor.
-
-    * ``ShuffleMethod.TASKS`` : Use the task-based shuffler.
-    * ``ShuffleMethod.RAPIDSMPF`` : Use the rapidsmpf shuffler.
-    * ``ShuffleMethod._RAPIDSMPF_SINGLE`` : Use the single-process rapidsmpf shuffler.
-
-    With :class:`cudf_polars.utils.config.StreamingExecutor`, the default of ``None``
-    resolves to ``ShuffleMethod.TASKS``.
-
-    The user should **not** specify ``ShuffleMethod._RAPIDSMPF_SINGLE`` directly.
-    A setting of ``ShuffleMethod.RAPIDSMPF`` will be converted to the single-process
-    shuffler automatically when using single-GPU execution.
-    """
-
-    TASKS = "tasks"
-    RAPIDSMPF = "rapidsmpf"
-    _RAPIDSMPF_SINGLE = "rapidsmpf-single"
-
-
 T = TypeVar("T")
 
 
@@ -254,7 +208,7 @@ class ParquetOptions:
         will also be skipped if ``max_footer_samples`` is 0.
     use_rapidsmpf_native
         Whether to use the native rapidsmpf node for parquet reading.
-        This option is only used when the rapidsmpf runtime is enabled.
+        This option is only used by the streaming executor.
         Default is False.
     """
 
@@ -315,49 +269,32 @@ def __post_init__(self) -> None:  # noqa: D105
             raise TypeError("use_rapidsmpf_native must be a bool")
 
 
-def default_target_partition_size(cluster: str, runtime: str) -> int:
+@functools.cache
+def default_target_partition_size() -> int:
     """Return the default blocksize."""
     if (device_size := get_total_device_memory()) is None:  # pragma: no cover
         # System doesn't have proper "GPU memory".
         # Fall back to a conservative 1GB default.
         return 1_000_000_000
 
-    if (
-        cluster == "single"
-        and runtime == "tasks"
-        and _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1
-    ):
-        # We can use a larger blocksize when UVM is enabled
-        blocksize = int(device_size * 0.0625)
-    else:
-        # Otherwise, use a conservative default
-        blocksize = int(device_size * 0.025)
+    blocksize = int(device_size * 0.025)
 
     # Use lower and upper bounds of 1GB and 10GB
     return min(max(blocksize, 1_000_000_000), 10_000_000_000)
 
 
-def default_broadcast_join_limit(cluster: str, runtime: str) -> int:
+@functools.cache
+def default_broadcast_join_limit() -> int:
     """Return the default broadcast join limit."""
     if (device_size := get_total_device_memory()) is None:  # pragma: no cover
         # System doesn't have proper "GPU memory".
         # We probably want to broadcast in most cases.
         return 32
 
-    if runtime == "rapidsmpf":
-        # Target about 12.5% of the device memory when
-        # default_target_partition_size is used to set the
-        # target partition size (i.e. 5x the 2.5% default).
-        return min(5, int(max(1, (device_size * 0.125) // 1e9)))
-    elif _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1:
-        # The "tasks" runtime always runs single-GPU; we can lean on UVM
-        # to support most broadcast joins.
-        return 32
-    else:
-        # Extra-conservative default for the "tasks" runtime without UVM.
-        # We cannot spill outside a rapidsmpf shuffle within this runtime,
-        # so shuffling is usually preferred.
-        return 2
+    # Target about 12.5% of the device memory when
+    # default_target_partition_size is used to set the
+    # target partition size (i.e. 5x the 2.5% default).
+    return min(5, int(max(1, (device_size * 0.125) // 1e9)))
 
 
 @dataclasses.dataclass(frozen=True)
@@ -599,17 +536,14 @@ class StreamingExecutor:
 
     Parameters
     ----------
-    runtime
-        The runtime to use for the streaming executor.
-        ``Runtime.TASKS`` by default.
     cluster
         The cluster configuration for the streaming executor.
         ``Cluster.SINGLE`` by default.
 
         * ``Cluster.SINGLE``: Single-GPU execution
-        * ``Cluster.SPMD``: Multi-GPU SPMD execution (rapidsmpf runtime)
-        * ``Cluster.RAY``: Multi-GPU Ray execution (rapidsmpf runtime)
-        * ``Cluster.DASK``: Multi-GPU Dask execution (rapidsmpf runtime)
+        * ``Cluster.SPMD``: Multi-GPU SPMD execution
+        * ``Cluster.RAY``: Multi-GPU Ray execution
+        * ``Cluster.DASK``: Multi-GPU Dask execution
 
     fallback_mode
         How to handle errors when the GPU engine fails to execute a query.
@@ -621,13 +555,6 @@ class StreamingExecutor:
         The maximum number of rows to process per partition. 1_000_000 by default.
         When the number of rows exceeds this value, the query will be split into
         multiple partitions and executed in parallel.
-    unique_fraction
-        A dictionary mapping column names to floats between 0 and 1 (inclusive
-        on the right).
-
-        Each factor estimates the fractional number of unique values in the
-        column. By default, ``1.0`` is used for any column not included in
-        ``unique_fraction``.
     target_partition_size
         Target partition size, in bytes, for IO tasks. This configuration currently
         controls how large parquet files are split into multiple partitions.
@@ -639,11 +566,8 @@ class StreamingExecutor:
         - keyword argument to ``polars.GPUEngine``
         - the ``CUDF_POLARS__EXECUTOR__TARGET_PARTITION_SIZE`` environment variable
 
-        By default, cudf-polars uses a target partition size that's a fraction
-        of the device memory, where the fraction depends on the cluster and runtime:
-
-        - rapidsmpf runtime: 1/40th of the device memory
-        - single cluster and tasks runtime: 1/16th of the device memory
+        By default, cudf-polars uses a target partition size of 1/40th of the
+        device memory.
 
         The pynvml library is used to query the total device memory on the first
         visible GPU. If the device size is not available, the default target
@@ -651,26 +575,14 @@ class StreamingExecutor:
 
         NOTE: If this configuration is changed manually, it is recommended to set
         `broadcast_join_limit` manually as well.
-    groupby_n_ary
-        The factor by which the number of partitions is decreased when performing
-        a groupby on a partitioned column. For example, if a column has 64 partitions,
-        it will first be reduced to ``ceil(64 / 32) = 2`` partitions.
-
-        This is useful when the absolute number of partitions is large.
     broadcast_join_limit
         The maximum number of partitions to allow for the smaller table in
         a broadcast join. For example, if the target partition size is 1GB and the
         broadcast join limit is 5, then the smaller table will be broadcasted
-        if it is smaller than 5GB (within the "rapidsmpf" runtime) or contains
-        fewer than 5 partitions (within the "tasks" runtime). The default depends
-        on the cluster and runtime.
-    shuffle_method
-        The method to use for shuffling data between workers. Defaults to
-        'tasks' for the single-GPU cluster.
+        if it is smaller than 5GB.
     client_device_threshold
-        Threshold for spilling data from device memory in rapidsmpf.
+        Threshold for spilling data from device memory.
         Default is 50% of device memory on the client process.
-        This argument is only used by the "rapidsmpf" runtime.
     sink_to_directory
         Whether multi-partition sink operations write to a directory rather
         than a single file. For the spmd, ray, and dask clusters this is
@@ -680,7 +592,7 @@ class StreamingExecutor:
         Options controlling dynamic shuffle planning. See
         :class:`~cudf_polars.utils.config.DynamicPlanningOptions` for more.
     max_io_threads
-        Maximum number of IO threads for the rapidsmpf runtime. Default is 4.
+        Maximum number of IO threads. Default is 4.
         This controls the parallelism of IO operations when reading data.
     spill_to_pinned_memory
         Whether RapidsMPF should spill to pinned host memory when available,
@@ -688,8 +600,8 @@ class StreamingExecutor:
         bandwidth and lower latency for device to host transfers compared to
         regular pageable host memory.
     num_py_executors
-        Maximum number of workers for the Python ThreadPoolExecutor used by
-        the rapidsmpf runtime. Default is 8.
+        Maximum number of workers for the Python ThreadPoolExecutor.
+        Default is 8.
 
     Notes
     -----
@@ -700,13 +612,6 @@ class StreamingExecutor:
     _env_prefix = "CUDF_POLARS__EXECUTOR"
 
     name: Literal["streaming"] = dataclasses.field(default="streaming", init=False)
-    runtime: Runtime = dataclasses.field(
-        default_factory=_make_default_factory(
-            f"{_env_prefix}__RUNTIME",
-            Runtime.__call__,
-            default=Runtime.TASKS,
-        )
-    )
     cluster: Cluster | None = dataclasses.field(
         default_factory=_make_default_factory(
             f"{_env_prefix}__CLUSTER",
@@ -726,33 +631,16 @@ class StreamingExecutor:
             f"{_env_prefix}__MAX_ROWS_PER_PARTITION", int, default=1_000_000
         )
     )
-    unique_fraction: dict[str, float] = dataclasses.field(
-        default_factory=_make_default_factory(
-            f"{_env_prefix}__UNIQUE_FRACTION", json.loads, default={}
-        )
-    )
     target_partition_size: int = dataclasses.field(
         default_factory=_make_default_factory(
             f"{_env_prefix}__TARGET_PARTITION_SIZE", int, default=0
         )
     )
-    groupby_n_ary: int = dataclasses.field(
-        default_factory=_make_default_factory(
-            f"{_env_prefix}__GROUPBY_N_ARY", int, default=32
-        )
-    )
     broadcast_join_limit: int = dataclasses.field(
         default_factory=_make_default_factory(
             f"{_env_prefix}__BROADCAST_JOIN_LIMIT", int, default=0
         )
     )
-    shuffle_method: ShuffleMethod = dataclasses.field(
-        default_factory=_make_default_factory(
-            f"{_env_prefix}__SHUFFLE_METHOD",
-            ShuffleMethod.__call__,
-            default=ShuffleMethod.TASKS,
-        )
-    )
     client_device_threshold: float = dataclasses.field(
         default_factory=_make_default_factory(
             f"{_env_prefix}__CLIENT_DEVICE_THRESHOLD", float, default=0.5
@@ -786,33 +674,10 @@ class StreamingExecutor:
     dask_context: DaskContext | None = None
 
     def __post_init__(self) -> None:  # noqa: D105
-        # Check for rapidsmpf runtime
-        if self.runtime == "rapidsmpf":  # pragma: no cover; requires rapidsmpf runtime
-            if not rapidsmpf_single_available():
-                raise ValueError("The rapidsmpf streaming engine requires rapidsmpf.")
-            object.__setattr__(self, "shuffle_method", "rapidsmpf")
-
         if self.cluster is None:
             object.__setattr__(self, "cluster", Cluster.SINGLE)
         assert self.cluster is not None, "Expected cluster to be set."
 
-        # Handle shuffle_method defaults for streaming executor
-        if self.shuffle_method is None:
-            # Use task-based shuffle by default.
-            # TODO: Evaluate single-process shuffle by default.
-            object.__setattr__(self, "shuffle_method", "tasks")
-        elif self.shuffle_method == "rapidsmpf-single":
-            # The user should NOT specify "rapidsmpf-single" directly.
-            raise ValueError("rapidsmpf-single is not a supported shuffle method.")
-        elif self.shuffle_method == "rapidsmpf":
-            if self.cluster == "single" and not rapidsmpf_single_available():
-                raise ValueError(
-                    "rapidsmpf shuffle method requested, but rapidsmpf is not installed."
-                )
-            # Select "rapidsmpf-single" for single-GPU
-            if self.cluster == "single":
-                object.__setattr__(self, "shuffle_method", "rapidsmpf-single")
-
         # frozen dataclass, so use object.__setattr__
         object.__setattr__(
             self, "fallback_mode", StreamingFallbackMode(self.fallback_mode)
@@ -821,16 +686,15 @@ def __post_init__(self) -> None:  # noqa: D105
             object.__setattr__(
                 self,
                 "target_partition_size",
-                default_target_partition_size(self.cluster, self.runtime),
+                default_target_partition_size(),
             )
         if self.broadcast_join_limit == 0:
             object.__setattr__(
                 self,
                 "broadcast_join_limit",
-                default_broadcast_join_limit(self.cluster, self.runtime),
+                default_broadcast_join_limit(),
             )
         object.__setattr__(self, "cluster", Cluster(self.cluster))
-        object.__setattr__(self, "shuffle_method", ShuffleMethod(self.shuffle_method))
 
         # Handle dynamic_planning.
         # Can be None, dict, or DynamicPlanningOptions
@@ -853,12 +717,8 @@ def __post_init__(self) -> None:  # noqa: D105
         # Type / value check everything else
         if not isinstance(self.max_rows_per_partition, int):
             raise TypeError("max_rows_per_partition must be an int")
-        if not isinstance(self.unique_fraction, dict):
-            raise TypeError("unique_fraction must be a dict of column name to float")
         if not isinstance(self.target_partition_size, int):
             raise TypeError("target_partition_size must be an int")
-        if not isinstance(self.groupby_n_ary, int):
-            raise TypeError("groupby_n_ary must be an int")
         if not isinstance(self.broadcast_join_limit, int):
             raise TypeError("broadcast_join_limit must be an int")
         if not isinstance(self.sink_to_directory, bool):
@@ -873,10 +733,9 @@ def __post_init__(self) -> None:  # noqa: D105
             raise TypeError("num_py_executors must be an int")
 
     def __hash__(self) -> int:  # noqa: D105
-        # cardinality factory, a dict, isn't natively hashable. We'll dump it
+        # dynamic_planning (a dict after asdict) isn't natively hashable. We'll dump it
         # to json and hash that.
         d = dataclasses.asdict(self)
-        d["unique_fraction"] = json.dumps(d["unique_fraction"])
         d["dynamic_planning"] = json.dumps(d["dynamic_planning"])
         return hash(tuple(sorted(d.items())))
 
@@ -1059,19 +918,6 @@ def from_polars_engine(
                 executor = InMemoryExecutor(**user_executor_options)
             case "streaming":
                 user_executor_options = user_executor_options.copy()
-                # Handle the interaction between the default shuffle method, the
-                # cluster, and whether rapidsmpf is available.
-                env_shuffle_method = os.environ.get(
-                    "CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", None
-                )
-                if env_shuffle_method is not None:
-                    shuffle_method_default = ShuffleMethod(env_shuffle_method)
-                else:
-                    shuffle_method_default = None
-
-                user_executor_options.setdefault(
-                    "shuffle_method", shuffle_method_default
-                )
 
                 # Handle dynamic_planning: check user config, then env var
                 user_dynamic_planning = user_executor_options.get(
@@ -1097,7 +943,7 @@ def from_polars_engine(
         }
 
         # Handle "cuda-stream-policy".
-        # The default will depend on the runtime and executor.
+        # The default will depend on the executor.
         user_cuda_stream_policy = engine.config.get(
             "cuda_stream_policy", None
         ) or os.environ.get("CUDF_POLARS__CUDA_STREAM_POLICY", None)
@@ -1105,24 +951,18 @@ def from_polars_engine(
         cuda_stream_policy: CUDAStreamPoolConfig | None
 
         if user_cuda_stream_policy is None:
-            if (
-                executor.name == "streaming" and executor.runtime == Runtime.RAPIDSMPF
-            ):  # pragma: no cover; requires rapidsmpf runtime
-                # the rapidsmpf runtime defaults to using a stream pool
+            if executor.name == "streaming":
                 cuda_stream_policy = CUDAStreamPoolConfig()
             else:
-                # everything else defaults to the default stream
                 cuda_stream_policy = None
         else:
             cuda_stream_policy = _convert_cuda_stream_policy(user_cuda_stream_policy)
 
-        # Pool policy is only supported by the rapidsmpf runtime.
         if isinstance(cuda_stream_policy, CUDAStreamPoolConfig) and (
-            (executor.name != "streaming")
-            or (executor.name == "streaming" and executor.runtime != Runtime.RAPIDSMPF)
+            executor.name != "streaming"
         ):
             raise ValueError(
-                "A stream pool is only supported by the rapidsmpf runtime."
+                "A stream pool is only supported by the streaming executor."
             )
 
         kwargs["cuda_stream_policy"] = cuda_stream_policy
diff --git a/python/cudf_polars/cudf_polars/utils/cuda_stream.py b/python/cudf_polars/cudf_polars/utils/cuda_stream.py
index c0708d3bea8..22022ee3401 100644
--- a/python/cudf_polars/cudf_polars/utils/cuda_stream.py
+++ b/python/cudf_polars/cudf_polars/utils/cuda_stream.py
@@ -17,11 +17,6 @@
     from rmm.pylibrmm.stream import Stream
 
 
-def get_dask_cuda_stream() -> Stream:
-    """Get the default CUDA stream for Dask."""
-    return DEFAULT_STREAM
-
-
 def get_cuda_stream() -> Stream:
     """Get the default CUDA stream for the current thread."""
     return DEFAULT_STREAM
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index d48793f0541..47633e42364 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     "packaging",
     "polars>=1.30,<1.39",
     "pylibcudf==26.6.*,>=0.0.0a0",
+    "rapidsmpf==26.6.*,>=0.0.0a0",
     "typing_extensions>=4.0.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
@@ -47,7 +48,6 @@ test = [
     "pytest-cov",
     "pytest-httpserver",
     "pytest-xdist",
-    "rapidsmpf==26.6.*,>=0.0.0a0",
     "rich",
     "structlog",
     "zstandard",
diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py
index 7f00684638f..b3d83b36d36 100644
--- a/python/cudf_polars/tests/conftest.py
+++ b/python/cudf_polars/tests/conftest.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
-import importlib.util
 from typing import TYPE_CHECKING
 
 import pytest
@@ -54,13 +53,6 @@ def clear_memory_resource_cache():
 @pytest.fixture(autouse=True)
 def _skip_unless_spmd(request: pytest.FixtureRequest) -> None:
     """Skip tests in SPMD multi-rank mode unless marked with ``pytest.mark.spmd``."""
-    # Do not use `pytest.importorskip` here: this fixture is autouse, so an
-    # import-based skip would skip every test in the suite on environments
-    # without rapidsmpf (e.g. the coverage CI job), masking real coverage.
-    # We only want to gate the nranks>1 check on rapidsmpf being available.
-    if importlib.util.find_spec("rapidsmpf") is None:
-        return
-
     from rapidsmpf.bootstrap import get_nranks, is_running_with_rrun
 
     if (
@@ -79,7 +71,6 @@ def streaming_engines() -> Generator[StreamingEngines, None, None]:
     name to a single shared engine instance, which is reused across the entire
     test session.
     """
-    pytest.importorskip("rapidsmpf")
     from rapidsmpf import bootstrap
     from rapidsmpf.communicator.single import new_communicator as single_communicator
     from rapidsmpf.config import Options, get_environment_variables
@@ -228,7 +219,8 @@ def engine_raise_on_fail() -> pl.GPUEngine:
     from ``.collect()``. Uses the in-memory executor so errors are not wrapped
     by a streaming task group.
     """
-    return pl.GPUEngine(raise_on_fail=True)
+    # TODO: We should be testing with all supported engine variants
+    return pl.GPUEngine(executor="in-memory", raise_on_fail=True)
 
 
 def pytest_addoption(parser):
@@ -240,14 +232,6 @@ def pytest_addoption(parser):
         help="Executor to use for GPUEngine.",
     )
 
-    parser.addoption(
-        "--runtime",
-        action="store",
-        default="tasks",
-        choices=("tasks", "rapidsmpf"),
-        help="Runtime to use for the 'streaming' executor.",
-    )
-
     parser.addoption(
         "--cluster",
         action="store",
@@ -278,17 +262,7 @@ def pytest_configure(config):
     # apply globally rather than per-module.
     config.addinivalue_line("filterwarnings", "ignore::ResourceWarning")
 
-    if config.getoption("--runtime") == "rapidsmpf":
-        if config.getoption("--executor") == "in-memory":
-            raise pytest.UsageError("Rapidsmpf runtime requires --executor='streaming'")
-
-        if importlib.util.find_spec("rapidsmpf") is None:
-            raise pytest.UsageError(
-                "Rapidsmpf runtime requires the 'rapidsmpf' package"
-            )
-
     cudf_polars.testing.asserts.DEFAULT_EXECUTOR = config.getoption("--executor")
-    cudf_polars.testing.asserts.DEFAULT_RUNTIME = config.getoption("--runtime")
     cudf_polars.testing.asserts.DEFAULT_CLUSTER = config.getoption("--cluster")
 
 
diff --git a/python/cudf_polars/tests/experimental/test_dask.py b/python/cudf_polars/tests/experimental/test_dask.py
index 5ccdde864ef..93ef4318490 100644
--- a/python/cudf_polars/tests/experimental/test_dask.py
+++ b/python/cudf_polars/tests/experimental/test_dask.py
@@ -64,7 +64,6 @@ def test_yields_engine(engine: DaskEngine) -> None:
 def test_executor_options_forwarded(engine: DaskEngine) -> None:
     """Reserved executor_options keys are injected into the engine config."""
     opts = engine.config["executor_options"]
-    assert opts["runtime"] == "rapidsmpf"
     assert opts["cluster"] == "dask"
     assert isinstance(opts["dask_context"], DaskContext)
 
@@ -196,7 +195,6 @@ def test_reset_updates_executor_options(reset_engine: DaskEngine) -> None:
     opts = reset_engine.config["executor_options"]
     assert opts["max_rows_per_partition"] == 42
     # Reserved keys are still injected by ``_reset``.
-    assert opts["runtime"] == "rapidsmpf"
     assert opts["cluster"] == "dask"
     assert isinstance(opts["dask_context"], DaskContext)
 
diff --git a/python/cudf_polars/tests/experimental/test_explain.py b/python/cudf_polars/tests/experimental/test_explain.py
index fecd4ba4d03..7f19e318778 100644
--- a/python/cudf_polars/tests/experimental/test_explain.py
+++ b/python/cudf_polars/tests/experimental/test_explain.py
@@ -540,8 +540,7 @@ def test_scan_properties(tmp_path: Path, predicate: pl.Expr | None):
     engine = pl.GPUEngine(executor="streaming", raise_on_fail=True)
     dag = serialize_query(q, engine)
 
-    # walk Union -> Scan
-    node = dag.nodes[dag.nodes[dag.roots[0]].children[0]]
+    node = dag.nodes[dag.roots[0]]
     assert node.type == "Scan"
     assert node.properties == expected_properties
 
@@ -673,7 +672,6 @@ def test_dynamic_planning_adds_repartition(df, op):
         executor="streaming",
         raise_on_fail=True,
         executor_options={
-            "runtime": "rapidsmpf",
             "dynamic_planning": {},
             "max_rows_per_partition": 1_000_000,
         },
diff --git a/python/cudf_polars/tests/experimental/test_groupby.py b/python/cudf_polars/tests/experimental/test_groupby.py
index 8d6ac5927e9..03d87fe23e9 100644
--- a/python/cudf_polars/tests/experimental/test_groupby.py
+++ b/python/cudf_polars/tests/experimental/test_groupby.py
@@ -270,10 +270,7 @@ def test_groupby_literal_key(df, streaming_engine):
 @pytest.mark.parametrize("keys", [("y",), ("y", "z")])
 def test_groupby_agg_config_options(df, op, keys, streaming_engine_factory):
     streaming_engine = streaming_engine_factory(
-        StreamingOptions(
-            max_rows_per_partition=4,
-            unique_fraction={"z": 0.5},
-        ),
+        StreamingOptions(max_rows_per_partition=4),
     )
     agg = getattr(pl.col("x"), op)()
     if op in ("sum", "mean"):
diff --git a/python/cudf_polars/tests/experimental/test_hstack.py b/python/cudf_polars/tests/experimental/test_hstack.py
index 9bbb4b7aa33..0c21678f7e2 100644
--- a/python/cudf_polars/tests/experimental/test_hstack.py
+++ b/python/cudf_polars/tests/experimental/test_hstack.py
@@ -20,7 +20,6 @@
 from cudf_polars.experimental.statistics import collect_statistics
 from cudf_polars.testing.asserts import (
     DEFAULT_CLUSTER,
-    DEFAULT_RUNTIME,
     assert_gpu_result_equal,
 )
 from cudf_polars.utils.config import ConfigOptions
@@ -34,7 +33,6 @@ def engine():
         executor_options={
             "max_rows_per_partition": 3,
             "cluster": DEFAULT_CLUSTER,
-            "runtime": DEFAULT_RUNTIME,
         },
     )
 
diff --git a/python/cudf_polars/tests/experimental/test_options.py b/python/cudf_polars/tests/experimental/test_options.py
index 291cbda7589..eb64cb97ed8 100644
--- a/python/cudf_polars/tests/experimental/test_options.py
+++ b/python/cudf_polars/tests/experimental/test_options.py
@@ -66,11 +66,6 @@ def test_executor_options_includes_set_fields() -> None:
     assert "log" not in result
 
 
-def test_executor_options_unique_fraction() -> None:
-    result = StreamingOptions(unique_fraction={"col_a": 0.5}).to_executor_options()
-    assert result["unique_fraction"] == {"col_a": 0.5}
-
-
 def test_executor_options_num_py_executors() -> None:
     result = StreamingOptions(num_py_executors=4).to_executor_options()
     assert result["num_py_executors"] == 4
diff --git a/python/cudf_polars/tests/experimental/test_parallel.py b/python/cudf_polars/tests/experimental/test_parallel.py
index 42365a113e2..67fc372e2e4 100644
--- a/python/cudf_polars/tests/experimental/test_parallel.py
+++ b/python/cudf_polars/tests/experimental/test_parallel.py
@@ -12,13 +12,9 @@
 from polars.testing import assert_frame_equal
 
 from cudf_polars import Translator
-from cudf_polars.dsl.expressions.base import Col, NamedExpr
 from cudf_polars.dsl.traversal import traversal
-from cudf_polars.experimental.parallel import lower_ir_graph
 from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
-from cudf_polars.experimental.statistics import collect_statistics
 from cudf_polars.testing.asserts import assert_gpu_result_equal
-from cudf_polars.utils.config import ConfigOptions
 
 
 @pytest.mark.parametrize("column", ["a", "b"])
@@ -134,40 +130,3 @@ def test_pickle_conditional_join_args():
     ir = Translator(q._ldf.visit(), GPUEngine()).translate_ir()
     for node in traversal([ir]):
         pickle.loads(pickle.dumps(node._non_child_args))
-
-
-def test_preserve_partitioning(streaming_engine_factory):
-    streaming_engine = streaming_engine_factory(
-        StreamingOptions(
-            max_rows_per_partition=2,
-            broadcast_join_limit=2,
-            unique_fraction={"a": 1.0},
-        ),
-    )
-    left = pl.LazyFrame({"a": [1, 2, 3, 4] * 5, "b": range(20)})
-    right = pl.LazyFrame({"a": [3, 4, 5, 6, 7] * 4, "c": range(20)})
-    q = (
-        left.join(right, on="a")
-        .filter(pl.col("a") == 2)
-        .group_by(pl.col("a"))
-        .mean()
-        .select(pl.col("a"), pl.col("c"))
-    )
-    _engine = pl.GPUEngine(
-        raise_on_fail=True,
-        executor="streaming",
-        executor_options={
-            "max_rows_per_partition": 2,
-            "broadcast_join_limit": 2,
-            "unique_fraction": {"a": 1.0},
-        },
-    )
-    config_options = ConfigOptions.from_polars_engine(_engine)
-    ir = Translator(q._ldf.visit(), _engine).translate_ir()
-    ir, partition_info = lower_ir_graph(
-        ir, config_options, collect_statistics(ir, config_options)
-    )
-    expect_dtype = ir.schema["a"]
-    expect_expr = (NamedExpr("a", Col(expect_dtype, "a")),)
-    assert partition_info[ir].partitioned_on == expect_expr
-    assert_gpu_result_equal(q, engine=streaming_engine)
diff --git a/python/cudf_polars/tests/experimental/test_ray.py b/python/cudf_polars/tests/experimental/test_ray.py
index ded4903c594..f62c3e3b831 100644
--- a/python/cudf_polars/tests/experimental/test_ray.py
+++ b/python/cudf_polars/tests/experimental/test_ray.py
@@ -53,7 +53,7 @@ def engine() -> Iterator[RayEngine]:
 
 def test_reserved_executor_keys() -> None:
     """executor_options rejects reserved keys."""
-    for key in ("runtime", "cluster", "spmd_context", "ray_context"):
+    for key in ("cluster", "spmd_context", "ray_context"):
         with pytest.raises(TypeError, match="reserved"):
             RayEngine(executor_options={key: "anything"})
 
@@ -109,7 +109,6 @@ def test_executor_options_forwarded(
 ) -> None:
     """Reserved executor_options keys are injected into the engine config."""
     opts = engine.config["executor_options"]
-    assert opts["runtime"] == "rapidsmpf"
     assert opts["cluster"] == "ray"
     assert isinstance(opts["ray_context"], RayContext)
     assert engine.rank_actors == opts["ray_context"].rank_actors
@@ -258,7 +257,6 @@ def test_reset_updates_executor_options(reset_engine: RayEngine) -> None:
     opts = reset_engine.config["executor_options"]
     assert opts["max_rows_per_partition"] == 42
     # Reserved keys are still injected by ``_reset``.
-    assert opts["runtime"] == "rapidsmpf"
     assert opts["cluster"] == "ray"
     assert isinstance(opts["ray_context"], RayContext)
     assert opts["ray_context"].rank_actors == reset_engine.rank_actors
diff --git a/python/cudf_polars/tests/experimental/test_sort.py b/python/cudf_polars/tests/experimental/test_sort.py
index 194686acf6b..f0abf5caade 100644
--- a/python/cudf_polars/tests/experimental/test_sort.py
+++ b/python/cudf_polars/tests/experimental/test_sort.py
@@ -9,7 +9,6 @@
 
 from cudf_polars.testing.asserts import (
     DEFAULT_CLUSTER,
-    DEFAULT_RUNTIME,
     assert_gpu_result_equal,
 )
 
@@ -22,7 +21,6 @@ def engine():
         executor_options={
             "max_rows_per_partition": 3,
             "cluster": DEFAULT_CLUSTER,
-            "runtime": DEFAULT_RUNTIME,
             "fallback_mode": "raise",
         },
     )
@@ -36,7 +34,6 @@ def engine_large():
         executor_options={
             "max_rows_per_partition": 2_100,
             "cluster": DEFAULT_CLUSTER,
-            "runtime": DEFAULT_RUNTIME,
             "fallback_mode": "raise",
         },
     )
@@ -139,7 +136,6 @@ def test_sort_after_sparse_join():
         executor="streaming",
         executor_options={
             "cluster": DEFAULT_CLUSTER,
-            "runtime": DEFAULT_RUNTIME,
             "max_rows_per_partition": 4,
         },
     )
diff --git a/python/cudf_polars/tests/experimental/test_spmd.py b/python/cudf_polars/tests/experimental/test_spmd.py
index 9fef0e00350..96ec5eab932 100644
--- a/python/cudf_polars/tests/experimental/test_spmd.py
+++ b/python/cudf_polars/tests/experimental/test_spmd.py
@@ -66,7 +66,7 @@ def test_single_communicator_outside_rrun() -> None:
 
 def test_reserved_keys() -> None:
     """executor_options rejects reserved keys."""
-    for key in ("runtime", "cluster", "spmd_context"):
+    for key in ("cluster", "spmd_context"):
         with (
             pytest.raises(TypeError, match="reserved"),
             SPMDEngine(executor_options={key: "anything"}),
@@ -320,7 +320,6 @@ def test_reset_updates_executor_options(comm: Communicator) -> None:
         opts = engine.config["executor_options"]
         assert opts["max_rows_per_partition"] == 42
         # Reserved keys are still injected by ``_reset``.
-        assert opts["runtime"] == "rapidsmpf"
         assert opts["cluster"] == "spmd"
         assert isinstance(opts["spmd_context"], SPMDContext)
 
diff --git a/python/cudf_polars/tests/experimental/test_unique.py b/python/cudf_polars/tests/experimental/test_unique.py
index 49d2b580300..6bb30624cb6 100644
--- a/python/cudf_polars/tests/experimental/test_unique.py
+++ b/python/cudf_polars/tests/experimental/test_unique.py
@@ -34,12 +34,9 @@ def df():
 @pytest.mark.parametrize("subset", [None, ("y",), ("y", "z")])
 @pytest.mark.parametrize("keep", ["first", "last", "any", "none"])
 @pytest.mark.parametrize("maintain_order", [True, False])
-@pytest.mark.parametrize("cardinality", [{}, {"y": 0.7}])
-def test_unique(
-    df, streaming_engine_factory, keep, subset, maintain_order, cardinality
-):
+def test_unique(df, streaming_engine_factory, keep, subset, maintain_order):
     engine = streaming_engine_factory(
-        StreamingOptions(unique_fraction=cardinality, fallback_mode="warn"),
+        StreamingOptions(fallback_mode="warn"),
     )
     q = df.unique(subset=subset, keep=keep, maintain_order=maintain_order)
     check_row_order = maintain_order
@@ -50,40 +47,16 @@ def test_unique(
     assert_gpu_result_equal(q, engine=engine, check_row_order=check_row_order)
 
 
-def test_unique_fallback(df, streaming_engine_factory):
-    engine = streaming_engine_factory(
-        StreamingOptions(
-            unique_fraction={"y": 1.0},
-            fallback_mode="raise",
-            dynamic_planning=None,
-        ),
-    )
-    q = df.unique(keep="first", maintain_order=True)
-    with pytest.raises(
-        NotImplementedError,
-        match="Unsupported unique options",
-    ):
-        assert_gpu_result_equal(q, engine=engine)
-
-
 @pytest.mark.parametrize("maintain_order", [True, False])
-@pytest.mark.parametrize("cardinality", [{}, {"y": 0.5}])
-def test_unique_select(df, streaming_engine_factory, maintain_order, cardinality):
+def test_unique_select(df, streaming_engine_factory, maintain_order):
     engine = streaming_engine_factory(
         StreamingOptions(
             max_rows_per_partition=4,
-            unique_fraction=cardinality,
             fallback_mode="warn",
         ),
     )
     q = df.select(pl.col("y").unique(maintain_order=maintain_order))
-    if cardinality == {"y": 0.5} and maintain_order:
-        with pytest.warns(
-            UserWarning, match="Unsupported unique options for multiple partitions."
-        ):
-            assert_gpu_result_equal(q, engine=engine, check_row_order=False)
-    else:
-        assert_gpu_result_equal(q, engine=engine, check_row_order=False)
+    assert_gpu_result_equal(q, engine=engine, check_row_order=False)
 
 
 @pytest.mark.parametrize("keep", ["first", "last", "any"])
diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py
index 3cd66bc527d..6004c5eef40 100644
--- a/python/cudf_polars/tests/test_config.py
+++ b/python/cudf_polars/tests/test_config.py
@@ -4,7 +4,7 @@
 from __future__ import annotations
 
 import sys
-from typing import Any, cast
+from typing import cast
 
 import pytest
 
@@ -35,20 +35,7 @@
     StreamingExecutor,
     _default_cuda_stream_policy,
 )
-from cudf_polars.utils.cuda_stream import (
-    get_cuda_stream,
-    get_dask_cuda_stream,
-)
-
-
-@pytest.fixture(params=[False, True], ids=["norapidsmpf.single", "rapidsmpf.single"])
-def rapidsmpf_single_available(request, monkeypatch):
-    monkeypatch.setattr(
-        cudf_polars.utils.config,
-        "rapidsmpf_single_available",
-        lambda: request.param,
-    )
-    return request.param
+from cudf_polars.utils.cuda_stream import get_cuda_stream
 
 
 def test_polars_verbose_warns(monkeypatch):
@@ -232,47 +219,6 @@ def test_parquet_options_from_none() -> None:
     assert config.parquet_options.chunked is True
 
 
-def test_validate_streaming_executor_shuffle_method(
-    *, rapidsmpf_single_available: bool
-) -> None:
-    config = ConfigOptions.from_polars_engine(
-        pl.GPUEngine(
-            executor="streaming",
-            executor_options={"shuffle_method": "tasks"},
-        )
-    )
-    assert config.executor.name == "streaming"
-    assert config.executor.shuffle_method == "tasks"
-
-    # rapidsmpf with single cluster
-    engine = pl.GPUEngine(
-        executor="streaming",
-        executor_options={"shuffle_method": "rapidsmpf", "cluster": "single"},
-    )
-
-    if rapidsmpf_single_available:
-        config = ConfigOptions.from_polars_engine(engine)
-        assert config.executor.name == "streaming"
-        assert config.executor.shuffle_method == "rapidsmpf-single"
-    else:
-        with pytest.raises(ValueError, match="rapidsmpf is not installed"):
-            ConfigOptions.from_polars_engine(engine)
-
-
-def test_join_rapidsmpf_single_private_config() -> None:
-    # The user may not specify "rapidsmpf-single" directly
-    engine = pl.GPUEngine(
-        raise_on_fail=True,
-        executor="streaming",
-        executor_options={
-            "shuffle_method": "rapidsmpf-single",
-            "runtime": "tasks",
-        },
-    )
-    with pytest.raises(ValueError, match="not a supported shuffle method"):
-        ConfigOptions.from_polars_engine(engine)
-
-
 @pytest.mark.parametrize("executor", ["in-memory", "streaming"])
 def test_hashable(executor: str) -> None:
     config = ConfigOptions.from_polars_engine(
@@ -319,31 +265,11 @@ def test_validate_cluster() -> None:
         )
 
 
-def test_validate_shuffle_method_defaults() -> None:
-    config = ConfigOptions.from_polars_engine(
-        pl.GPUEngine(
-            executor="streaming",
-        )
-    )
-    assert config.executor.name == "streaming"
-    assert config.executor.shuffle_method == "tasks"  # Default for single cluster
-
-    with pytest.raises(ValueError, match="'foo' is not a valid ShuffleMethod"):
-        ConfigOptions.from_polars_engine(
-            pl.GPUEngine(
-                executor="streaming",
-                executor_options={"shuffle_method": "foo"},
-            )
-        )
-
-
 @pytest.mark.parametrize(
     "option",
     [
         "max_rows_per_partition",
-        "unique_fraction",
         "target_partition_size",
-        "groupby_n_ary",
         "broadcast_join_limit",
         "sink_to_directory",
         "client_device_threshold",
@@ -409,11 +335,8 @@ def test_config_option_from_env(monkeypatch: pytest.MonkeyPatch) -> None:
         m.setenv("CUDF_POLARS__EXECUTOR__CLUSTER", "single")
         m.setenv("CUDF_POLARS__EXECUTOR__FALLBACK_MODE", "silent")
         m.setenv("CUDF_POLARS__EXECUTOR__MAX_ROWS_PER_PARTITION", "42")
-        m.setenv("CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION", '{"a": 0.5}')
         m.setenv("CUDF_POLARS__EXECUTOR__TARGET_PARTITION_SIZE", "100")
-        m.setenv("CUDF_POLARS__EXECUTOR__GROUPBY_N_ARY", "43")
         m.setenv("CUDF_POLARS__EXECUTOR__BROADCAST_JOIN_LIMIT", "44")
-        m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "tasks")
         m.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default")
 
         engine = pl.GPUEngine()
@@ -422,11 +345,8 @@ def test_config_option_from_env(monkeypatch: pytest.MonkeyPatch) -> None:
         assert config.executor.cluster == "single"
         assert config.executor.fallback_mode == "silent"
         assert config.executor.max_rows_per_partition == 42
-        assert config.executor.unique_fraction == {"a": 0.5}
         assert config.executor.target_partition_size == 100
-        assert config.executor.groupby_n_ary == 43
         assert config.executor.broadcast_join_limit == 44
-        assert config.executor.shuffle_method == "tasks"
         assert config.cuda_stream_policy is None
 
 
@@ -498,12 +418,6 @@ def test_default_executor() -> None:
     assert config.executor.name == "streaming"
 
 
-def test_default_runtime() -> None:
-    config = ConfigOptions.from_polars_engine(pl.GPUEngine())
-    assert config.executor.name == "streaming"
-    assert config.executor.runtime == "tasks"
-
-
 @pytest.mark.parametrize(
     "memory_resource, memory_resource_config",
     [
@@ -537,10 +451,7 @@ def test_memory_resource(memory_resource, memory_resource_config) -> None:
         if memory_resource is None and memory_resource_config is None:
             # The default case: We make a new RMM MR, whose type depends on the GPU's features.
 
-            if _is_concurrent_managed_access_supported():
-                assert isinstance(result, rmm.mr.PrefetchResourceAdaptor)
-            else:
-                assert isinstance(result, rmm.mr.CudaAsyncMemoryResource)
+            assert isinstance(result, rmm.mr.CudaAsyncMemoryResource)
 
         elif memory_resource is None:
             # Configured through memory_resource_config
@@ -608,21 +519,17 @@ def test_cuda_stream_pool():
 def test_cuda_stream_policy_default(monkeypatch: pytest.MonkeyPatch) -> None:
     # Default from engine
     config = ConfigOptions.from_polars_engine(pl.GPUEngine())
-    assert config.cuda_stream_policy is None
+    assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig)
 
-    config = ConfigOptions.from_polars_engine(
-        pl.GPUEngine(executor_options={"runtime": "tasks"})
-    )
-    assert config.cuda_stream_policy is None
+    config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming"))
+    assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig)
 
     # Default from env
     monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default")
     config = ConfigOptions.from_polars_engine(pl.GPUEngine())
     assert config.cuda_stream_policy is None
 
-    config = ConfigOptions.from_polars_engine(
-        pl.GPUEngine(executor_options={"runtime": "tasks"})
-    )
+    config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming"))
     assert config.cuda_stream_policy is None
 
 
@@ -635,26 +542,19 @@ def test_default_cuda_stream_policy(monkeypatch: pytest.MonkeyPatch) -> None:
     assert isinstance(result, CUDAStreamPoolConfig)
 
 
-def test_cuda_stream_policy_from_config(*, rapidsmpf_single_available: bool) -> None:
+def test_cuda_stream_policy_from_config() -> None:
     engine = pl.GPUEngine(
         executor="streaming",
-        executor_options={"runtime": "rapidsmpf"},
         cuda_stream_policy={
             "pool_size": 32,
             "flags": rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING,
         },
     )
-    if rapidsmpf_single_available:
-        config = ConfigOptions.from_polars_engine(engine)
-        assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig)
-        assert config.cuda_stream_policy.pool_size == 32
-        assert (
-            config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING
-        )
-        config.cuda_stream_policy.build().get_stream()  # no exception
-    else:
-        with pytest.raises(ValueError, match="The rapidsmpf streaming engine"):
-            ConfigOptions.from_polars_engine(engine)
+    config = ConfigOptions.from_polars_engine(engine)
+    assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig)
+    assert config.cuda_stream_policy.pool_size == 32
+    assert config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING
+    config.cuda_stream_policy.build().get_stream()  # no exception
 
 
 @pytest.mark.parametrize(
@@ -667,26 +567,19 @@ def test_cuda_stream_policy_from_config(*, rapidsmpf_single_available: bool) ->
         '{"pool_size": 32}',
     ],
 )
-def test_cuda_stream_policy_from_env(
-    monkeypatch: pytest.MonkeyPatch, env: str, *, rapidsmpf_single_available: bool
-) -> None:
+def test_cuda_stream_policy_from_env(monkeypatch: pytest.MonkeyPatch, env: str) -> None:
     monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", env)
-    runtime = "tasks" if env == "default" else "rapidsmpf"
-    engine = pl.GPUEngine(executor="streaming", executor_options={"runtime": runtime})
-    if runtime == "rapidsmpf" and rapidsmpf_single_available:
-        config = ConfigOptions.from_polars_engine(engine)
+    engine = pl.GPUEngine(executor="streaming")
+    config = ConfigOptions.from_polars_engine(engine)
+    if env == "default":
+        assert config.cuda_stream_policy is None
+    else:
         assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig)
         if env == "pool":
             assert config.cuda_stream_policy.pool_size == 16
             assert config.cuda_stream_policy.flags == CudaStreamFlags.NON_BLOCKING
         else:
             assert config.cuda_stream_policy.pool_size == 32
-    elif runtime == "rapidsmpf":
-        with pytest.raises(ValueError, match="The rapidsmpf streaming engine"):
-            ConfigOptions.from_polars_engine(engine)
-    else:
-        config = ConfigOptions.from_polars_engine(engine)
-        assert config.cuda_stream_policy is None
 
 
 def test_cuda_stream_policy_from_env_invalid(monkeypatch: pytest.MonkeyPatch):
@@ -696,41 +589,26 @@ def test_cuda_stream_policy_from_env_invalid(monkeypatch: pytest.MonkeyPatch):
 
 
 def test_cuda_stream_policy_default_rapidsmpf(monkeypatch: pytest.MonkeyPatch) -> None:
-    pytest.importorskip("rapidsmpf")
-
     # Default from engine
-    config = ConfigOptions.from_polars_engine(
-        pl.GPUEngine(executor_options={"runtime": "rapidsmpf"})
-    )
+    config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming"))
     assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig)
     assert config.cuda_stream_policy.pool_size == 16
     assert config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING
 
     # "default" user argument overrides pool default
     monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default")
-    config = ConfigOptions.from_polars_engine(
-        pl.GPUEngine(executor_options={"runtime": "rapidsmpf"})
-    )
+    config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming"))
     assert config.cuda_stream_policy is None
 
 
-@pytest.mark.parametrize(
-    "polars_kwargs",
-    [
-        {"executor": "in-memory"},
-        {"executor": "streaming", "executor_options": {"runtime": "tasks"}},
-    ],
-)
-def test_cuda_stream_policy_pool_only_supported_by_rapidsmpf(
-    polars_kwargs: dict[str, Any],
-) -> None:
+def test_cuda_stream_policy_pool_in_memory_unsupported() -> None:
     with pytest.raises(
         ValueError,
-        match="A stream pool is only supported by the rapidsmpf runtime.",
+        match="A stream pool is only supported by the streaming executor.",
     ):
         ConfigOptions.from_polars_engine(
             pl.GPUEngine(
-                **polars_kwargs,
+                executor="in-memory",
                 cuda_stream_policy={"pool_size": 32, "flags": "NON_BLOCKING"},
             )
         )
@@ -903,8 +781,3 @@ def test_dask_sink_to_directory_false_raises() -> None:
         ValueError, match="The dask cluster requires sink_to_directory=True"
     ):
         StreamingExecutor(cluster=Cluster.DASK, sink_to_directory=False)
-
-
-def test_get_dask_cuda_stream() -> None:
-    stream = get_dask_cuda_stream()
-    assert stream is not None
diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py
index e3e788f2866..a655efbe422 100644
--- a/python/cudf_polars/tests/test_scan.py
+++ b/python/cudf_polars/tests/test_scan.py
@@ -718,7 +718,7 @@ def test_scan_parquet_zero_width_with_limit(
 ):
     request.applymarker(
         pytest.mark.xfail(
-            is_streaming_engine(engine) and custom_engine is None,
+            is_streaming_engine(engine) or custom_engine is not None,
             reason="https://github.com/rapidsai/cudf/issues/21644",
         )
     )
diff --git a/python/cudf_polars/tests/test_sink.py b/python/cudf_polars/tests/test_sink.py
index 7b69f6904b4..d23559d2134 100644
--- a/python/cudf_polars/tests/test_sink.py
+++ b/python/cudf_polars/tests/test_sink.py
@@ -157,6 +157,7 @@ def test_chunked_sink_empty_table_to_parquet(tmp_path):
         pl.LazyFrame(),
         tmp_path / "out.parquet",
         engine=pl.GPUEngine(
+            executor="in-memory",
             raise_on_fail=True,
             parquet_options={"chunked": True, "n_output_chunks": 2},
         ),
diff --git a/python/cudf_polars/tests/test_tracing.py b/python/cudf_polars/tests/test_tracing.py
index 184c0a77d38..283ca361682 100644
--- a/python/cudf_polars/tests/test_tracing.py
+++ b/python/cudf_polars/tests/test_tracing.py
@@ -55,9 +55,10 @@ def test_trace_basic(
     assert b"frames_input" in result
     assert b"total_bytes_output" in result
     assert b"total_bytes_input" in result
-    assert b"rmm_total_bytes_output" in result
-    assert b"rmm_total_bytes_input" in result
-    assert b"rmm_current_bytes_output" in result
+    # TODO: With rapidsmpf are the rmm fields not supposed to be logged?
+    assert b"rmm_total_bytes_output" not in result
+    assert b"rmm_total_bytes_input" not in result
+    assert b"rmm_current_bytes_output" not in result
     assert b"overhead_duration" in result
 
 
@@ -79,10 +80,6 @@ def test_import_without_structlog() -> None:
     subprocess.check_call([sys.executable, "-c", code])
 
 
-@pytest.mark.skipif(
-    cudf_polars.testing.asserts.DEFAULT_RUNTIME != "rapidsmpf",
-    reason="Requires 'rapidsmpf' runtime.",
-)
 def test_log_query_plan() -> None:
     """Test that log_query_plan emits a Query Plan event."""
     import os
@@ -98,7 +95,6 @@ def test_log_query_plan() -> None:
         executor="streaming",
         executor_options={
             "cluster": "single",
-            "runtime": "rapidsmpf",
             "max_rows_per_partition": 5,
         },
         memory_resource=rmm.mr.ManagedMemoryResource(),
@@ -126,7 +122,6 @@ def test_log_query_plan() -> None:
     reason="Requires CUDF_POLARS_LOG_TRACES=1.",
 )
 def test_sets_cudf_polars_query_id():
-    pytest.importorskip("rapidsmpf")
     left = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
     right = pl.LazyFrame({"a": [1, 2, 3], "c": [7, 8, 9]})
 
@@ -136,7 +131,6 @@ def test_sets_cudf_polars_query_id():
     engine = pl.GPUEngine(
         executor="streaming",
         raise_on_fail=True,
-        executor_options={"runtime": "rapidsmpf"},
     )
 
     with structlog.testing.capture_logs(
diff --git a/python/cudf_polars/tests/testing/test_engine_utils.py b/python/cudf_polars/tests/testing/test_engine_utils.py
index faf113502d6..346a11acf2e 100644
--- a/python/cudf_polars/tests/testing/test_engine_utils.py
+++ b/python/cudf_polars/tests/testing/test_engine_utils.py
@@ -3,8 +3,6 @@
 
 from __future__ import annotations
 
-import pytest
-
 from cudf_polars.testing.engine_utils import (
     EngineFixtureParam,
     create_streaming_options,
@@ -30,7 +28,6 @@ def test_engine_fixture_param_small_blocksize():
 
 
 def test_create_streaming_options_medium():
-    pytest.importorskip("rapidsmpf")
     opts = create_streaming_options("medium")
     assert opts.max_rows_per_partition == 50
     assert opts.target_partition_size == 1_000_000
@@ -38,7 +35,6 @@ def test_create_streaming_options_medium():
 
 
 def test_create_streaming_options_small():
-    pytest.importorskip("rapidsmpf")
     opts = create_streaming_options("small")
     assert opts.max_rows_per_partition == 4
     assert opts.target_partition_size == 10
@@ -46,7 +42,6 @@ def test_create_streaming_options_small():
 
 def test_create_streaming_options_overrides_merge():
     """Overrides take precedence over the blocksize baseline."""
-    pytest.importorskip("rapidsmpf")
     from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
 
     overrides = StreamingOptions(max_rows_per_partition=999)

From 65df1061882b16b1c5e4696fe2dedda432be4ca8 Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Wed, 6 May 2026 20:00:13 -0700
Subject: [PATCH 09/12] Use thread pool to submit hybrid scan host IO tasks
 (#21992)

This PR uses the host worker pool to submit hybrid scan's host-read IO tasks so that the mutex can be safely released after submission.

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - Tianyu Liu (https://github.com/kingcrimsontianyu)
  - Shruti Shivakumar (https://github.com/shrshi)

URL: https://github.com/rapidsai/cudf/pull/21992
---
 cpp/src/io/parquet/io_utils/parquet_io_utils.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp b/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp
index 3e67b49d03e..9b6953b4bd1 100644
--- a/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp
+++ b/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp
@@ -6,6 +6,7 @@
 #include "io/comp/common.hpp"
 #include "io/parquet/parquet_common.hpp"
 
+#include <cudf/detail/utilities/host_worker_pool.hpp>
 #include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/io/datasource.hpp>
 #include <cudf/io/parquet.hpp>
@@ -16,8 +17,8 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/resource_ref.hpp>
 
+#include <cuda/iterator>
 #include <cuda/std/tuple>
-#include <thrust/iterator/zip_iterator.h>
 
 #include <numeric>
 
@@ -118,8 +119,7 @@ fetch_byte_ranges_to_device_async(
   stream.synchronize();
 
   {
-    auto iter =
-      thrust::make_zip_iterator(io_offsets.begin(), io_sizes.begin(), destinations.begin());
+    auto iter = cuda::make_zip_iterator(io_offsets.begin(), io_sizes.begin(), destinations.begin());
 
     std::lock_guard<std::mutex> lock(mutex);
 
@@ -128,16 +128,14 @@ fetch_byte_ranges_to_device_async(
       auto const io_size   = cuda::std::get<1>(tuple);
       auto const dest      = cuda::std::get<2>(tuple);
 
-      // Directly read the column chunk data to the device
-      // buffer if supported
+      // Directly read the column chunk data to the device buffer if supported
       if (datasource.supports_device_read() and datasource.is_device_read_preferred(io_size)) {
         device_read_tasks.emplace_back(
           datasource.device_read_async(io_offset, io_size, dest, stream));
       } else {
-        // Read the column chunk data to the host buffer and
-        // copy it to the device buffer
-        host_read_tasks.emplace_back(
-          std::async(std::launch::deferred, [&datasource, io_offset, io_size, dest, stream]() {
+        // Read the column chunk data to the host buffer and copy it to the device buffer
+        host_read_tasks.emplace_back(cudf::detail::host_worker_pool().submit_task(
+          [&datasource, io_offset, io_size, dest, stream]() {
             auto host_buffer = datasource.host_read(io_offset, io_size);
             cudf::detail::cuda_memcpy_async(
               cudf::device_span<uint8_t>{dest, io_size},

From 8d76fc287ea0474f4f66a60f86368aa604a9b4bd Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Wed, 6 May 2026 23:03:11 -0700
Subject: [PATCH 10/12] Python bindings and pytests for
 `cudf::apply_deletion_mask` (#22145)

Follow-up to #22144.

Adds Python bindings for the `cudf::apply_deletion_mask` API and adds pytests for stream compaction.

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Bradley Dice (https://github.com/bdice)
  - Matthew Murray (https://github.com/Matt711)

URL: https://github.com/rapidsai/cudf/pull/22145
---
 .../libcudf/lists/stream_compaction.pxd       |  7 ++
 .../pylibcudf/libcudf/stream_compaction.pxd   |  7 ++
 python/pylibcudf/pylibcudf/lists.pxd          |  7 ++
 python/pylibcudf/pylibcudf/lists.pyi          |  6 ++
 python/pylibcudf/pylibcudf/lists.pyx          | 42 +++++++++++
 .../pylibcudf/pylibcudf/stream_compaction.pxd |  7 ++
 .../pylibcudf/pylibcudf/stream_compaction.pyi |  6 ++
 .../pylibcudf/pylibcudf/stream_compaction.pyx | 36 ++++++++++
 .../pylibcudf/tests/test_stream_compaction.py | 69 +++++++++++++++++++
 9 files changed, 187 insertions(+)
 create mode 100644 python/pylibcudf/tests/test_stream_compaction.py

diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
index 0187642e0c7..7514f9d159a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd
@@ -19,6 +19,13 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
+    cdef unique_ptr[column] apply_deletion_mask(
+        const lists_column_view& lists_column,
+        const lists_column_view& deletion_mask,
+        cudaStream_t stream,
+        device_async_resource_ref mr
+    ) except +libcudf_exception_handler
+
     cdef unique_ptr[column] distinct(
         const lists_column_view& lists_column,
         null_equality nulls_equal,
diff --git a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
index 9f8686da472..9b5f6d287f3 100644
--- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd
@@ -48,6 +48,13 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
         device_async_resource_ref mr
     ) except +libcudf_exception_handler
 
+    cdef unique_ptr[table] apply_deletion_mask(
+        table_view source_table,
+        column_view deletion_mask,
+        cudaStream_t stream,
+        device_async_resource_ref mr
+    ) except +libcudf_exception_handler
+
     cdef unique_ptr[table] unique(
         table_view input,
         vector[size_type] keys,
diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd
index 88b09c01531..75db812de14 100644
--- a/python/pylibcudf/pylibcudf/lists.pxd
+++ b/python/pylibcudf/pylibcudf/lists.pxd
@@ -150,6 +150,13 @@ cpdef Column apply_boolean_mask(
     DeviceMemoryResource mr=*,
 )
 
+cpdef Column apply_deletion_mask(
+    Column,
+    Column,
+    object stream=*,
+    DeviceMemoryResource mr=*,
+)
+
 cpdef Column distinct(
     Column,
     null_equality,
diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi
index 1e418b59726..6ff27345854 100644
--- a/python/pylibcudf/pylibcudf/lists.pyi
+++ b/python/pylibcudf/pylibcudf/lists.pyi
@@ -131,6 +131,12 @@ def apply_boolean_mask(
     stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
+def apply_deletion_mask(
+    input: Column,
+    deletion_mask: Column,
+    stream: CudaStreamLike | None = None,
+    mr: DeviceMemoryResource | None = None,
+) -> Column: ...
 def distinct(
     input: Column,
     nulls_equal: NullEquality,
diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx
index fd05242e44f..fbc07eebb8a 100644
--- a/python/pylibcudf/pylibcudf/lists.pyx
+++ b/python/pylibcudf/pylibcudf/lists.pyx
@@ -32,6 +32,7 @@ from pylibcudf.libcudf.lists.sorting cimport (
 )
 from pylibcudf.libcudf.lists.stream_compaction cimport (
     apply_boolean_mask as cpp_apply_boolean_mask,
+    apply_deletion_mask as cpp_apply_deletion_mask,
     distinct as cpp_distinct,
 )
 from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option
@@ -61,6 +62,7 @@ __all__ = [
     "ConcatenateNullPolicy",
     "DuplicateFindOption",
     "apply_boolean_mask",
+    "apply_deletion_mask",
     "concatenate_list_elements",
     "concatenate_rows",
     "contains",
@@ -833,6 +835,46 @@ cpdef Column apply_boolean_mask(
     return Column.from_libcudf(move(c_result), _stream, mr)
 
 
+cpdef Column apply_deletion_mask(
+    Column input,
+    Column deletion_mask,
+    object stream=None,
+    DeviceMemoryResource mr=None,
+):
+    """Filters elements in each row of the input lists column using a deletion mask.
+
+    For details, see :cpp:func:`apply_deletion_mask`.
+
+    Parameters
+    ----------
+    input : Column
+        The input lists column.
+    deletion_mask : Column
+        A lists-of-bools column used as a deletion mask.
+
+    Returns
+    -------
+    Column
+        Lists column with elements removed where deletion_mask is true.
+    """
+    cdef unique_ptr[column] c_result
+    cdef ListsColumnView list_view = input.list_view()
+    cdef ListsColumnView mask_view = deletion_mask.list_view()
+
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
+    mr = _get_memory_resource(mr)
+
+    with nogil:
+        c_result = cpp_apply_deletion_mask(
+            list_view.view(),
+            mask_view.view(),
+            _cs,
+            mr.get_mr(),
+        )
+    return Column.from_libcudf(move(c_result), _stream, mr)
+
+
 cpdef Column distinct(
     Column input,
     null_equality nulls_equal,
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd
index 6e904e11ce1..ffe36cebfbd 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd
@@ -37,6 +37,13 @@ cpdef Table apply_boolean_mask(
     DeviceMemoryResource mr = *,
 )
 
+cpdef Table apply_deletion_mask(
+    Table source_table,
+    Column deletion_mask,
+    object stream = *,
+    DeviceMemoryResource mr = *,
+)
+
 cpdef Table unique(
     Table input,
     list keys,
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi
index afdd692dde2..76e669f8995 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pyi
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi
@@ -37,6 +37,12 @@ def apply_boolean_mask(
     stream: CudaStreamLike | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Table: ...
+def apply_deletion_mask(
+    source_table: Table,
+    deletion_mask: Column,
+    stream: CudaStreamLike | None = None,
+    mr: DeviceMemoryResource | None = None,
+) -> Table: ...
 def unique(
     input: Table,
     keys: list[int],
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx
index b4751078acb..2fe8705ea52 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pyx
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx
@@ -29,6 +29,7 @@ from cuda.bindings.cyruntime cimport cudaStream_t
 __all__ = [
     "DuplicateKeepOption",
     "apply_boolean_mask",
+    "apply_deletion_mask",
     "distinct",
     "distinct_indices",
     "drop_nans",
@@ -151,6 +152,41 @@ cpdef Table apply_boolean_mask(
     return Table.from_libcudf(move(c_result), _stream, mr)
 
 
+cpdef Table apply_deletion_mask(
+    Table source_table,
+    Column deletion_mask,
+    object stream=None,
+    DeviceMemoryResource mr=None,
+):
+    """Filters out rows from the input table using a deletion mask.
+
+    For details, see :cpp:func:`apply_deletion_mask`.
+
+    Parameters
+    ----------
+    source_table : Table
+        The input table to filter.
+    deletion_mask : Column
+        A boolean column used as a deletion mask.
+
+    Returns
+    -------
+    Table
+        Table with rows removed where deletion_mask is true.
+    """
+    cdef unique_ptr[table] c_result
+
+    cdef Stream _stream = _get_stream(stream)
+    cdef cudaStream_t _cs = _stream.view().value()
+    mr = _get_memory_resource(mr)
+
+    with nogil:
+        c_result = cpp_stream_compaction.apply_deletion_mask(
+            source_table.view(), deletion_mask.view(), _cs, mr.get_mr()
+        )
+    return Table.from_libcudf(move(c_result), _stream, mr)
+
+
 cpdef Table unique(
     Table input,
     list keys,
diff --git a/python/pylibcudf/tests/test_stream_compaction.py b/python/pylibcudf/tests/test_stream_compaction.py
new file mode 100644
index 00000000000..ccf21c2a6b3
--- /dev/null
+++ b/python/pylibcudf/tests/test_stream_compaction.py
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+
+import pyarrow as pa
+import pytest
+from utils import assert_column_eq, assert_table_eq
+
+import pylibcudf as plc
+
+
+@pytest.fixture
+def lists_column_and_mask():
+    pa_input = pa.array(
+        [[0, 1, 2, 3], [4, 5], [6, 7, 8, 9]], type=pa.list_(pa.int32())
+    )
+    pa_mask = pa.array(
+        [
+            [True, False, True, False],
+            [True, False],
+            [True, False, True, False],
+        ],
+        type=pa.list_(pa.bool_()),
+    )
+    return pa_input, pa_mask
+
+
+def test_lists_apply_boolean_mask(lists_column_and_mask):
+    pa_input, pa_mask = lists_column_and_mask
+    result = plc.lists.apply_boolean_mask(
+        plc.Column.from_arrow(pa_input), plc.Column.from_arrow(pa_mask)
+    )
+    expected = pa.array([[0, 2], [4], [6, 8]], type=pa.list_(pa.int32()))
+    assert_column_eq(expected, result)
+
+
+def test_lists_apply_deletion_mask(lists_column_and_mask):
+    pa_input, pa_mask = lists_column_and_mask
+    result = plc.lists.apply_deletion_mask(
+        plc.Column.from_arrow(pa_input), plc.Column.from_arrow(pa_mask)
+    )
+    expected = pa.array([[1, 3], [5], [7, 9]], type=pa.list_(pa.int32()))
+    assert_column_eq(expected, result)
+
+
+def test_apply_boolean_mask():
+    pa_table = pa.table(
+        {
+            "a": pa.array([10, 40, 70, 5, 2, 10], type=pa.int32()),
+            "b": pa.array([10, 40, 70, 5, 2, 10], type=pa.float64()),
+        }
+    )
+    pa_mask = pa.array(
+        [True, False, True, False, True, False], type=pa.bool_()
+    )
+    result = plc.stream_compaction.apply_boolean_mask(
+        plc.Table.from_arrow(pa_table), plc.Column.from_arrow(pa_mask)
+    )
+    expected = pa_table.filter(pa_mask)
+    assert_table_eq(expected, result)
+
+
+def test_apply_deletion_mask():
+    pa_table = pa.table({"a": pa.array([1, 2, 3, 4, 5], type=pa.int32())})
+    pa_mask = pa.array([True, False, True, False, True], type=pa.bool_())
+    result = plc.stream_compaction.apply_deletion_mask(
+        plc.Table.from_arrow(pa_table), plc.Column.from_arrow(pa_mask)
+    )
+    expected = pa.table({"a": pa.array([2, 4], type=pa.int32())})
+    assert_table_eq(expected, result)

From c9ad1c58ab2626b788d7fc9a03e027e89cf210be Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora" <rzamora217@gmail.com>
Date: Thu, 7 May 2026 08:52:41 -0500
Subject: [PATCH 11/12] Refactor ``sort_actor`` to prepare for ``OrderScheme``
 changes (#22350)

- Follow-up to https://github.com/rapidsai/cudf/pull/22315; further revises `sort_actor` in preparation for https://github.com/rapidsai/rapidsmpf/pull/853
- Part of https://github.com/rapidsai/cudf/issues/22128
- Breaks apart `sort_actor` logic into modular steps, so we can avoid collecting boundaries when we already know the boundaries (future work).

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/22350
---
 .../rapidsmpf/collectives/sort.py             | 129 +++++++++++++-----
 1 file changed, 94 insertions(+), 35 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py
index ffc10ea44c2..a950df3ce34 100644
--- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py
+++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py
@@ -337,16 +337,25 @@ async def _receive_and_buffer_chunks(
     return local_candidates_list
 
 
+async def _forward_from_chunk_store(
+    context: Context, ch_out: Channel[TableChunk], chunk_store: ChunkStore
+) -> None:
+    """Forward buffered messages from a ChunkStore into a channel."""
+    for msg in chunk_store:
+        await ch_out.send(context, msg)
+    await ch_out.drain(context)
+
+
 async def _insert_chunks_into_shuffle(
     context: Context,
     comm: Communicator,
+    ir: Sort,
+    ir_context: IRExecutionContext,
+    ch_in: Channel[TableChunk],
     num_partitions: int,
     collective_ids: list[int],
     metadata_in: ChannelMetadata,
-    chunk_store: ChunkStore,
     sort_boundaries_df: DataFrame,
-    ir: Sort,
-    ir_context: IRExecutionContext,
     by: list[str],
 ) -> tuple[ShuffleManager, Sort]:
     """Create shuffle manager and insert each buffered chunk with sort-based splits."""
@@ -364,7 +373,7 @@ async def _insert_chunks_into_shuffle(
         partition_assignment=PartitionAssignment.CONTIGUOUS,
     )
     async with shuffle.inserting() as inserter:
-        for msg in chunk_store:
+        while (msg := await ch_in.recv(context)) is not None:
             if skip_insert:
                 continue
             seq_num = msg.sequence_number
@@ -379,6 +388,8 @@ async def _insert_chunks_into_shuffle(
                 upstreams=(available_chunk.stream, sort_boundaries_df.stream),
             )
 
+            # TODO: Pre-sort chunks if they do not originate from the ChunkStore.
+            # (Not possible until we use _global_sort outside of sort_actor.)
             splits = find_sort_splits(
                 sort_cols_tbl,
                 sort_boundaries_df.table,
@@ -453,6 +464,52 @@ async def _extract_partitions_and_send(
     await ch_out.drain(context)
 
 
+async def _global_sort(
+    context: Context,
+    comm: Communicator,
+    ir: Sort,
+    ir_context: IRExecutionContext,
+    ch_out: Channel[TableChunk],
+    ch_in: Channel[TableChunk],
+    metadata_in: ChannelMetadata,
+    by: list[str],
+    num_partitions: int,
+    sort_boundaries_df: DataFrame,
+    collective_ids: list[int],
+    *,
+    tracer: ActorTracer | None,
+) -> None:
+    """Global sort."""
+    # TODO: Attach OrderScheme metadata here.
+    output_metadata = ChannelMetadata(
+        local_count=max(1, num_partitions // comm.nranks),
+        partitioning=Partitioning(inter_rank=None, local="inherit"),
+    )
+    await send_metadata(ch_out, context, output_metadata)
+
+    shuffle, post_sort_ir = await _insert_chunks_into_shuffle(
+        context,
+        comm,
+        ir,
+        ir_context,
+        ch_in,
+        num_partitions,
+        collective_ids,
+        metadata_in,
+        sort_boundaries_df,
+        by,
+    )
+    await _extract_partitions_and_send(
+        context,
+        ch_out,
+        shuffle,
+        post_sort_ir,
+        ir_context,
+        ir.schema,
+        tracer=tracer,
+    )
+
+
 @define_actor()
 async def sort_actor(
     context: Context,
@@ -467,10 +524,18 @@ async def sort_actor(
     collective_ids: list[int],
 ) -> None:
     """Streaming sort actor."""
-    ch_replay = context.create_channel()
+    ch_sample_replay = context.create_channel()
+    ch_chunk_store = context.create_channel()
     async with shutdown_on_error(
-        context, ch_in, ch_out, ch_replay, trace_ir=ir, ir_context=ir_context
+        context,
+        ch_in,
+        ch_out,
+        ch_sample_replay,
+        ch_chunk_store,
+        trace_ir=ir,
+        ir_context=ir_context,
     ) as tracer:
+        # TODO: Skip sort if OrderScheme metadata is present and compatible.
         metadata_in = await recv_metadata(ch_in, context)
 
         if ir.zlice is not None:
@@ -494,20 +559,19 @@ async def sort_actor(
             context, comm, ch_in, num_partitions, metadata_in, executor, collective_ids
         )
 
-        output_metadata = ChannelMetadata(
-            local_count=max(1, num_partitions // comm.nranks),
-            partitioning=Partitioning(inter_rank=None, local="inherit"),
-        )
-        await send_metadata(ch_out, context, output_metadata)
-
         chunk_store = ChunkStore(context)
         _, local_candidates_list = await gather_in_task_group(
             replay_buffered_channel(
-                context, ch_replay, ch_in, sampled_chunks, metadata_in, trace_ir=ir
+                context,
+                ch_sample_replay,
+                ch_in,
+                sampled_chunks,
+                metadata_in,
+                trace_ir=ir,
             ),
             _receive_and_buffer_chunks(
                 context,
-                ch_replay,
+                ch_sample_replay,
                 chunk_store,
                 ir,
                 by,
@@ -529,27 +593,22 @@ async def sort_actor(
             collective_ids.pop() if need_allgather else None,
         )
 
-        shuffle, post_sort_ir = await _insert_chunks_into_shuffle(
-            context,
-            comm,
-            num_partitions,
-            collective_ids,
-            metadata_in,
-            chunk_store,
-            sort_boundaries_df,
-            ir,
-            ir_context,
-            by,
-        )
-
-        await _extract_partitions_and_send(
-            context,
-            ch_out,
-            shuffle,
-            post_sort_ir,
-            ir_context,
-            ir.schema,
-            tracer=tracer,
+        await gather_in_task_group(
+            _forward_from_chunk_store(context, ch_chunk_store, chunk_store),
+            _global_sort(
+                context,
+                comm,
+                ir,
+                ir_context,
+                ch_out,
+                ch_chunk_store,
+                metadata_in,
+                by,
+                num_partitions,
+                sort_boundaries_df,
+                collective_ids,
+                tracer=tracer,
+            ),
         )
 
 

From 16c6356f094b895afaf26887aeac9300c003c9b0 Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen" <madsbk@gmail.com>
Date: Thu, 7 May 2026 21:07:24 +0200
Subject: [PATCH 12/12] Run the cudf-polars test suite against `DaskEngine` and
 `RayEngine` (#22381)

Builds on the cached `streaming_engines` fixture from #22364, which amortizes SPMD bootstrap via `_reset()`, and extends the same pattern to Dask and Ray.

With this change, the test matrix runs against:

`["in-memory", "spmd", "spmd-small", "dask", "ray"]`

subject to package availability and `rrun` gating.

We might change the different setups later, but for now CI runs:

| Engine        | Block Size(s)         | GPU Configuration |
|----------------|-----------------------|-------------------|
| `SPMDEngine`   | `"medium"`, `"small"` | Single GPU        |
| `DaskEngine`   | `"medium"`            | Single GPU        |
| `RayEngine`    | `"medium"`            | Two GPUs          |

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Bradley Dice (https://github.com/bdice)
  - Peter Andreas Entschev (https://github.com/pentschev)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/22381
---
 .github/workflows/pr.yaml                     |  1 +
 .github/workflows/test.yaml                   |  1 +
 ci/run_cudf_polars_experimental_pytests.sh    |  2 +-
 ci/test_cudf_polars_experimental.sh           |  2 +-
 dependencies.yaml                             | 13 ++++
 .../cudf_polars/experimental/join.py          | 20 +++---
 .../cudf_polars/testing/engine_utils.py       | 43 +++++++++++
 python/cudf_polars/pyproject.toml             |  3 +
 python/cudf_polars/tests/conftest.py          | 72 +++++++++++++++++--
 .../experimental/test_all_gather_host_data.py |  2 -
 .../tests/experimental/test_dataframescan.py  | 21 +++---
 .../tests/experimental/test_filter.py         |  9 +--
 .../tests/experimental/test_groupby.py        |  8 ++-
 .../tests/experimental/test_io_multirank.py   | 48 +++----------
 .../tests/experimental/test_join.py           | 52 ++++++++------
 .../tests/experimental/test_metadata.py       | 22 ++++--
 .../tests/experimental/test_parallel.py       |  4 +-
 .../tests/experimental/test_rolling.py        | 13 +++-
 .../tests/experimental/test_select.py         | 25 ++++---
 .../tests/experimental/test_spilling.py       |  8 +--
 .../tests/experimental/test_statistics.py     | 53 +++-----------
 .../tests/experimental/test_unique.py         | 13 ++--
 22 files changed, 266 insertions(+), 169 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 41a4c734f21..844f20fe573 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -503,6 +503,7 @@ jobs:
       # (rapidsmpf compatibility already validated in rapidsmpf CI)
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
       build_type: pull-request
+      container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000"
       script: "ci/test_cudf_polars_experimental.sh"
   cudf-polars-polars-tests:
     needs: [wheel-build-cudf-polars, changed-files]
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index b4977f60def..a6b0b6f3326 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -174,6 +174,7 @@ jobs:
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
       build_type: ${{ inputs.build_type }}
       branch: ${{ inputs.branch }}
+      container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000"
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: "ci/test_cudf_polars_experimental.sh"
diff --git a/ci/run_cudf_polars_experimental_pytests.sh b/ci/run_cudf_polars_experimental_pytests.sh
index d0a4767bd99..da659c7b386 100755
--- a/ci/run_cudf_polars_experimental_pytests.sh
+++ b/ci/run_cudf_polars_experimental_pytests.sh
@@ -10,5 +10,5 @@ set -euo pipefail
 # Support invoking outside the script directory
 cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/
 
-echo "Running the full cudf-polars test suite with both the in-memory and spmd engine"
+echo "Running the full cudf-polars test suite"
 python -m pytest --cache-clear "$@" tests
diff --git a/ci/test_cudf_polars_experimental.sh b/ci/test_cudf_polars_experimental.sh
index aa3abd66254..4b796ff4b94 100755
--- a/ci/test_cudf_polars_experimental.sh
+++ b/ci/test_cudf_polars_experimental.sh
@@ -28,7 +28,7 @@ rapids-pip-retry install \
     -v \
     --prefer-binary \
     --constraint "${PIP_CONSTRAINT}" \
-    "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental]" \
+    "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental,ray]" \
     "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
     "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)"
 
diff --git a/dependencies.yaml b/dependencies.yaml
index b1eb276befb..f4acc169263 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -384,6 +384,14 @@ files:
       key: experimental
     includes:
       - run_cudf_polars_experimental
+  py_run_cudf_polars_ray:
+    output: pyproject
+    pyproject_dir: python/cudf_polars
+    extras:
+      table: project.optional-dependencies
+      key: ray
+    includes:
+      - depends_on_ray
   py_test_cudf_polars:
     output: pyproject
     pyproject_dir: python/cudf_polars
@@ -1290,6 +1298,11 @@ dependencies:
           - matrix:
             packages:
               - *rapidsmpf_unsuffixed
+  depends_on_ray:
+    common:
+      - output_types: [conda, requirements, pyproject]
+        packages:
+          - ray>=2.55.1
   depends_on_rapids_logger:
     common:
       - output_types: [conda, requirements, pyproject]
diff --git a/python/cudf_polars/cudf_polars/experimental/join.py b/python/cudf_polars/cudf_polars/experimental/join.py
index cd5c514b45a..1682762c9e8 100644
--- a/python/cudf_polars/cudf_polars/experimental/join.py
+++ b/python/cudf_polars/cudf_polars/experimental/join.py
@@ -164,20 +164,22 @@ def _(
     left, pi_left = rec(left)
     right, pi_right = rec(right)
 
-    # Fallback to single partition on the smaller table
+    # Fallback to single partition on the smaller table whenever either
+    # side has more than one partition.
     left_count = pi_left[left].count
     right_count = pi_right[right].count
     output_count = max(left_count, right_count)
-    fallback_msg = "ConditionalJoin not supported for multiple partitions."
-    if left_count < right_count:
-        if left_count > 1 or dynamic_planning:
+    if output_count > 1 or dynamic_planning:
+        if left_count < right_count:
             left = Repartition(left.schema, left)
             pi_left[left] = PartitionInfo(count=1)
-            _fallback_inform(fallback_msg, config_options)
-    elif right_count > 1 or dynamic_planning:
-        right = Repartition(right.schema, right)
-        pi_right[right] = PartitionInfo(count=1)
-        _fallback_inform(fallback_msg, config_options)
+        else:
+            right = Repartition(right.schema, right)
+            pi_right[right] = PartitionInfo(count=1)
+        _fallback_inform(
+            "ConditionalJoin not supported for multiple partitions.",
+            config_options,
+        )
 
     # Reconstruct and return
     new_node = ir.reconstruct([left, right])
diff --git a/python/cudf_polars/cudf_polars/testing/engine_utils.py b/python/cudf_polars/cudf_polars/testing/engine_utils.py
index c36bcf2ed27..b0b640615f7 100644
--- a/python/cudf_polars/cudf_polars/testing/engine_utils.py
+++ b/python/cudf_polars/cudf_polars/testing/engine_utils.py
@@ -11,6 +11,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
+    from contextlib import AbstractContextManager
 
     import polars as pl
 
@@ -21,6 +22,15 @@
 STREAMING_ENGINE_FIXTURE_PARAMS: list[str] = []
 if importlib.util.find_spec("rapidsmpf") is not None:
     STREAMING_ENGINE_FIXTURE_PARAMS.extend(["spmd", "spmd-small"])
+    # ``DaskEngine`` and ``RayEngine`` both reject construction inside an
+    # ``rrun`` cluster.
+    from rapidsmpf.bootstrap import is_running_with_rrun as _is_running_with_rrun
+
+    if not _is_running_with_rrun():  # pragma: no cover
+        if importlib.util.find_spec("distributed") is not None:
+            STREAMING_ENGINE_FIXTURE_PARAMS.append("dask")
+        if importlib.util.find_spec("ray") is not None:
+            STREAMING_ENGINE_FIXTURE_PARAMS.append("ray")
 ALL_ENGINE_FIXTURE_PARAMS = ["in-memory", *STREAMING_ENGINE_FIXTURE_PARAMS]
 
 
@@ -63,6 +73,34 @@ def is_streaming_engine(obj: Any) -> bool:
     return isinstance(obj, StreamingEngine)
 
 
+def warns_on_spmd(  # pragma: no cover; rapidsmpf-only path
+    engine: Any,
+    *args: Any,
+    when: bool = True,
+    **kwargs: Any,
+) -> AbstractContextManager[Any]:
+    """
+    Return ``pytest.warns(*args, **kwargs)`` on SPMD; ``nullcontext`` otherwise.
+
+    ``pytest.warns`` only captures warnings emitted in the test process. On
+    multi-process backends (``DaskEngine``, ``RayEngine``) the fallback
+    warning fires on workers/actors and only appears in worker logs/stdout,
+    so the assertion is replaced with a passthrough on those backends.
+
+    The optional ``when`` kwarg lets callers compose an additional gate (e.g.
+    a parametrize value) without an outer ``if``.
+    """
+    import contextlib
+
+    import pytest
+
+    from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine
+
+    if when and isinstance(engine, SPMDEngine):
+        return pytest.warns(*args, **kwargs)
+    return contextlib.nullcontext()
+
+
 def create_streaming_options(
     blocksize_mode: Literal["medium", "small"],
     overrides: StreamingOptions | None = None,
@@ -87,6 +125,9 @@ def create_streaming_options(
     from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
     from cudf_polars.utils.config import StreamingFallbackMode
 
+    # ``allow_gpu_sharing=True`` is always set so the cached multi-rank
+    # engines (Dask workers, Ray actors with ``num_ranks > 1``) don't trip
+    # the UUID-collision guard on every ``_reset(...)``.
     match blocksize_mode:
         case "medium":
             baseline = StreamingOptions(
@@ -94,6 +135,7 @@ def create_streaming_options(
                 dynamic_planning={},
                 target_partition_size=1_000_000,
                 raise_on_fail=True,
+                allow_gpu_sharing=True,
             )
         case "small":
             baseline = StreamingOptions(
@@ -102,6 +144,7 @@ def create_streaming_options(
                 target_partition_size=10,
                 raise_on_fail=True,
                 fallback_mode=StreamingFallbackMode.SILENT,
+                allow_gpu_sharing=True,
             )
         case _:  # pragma: no cover
             raise ValueError(f"Unknown blocksize_mode: {blocksize_mode!r}")
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index 47633e42364..7703cad7dad 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -63,6 +63,9 @@ rapidsmpf = [
     "pyarrow>=19.0.0,<24",
     "rapidsmpf==26.6.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+ray = [
+    "ray>=2.55.1",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
 Homepage = "https://github.com/rapidsai/cudf"
diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py
index b3d83b36d36..65445b683ae 100644
--- a/python/cudf_polars/tests/conftest.py
+++ b/python/cudf_polars/tests/conftest.py
@@ -31,6 +31,12 @@
     StreamingEngines: TypeAlias = Mapping[str, StreamingEngine]
 
 
+# Number of ranks for multi-rank streaming engines that share one GPU
+# (currently ``RayEngine``). Single-GPU dev hosts and CI runners require
+# ``allow_gpu_sharing=True`` to oversubscribe one device across actors.
+NUM_RANKS = 2
+
+
 @pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"], scope="session")
 def with_nulls(request):
     return request.param
@@ -89,6 +95,27 @@ def streaming_engines() -> Generator[StreamingEngines, None, None]:
         )
 
     engines: dict[str, StreamingEngine] = {"spmd": SPMDEngine(comm=comm)}
+
+    if "dask" in STREAMING_ENGINE_FIXTURE_PARAMS:  # pragma: no cover
+        from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine
+
+        engines["dask"] = DaskEngine(engine_options={"allow_gpu_sharing": True})
+
+    if "ray" in STREAMING_ENGINE_FIXTURE_PARAMS:  # pragma: no cover
+        from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine
+
+        # Always pin ``num_ranks`` so the cached engine has a deterministic
+        # actor count regardless of how many GPUs the host happens to have;
+        # otherwise ``RayEngine`` defaults to ``get_num_gpus_in_ray_cluster()``
+        # and tests that depend on rank-count behavior (e.g. fast-count
+        # parquet, concat) become non-portable. Pinning ``num_ranks`` requires
+        # ``allow_gpu_sharing=True`` (production guard).
+        engines["ray"] = RayEngine(
+            num_ranks=NUM_RANKS,
+            engine_options={"allow_gpu_sharing": True},
+            ray_init_options={"include_dashboard": False},
+        )
+
     try:
         yield engines
     finally:
@@ -108,6 +135,28 @@ def spmd_engine(streaming_engines: StreamingEngines) -> SPMDEngine:
     return engine
 
 
+@pytest.fixture
+def spmd_engine_factory(
+    streaming_engines: StreamingEngines,
+) -> Callable[..., SPMDEngine]:
+    """
+    Return a factory that yields the shared :class:`SPMDEngine`.
+
+    Use this in place of :func:`streaming_engine_factory` for tests that
+    must run on SPMD only.
+    """
+    from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine
+
+    param = EngineFixtureParam(full_name="spmd")
+
+    def factory(options: StreamingOptions | None = None) -> SPMDEngine:
+        engine = build_streaming_engine(param, streaming_engines, options)
+        assert isinstance(engine, SPMDEngine)
+        return engine
+
+    return factory
+
+
 @pytest.fixture(params=STREAMING_ENGINE_FIXTURE_PARAMS)
 def _streaming_engine_param(request: pytest.FixtureRequest) -> EngineFixtureParam:
     """Parametrization helper to run tests for each streaming engine variant."""
@@ -246,10 +295,9 @@ def pytest_configure(config):
 
     config.addinivalue_line(
         "markers",
-        "skip_on_streaming_engine(reason): skip the test for streaming "
-        '``engine`` variants (e.g. ``"spmd"``, ``"spmd-small"``) while '
-        "still letting the in-memory variant run. Use this to track features "
-        "that have no multi-partition implementation",
+        "skip_on_streaming_engine(reason, *, engine=None): skip the test for "
+        'streaming ``engine`` variants (e.g. ``"spmd"``, ``"spmd-small"``, '
+        '``"dask"``, ``"ray"``) while still allowing the in-memory variant to run.',
     )
 
     # Ray's internal subprocess management leaks `/dev/null` file handles, and
@@ -275,9 +323,23 @@ def pytest_collection_modifyitems(items):
         callspec = getattr(item, "callspec", None)
         if callspec is None:
             continue
-        engine_param = callspec.params.get("_all_engine_param")
+        # Tests bind to either ``engine`` (parametrized via ``_all_engine_param``)
+        # or ``streaming_engine`` / ``streaming_engine_factory`` (parametrized via
+        # ``_streaming_engine_param``). Check both.
+        engine_param = callspec.params.get("_all_engine_param") or callspec.params.get(
+            "_streaming_engine_param"
+        )
         if engine_param is None or engine_param == "in-memory":
             continue
+        engine_filter = marker.kwargs.get("engine")
+        if engine_filter is not None:
+            if isinstance(engine_filter, str):
+                engine_filter = (engine_filter,)
+            # Strip the ``-small`` suffix so ``"spmd-small"`` matches
+            # ``engine=("spmd",)``.
+            engine_name = engine_param.removesuffix("-small")
+            if engine_name not in engine_filter:
+                continue
         reason = (
             marker.args[0]
             if marker.args
diff --git a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py
index 8f09a82c4bd..c85598a8c64 100644
--- a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py
+++ b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py
@@ -59,8 +59,6 @@ def test_gather_cluster_info(streaming_engine) -> None:
         assert isinstance(info.gpu_uuid, str)
     # Each rank runs in its own process.
     assert len({info.pid for info in infos}) == streaming_engine.nranks
-    # Without allow_gpu_sharing, all UUIDs must be unique (enforced at init).
-    assert len({info.gpu_uuid for info in infos}) == streaming_engine.nranks
 
 
 def test_cluster_info_cuda_visible_devices(monkeypatch) -> None:
diff --git a/python/cudf_polars/tests/experimental/test_dataframescan.py b/python/cudf_polars/tests/experimental/test_dataframescan.py
index dbf22848824..fb263e20b94 100644
--- a/python/cudf_polars/tests/experimental/test_dataframescan.py
+++ b/python/cudf_polars/tests/experimental/test_dataframescan.py
@@ -60,19 +60,20 @@ def test_parallel_dataframescan(df, streaming_engine_factory, max_rows_per_parti
         assert count == 1
 
 
-@pytest.mark.xfail(
-    reason=(
-        "Multi-rank Union interleaves child outputs across ranks: client "
-        "receives [rank0_A, rank0_B, rank1_A, rank1_B] instead of the "
-        "polars-CPU [A, B]. Tracked in "
-        "https://github.com/rapidsai/cudf/issues/22376."
-    ),
-    strict=False,
-)
-def test_dataframescan_concat(df, streaming_engine_factory):
+def test_dataframescan_concat(request, df, streaming_engine_factory):
     streaming_engine = streaming_engine_factory(
         StreamingOptions(max_rows_per_partition=1_000),
     )
+    if streaming_engine.nranks > 1:
+        # Multi-rank Union interleaves child outputs across ranks: client
+        # receives [rank0_A, rank0_B, rank1_A, rank1_B] instead of the
+        # polars-CPU [A, B].
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/22376",
+                strict=False,
+            )
+        )
     df2 = pl.concat([df, df])
     assert_gpu_result_equal(df2, engine=streaming_engine)
 
diff --git a/python/cudf_polars/tests/experimental/test_filter.py b/python/cudf_polars/tests/experimental/test_filter.py
index 4fb11df691c..b8b4fb2749c 100644
--- a/python/cudf_polars/tests/experimental/test_filter.py
+++ b/python/cudf_polars/tests/experimental/test_filter.py
@@ -9,12 +9,11 @@
 
 from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
 from cudf_polars.testing.asserts import assert_gpu_result_equal
+from cudf_polars.testing.engine_utils import warns_on_spmd
 
 
 @pytest.fixture
 def engine(streaming_engine_factory):
-    # ``fallback_mode="warn"`` overrides the small-blocksize baseline (which
-    # sets SILENT) so ``test_filter_non_pointwise`` can assert on the warning.
     return streaming_engine_factory(
         StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"),
     )
@@ -38,7 +37,9 @@ def test_filter_pointwise(df, engine):
 
 def test_filter_non_pointwise(df, engine):
     query = df.filter(pl.col("a") > pl.col("a").max())
-    with pytest.warns(
-        UserWarning, match="This filter is not supported for multiple partitions."
+    with warns_on_spmd(
+        engine,
+        UserWarning,
+        match="This filter is not supported for multiple partitions.",
     ):
         assert_gpu_result_equal(query, engine=engine)
diff --git a/python/cudf_polars/tests/experimental/test_groupby.py b/python/cudf_polars/tests/experimental/test_groupby.py
index 03d87fe23e9..6ca11387da0 100644
--- a/python/cudf_polars/tests/experimental/test_groupby.py
+++ b/python/cudf_polars/tests/experimental/test_groupby.py
@@ -131,8 +131,8 @@ def test_groupby_std_var_ddof(df, engine, agg, ddof):
 
 
 @pytest.mark.parametrize("fallback_mode", ["silent", "raise", "warn", "foo"])
-def test_groupby_fallback(df, fallback_mode, streaming_engine_factory):
-    streaming_engine = streaming_engine_factory(
+def test_groupby_fallback(df, fallback_mode, spmd_engine_factory):
+    streaming_engine = spmd_engine_factory(
         StreamingOptions(fallback_mode=fallback_mode),
     )
     match = "Failed to decompose groupby aggs"
@@ -287,6 +287,10 @@ def test_groupby_count_type_mismatch(df, streaming_engine_factory):
     assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False)
 
 
+@pytest.mark.skip_on_streaming_engine(
+    "patch.object on ShuffleManager.Inserter doesn't reach worker processes",
+    engine=("dask", "ray"),
+)
 def test_shuffle_reduce_insert_finished_called_on_oom(streaming_engine_factory):
     streaming_engine = streaming_engine_factory(
         StreamingOptions(target_partition_size=10, max_rows_per_partition=5),
diff --git a/python/cudf_polars/tests/experimental/test_io_multirank.py b/python/cudf_polars/tests/experimental/test_io_multirank.py
index 2208cc67316..bf9e8e70343 100644
--- a/python/cudf_polars/tests/experimental/test_io_multirank.py
+++ b/python/cudf_polars/tests/experimental/test_io_multirank.py
@@ -7,16 +7,15 @@
 from typing import TYPE_CHECKING
 
 import pytest
-from rapidsmpf.bootstrap import is_running_with_rrun
 
 import polars as pl
 
-from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine
+from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
 from cudf_polars.testing.asserts import assert_sink_result_equal
 from cudf_polars.utils.config import Cluster, StreamingExecutor
 
 if TYPE_CHECKING:
-    from collections.abc import Iterator
+    from collections.abc import Callable
     from pathlib import Path
 
     from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine
@@ -39,43 +38,14 @@ def df() -> pl.LazyFrame:
     )
 
 
-@pytest.fixture(params=["spmd", "ray", "dask"])
+@pytest.fixture
 def engine(
-    request: pytest.FixtureRequest,
-    spmd_engine: SPMDEngine,
-) -> Iterator[StreamingEngine]:
-    """Yield each supported streaming engine."""
-    backend = request.param
-    executor_options = {"max_rows_per_partition": 1_000}
-
-    if backend == "spmd":
-        with SPMDEngine(
-            comm=spmd_engine.comm,
-            executor_options=executor_options,
-        ) as eng:
-            yield eng
-        return
-
-    if is_running_with_rrun():
-        pytest.skip(f"{backend}Engine must not be created from within an rrun cluster")
-
-    if backend == "ray":
-        pytest.importorskip("ray", reason="ray is not installed")
-        from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine
-
-        with RayEngine(
-            executor_options=executor_options,
-            ray_init_options={"include_dashboard": False},
-        ) as eng:
-            yield eng
-        return
-
-    assert backend == "dask"
-    pytest.importorskip("distributed", reason="distributed is not installed")
-    from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine
-
-    with DaskEngine(executor_options=executor_options) as eng:
-        yield eng
+    streaming_engine_factory: Callable[..., StreamingEngine],
+) -> StreamingEngine:
+    """Return each supported streaming engine pinned to small partitions."""
+    return streaming_engine_factory(
+        StreamingOptions(max_rows_per_partition=1_000),
+    )
 
 
 def test_sink_parquet_directory(
diff --git a/python/cudf_polars/tests/experimental/test_join.py b/python/cudf_polars/tests/experimental/test_join.py
index 6a09ff95ef5..1b4635dd924 100644
--- a/python/cudf_polars/tests/experimental/test_join.py
+++ b/python/cudf_polars/tests/experimental/test_join.py
@@ -19,6 +19,7 @@
 from cudf_polars.experimental.shuffle import Shuffle
 from cudf_polars.experimental.statistics import collect_statistics
 from cudf_polars.testing.asserts import assert_gpu_result_equal
+from cudf_polars.testing.engine_utils import warns_on_spmd
 from cudf_polars.utils.config import ConfigOptions, StreamingExecutor
 
 
@@ -103,12 +104,11 @@ def test_join_conditional(reverse, max_rows_per_partition, streaming_engine_fact
     if reverse:
         left, right = right, left
     q = left.join_where(right, pl.col("y") < pl.col("yy"))
-    if max_rows_per_partition == 3:
-        with pytest.warns(
-            UserWarning, match="ConditionalJoin not supported for multiple partitions."
-        ):
-            assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False)
-    else:
+    with warns_on_spmd(
+        streaming_engine,
+        UserWarning,
+        match="ConditionalJoin not supported for multiple partitions.",
+    ):
         assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False)
 
 
@@ -156,7 +156,7 @@ def test_join(left, right, how, reverse, streaming_engine_factory, options):
 
 
 @pytest.mark.parametrize("zlice", [(0, 2), (2, 2), (-2, None)])
-def test_join_and_slice(zlice, streaming_engine_factory):
+def test_join_and_slice(request, zlice, streaming_engine_factory):
     streaming_engine = streaming_engine_factory(
         StreamingOptions(
             max_rows_per_partition=3,
@@ -164,6 +164,16 @@ def test_join_and_slice(zlice, streaming_engine_factory):
             fallback_mode="warn",
         ),
     )
+    if streaming_engine.nranks > 1:
+        # The multi-rank fallback for slice doesn't preserve row order
+        # within equal-key groups, so the slice can pick different rows
+        # than the CPU baseline.
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/22405",
+                strict=False,
+            )
+        )
     left = pl.LazyFrame(
         {
             "a": [1, 2, 3, 1, None],
@@ -181,23 +191,22 @@ def test_join_and_slice(zlice, streaming_engine_factory):
     q = left.join(right, on="a", how="inner").slice(*zlice)
     # Check that we get the correct row count
     # See: https://github.com/rapidsai/cudf/issues/19153
-    if zlice in {(2, 2), (-2, None)}:
-        with pytest.warns(
-            UserWarning, match="This slice not supported for multiple partitions."
-        ):
-            assert q.collect(engine=streaming_engine).height == q.collect().height
-    else:
+    with warns_on_spmd(
+        streaming_engine,
+        UserWarning,
+        match="This slice not supported for multiple partitions.",
+        when=zlice in {(2, 2), (-2, None)},
+    ):
         assert q.collect(engine=streaming_engine).height == q.collect().height
 
     # Need sort to match order after a join
     q = left.join(right, on="a", how="inner").sort(pl.col("a")).slice(*zlice)
-    if zlice == (2, 2):
-        with pytest.warns(
-            UserWarning,
-            match="This slice not supported for multiple partitions.",
-        ):
-            assert_gpu_result_equal(q, engine=streaming_engine)
-    else:
+    with warns_on_spmd(
+        streaming_engine,
+        UserWarning,
+        match="This slice not supported for multiple partitions.",
+        when=zlice == (2, 2),
+    ):
         assert_gpu_result_equal(q, engine=streaming_engine)
 
 
@@ -232,7 +241,8 @@ def test_join_maintain_order_fallback_streaming(
     )
     q = left.join(right, on="y", how="inner", maintain_order=maintain_order)
 
-    with pytest.warns(
+    with warns_on_spmd(
+        streaming_engine,
         UserWarning,
         match=r"Join\(maintain_order=.*\) not supported for multiple partitions\.",
     ):
diff --git a/python/cudf_polars/tests/experimental/test_metadata.py b/python/cudf_polars/tests/experimental/test_metadata.py
index 618087a27c5..791e33744cd 100644
--- a/python/cudf_polars/tests/experimental/test_metadata.py
+++ b/python/cudf_polars/tests/experimental/test_metadata.py
@@ -66,20 +66,30 @@ def right() -> pl.LazyFrame:
 def test_rapidsmpf_join_metadata(
     left: pl.LazyFrame,
     right: pl.LazyFrame,
-    streaming_engine_factory,
+    spmd_engine_factory,
     options,
 ) -> None:
-    streaming_engine = streaming_engine_factory(options)
-    config_options = ConfigOptions.from_polars_engine(streaming_engine)
+    # Pinned to SPMD: ``ChannelMetadata.__reduce_cython__`` can't pickle
+    # ``self._handle`` across worker/actor processes, so the
+    # ``metadata_collector`` round-trip fails on Dask and Ray.
+    #
+    # When https://github.com/rapidsai/cudf/pull/22394 lands, dedup of
+    # replicated outputs moves to the Dask/Ray frontends and the
+    # ``duplicated`` flag's semantics change to "every rank holds the
+    # data". Revisit the ``len(metadata_collector) == 1`` and
+    # ``metadata.duplicated is False`` assertions below, and reconsider
+    # whether this test can widen to ``streaming_engine_factory``.
+    engine = spmd_engine_factory(options)
+    config_options = ConfigOptions.from_polars_engine(engine)
     broadcast_join_limit = config_options.executor.broadcast_join_limit
     q = left.join(
         right,
         on="y",
         how="left",
     ).filter(pl.col("x") > pl.col("zz"))
-    ir = Translator(q._ldf.visit(), streaming_engine).translate_ir()
-    left_count = left.collect(engine=streaming_engine).height
-    right_count = right.collect(engine=streaming_engine).height
+    ir = Translator(q._ldf.visit(), engine).translate_ir()
+    left_count = left.collect(engine=engine).height
+    right_count = right.collect(engine=engine).height
 
     metadata_collector = evaluate_logical_plan(
         ir, config_options, collect_metadata=True
diff --git a/python/cudf_polars/tests/experimental/test_parallel.py b/python/cudf_polars/tests/experimental/test_parallel.py
index 67fc372e2e4..a9a0ff63786 100644
--- a/python/cudf_polars/tests/experimental/test_parallel.py
+++ b/python/cudf_polars/tests/experimental/test_parallel.py
@@ -50,10 +50,10 @@ def test_rename_concat(streaming_engine) -> None:
     assert_gpu_result_equal(q, engine=streaming_engine)
 
 
-def test_fallback_on_concat_zlice(streaming_engine_factory) -> None:
+def test_fallback_on_concat_zlice(spmd_engine_factory) -> None:
     # Pin ``fallback_mode="warn"`` so the spmd-small baseline (which sets
     # ``SILENT``) doesn't suppress the warning this test asserts on.
-    streaming_engine = streaming_engine_factory(StreamingOptions(fallback_mode="warn"))
+    streaming_engine = spmd_engine_factory(StreamingOptions(fallback_mode="warn"))
     q = pl.concat(
         [
             pl.LazyFrame({"a": [1, 2]}),
diff --git a/python/cudf_polars/tests/experimental/test_rolling.py b/python/cudf_polars/tests/experimental/test_rolling.py
index 37de6f7f8a1..ee3ae137e27 100644
--- a/python/cudf_polars/tests/experimental/test_rolling.py
+++ b/python/cudf_polars/tests/experimental/test_rolling.py
@@ -8,6 +8,7 @@
 import polars as pl
 
 from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
+from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 from cudf_polars.utils.versions import POLARS_VERSION_LT_136
 
@@ -46,10 +47,20 @@ def test_rolling_datetime(request, engine):
         assert_gpu_result_equal(q, engine=engine)
 
 
-def test_over_in_filter_unsupported(streaming_engine_factory) -> None:
+def test_over_in_filter_unsupported(request, streaming_engine_factory) -> None:
     engine = streaming_engine_factory(
         StreamingOptions(max_rows_per_partition=1, fallback_mode="warn"),
     )
+    if not isinstance(engine, SPMDEngine):
+        # On Dask/Ray the fallback warning fires on worker processes and is
+        # invisible to ``pytest.warns``; the multi-rank fallback also
+        # doesn't preserve row order.
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/22405",
+                strict=False,
+            )
+        )
     q = pl.concat(
         [
             pl.LazyFrame({"k": ["x", "y"], "v": [3, 2]}),
diff --git a/python/cudf_polars/tests/experimental/test_select.py b/python/cudf_polars/tests/experimental/test_select.py
index 264f8b5aab1..cef9f0f66cf 100644
--- a/python/cudf_polars/tests/experimental/test_select.py
+++ b/python/cudf_polars/tests/experimental/test_select.py
@@ -22,6 +22,7 @@
     assert_gpu_result_equal,
     assert_ir_translation_raises,
 )
+from cudf_polars.testing.engine_utils import warns_on_spmd
 from cudf_polars.utils.versions import (
     POLARS_VERSION_LT_132,
     POLARS_VERSION_LT_134,
@@ -54,8 +55,8 @@ def test_select(df, engine):
 
 
 @pytest.mark.parametrize("fallback_mode", ["silent", "raise", "warn", "foo"])
-def test_select_reduce_fallback(df, streaming_engine_factory, fallback_mode):
-    engine = streaming_engine_factory(
+def test_select_reduce_fallback(df, spmd_engine_factory, fallback_mode):
+    engine = spmd_engine_factory(
         StreamingOptions(max_rows_per_partition=3, fallback_mode=fallback_mode),
     )
     match = "This selection is not supported for multiple partitions."
@@ -84,13 +85,17 @@ def test_select_reduce_fallback(df, streaming_engine_factory, fallback_mode):
         assert_gpu_result_equal(query, engine=engine)
 
 
-def test_select_fill_null_with_strategy(df, engine):
+def test_select_fill_null_with_strategy(df, streaming_engine_factory):
+    engine = streaming_engine_factory(
+        StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"),
+    )
     q = df.select(pl.col("a").forward_fill())
 
     if POLARS_VERSION_LT_132:
         assert_ir_translation_raises(q, NotImplementedError)
     else:
-        with pytest.warns(
+        with warns_on_spmd(
+            engine,
             UserWarning,
             match="fill_null with strategy other than 'zero' or 'one' is not supported for multiple partitions",
         ):
@@ -183,15 +188,19 @@ def test_select_mean_with_decimals(engine):
     assert_gpu_result_equal(q, engine=engine, check_dtypes=not POLARS_VERSION_LT_134)
 
 
-def test_select_with_len(engine):
-    # https://github.com/pola-rs/polars/issues/25592
+def test_select_with_len(streaming_engine_factory):
+    engine = streaming_engine_factory(
+        StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"),
+    )
     df1 = pl.LazyFrame({"c0": [1] * 4})
     df2 = pl.LazyFrame({"c0": [2] * 4})
     q = pl.concat([df1.join(df2, how="cross"), df1.with_columns(pl.lit(None))]).select(
         pl.len()
     )
-    with pytest.warns(
-        UserWarning, match="Cross join not support for multiple partitions"
+    with warns_on_spmd(
+        engine,
+        UserWarning,
+        match="Cross join not support for multiple partitions",
     ):
         assert_gpu_result_equal(q, engine=engine)
 
diff --git a/python/cudf_polars/tests/experimental/test_spilling.py b/python/cudf_polars/tests/experimental/test_spilling.py
index 6aa11801132..7f79b911038 100644
--- a/python/cudf_polars/tests/experimental/test_spilling.py
+++ b/python/cudf_polars/tests/experimental/test_spilling.py
@@ -50,20 +50,20 @@ def create_test_table(nbytes: int, stream: Stream) -> plc.Table:
     ],
 )
 def test_make_spill_function(
-    streaming_engine_factory,
+    spmd_engine_factory,
     *,
     pinned_memory: bool,
     spilled_host_mem_type: MemoryType,
 ) -> None:
     """Test that spilling prioritizes longest queues and newest messages."""
-    engine = streaming_engine_factory(StreamingOptions(pinned_memory=pinned_memory))
+    engine = spmd_engine_factory(StreamingOptions(pinned_memory=pinned_memory))
     context = engine.context
 
     if spilled_host_mem_type == MemoryType.PINNED_HOST:
-        assert engine.context.br().pinned_mr is not None
+        assert context.br().pinned_mr is not None
         other_host_mem_type = MemoryType.HOST
     else:
-        assert engine.context.br().pinned_mr is None
+        assert context.br().pinned_mr is None
         other_host_mem_type = MemoryType.PINNED_HOST
 
     # Create 3 spillable message containers simulating fanout buffers
diff --git a/python/cudf_polars/tests/experimental/test_statistics.py b/python/cudf_polars/tests/experimental/test_statistics.py
index 82c121d5830..42014a02106 100644
--- a/python/cudf_polars/tests/experimental/test_statistics.py
+++ b/python/cudf_polars/tests/experimental/test_statistics.py
@@ -7,14 +7,12 @@
 from typing import TYPE_CHECKING
 
 import pytest
-from rapidsmpf.bootstrap import is_running_with_rrun
-from rapidsmpf.config import Options
 from rapidsmpf.statistics import Statistics
 
-from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine
+from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
 
 if TYPE_CHECKING:
-    from collections.abc import Iterator
+    from collections.abc import Callable
 
     from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine
 
@@ -25,49 +23,14 @@
 ]
 
 
-@pytest.fixture(params=["spmd", "ray", "dask"])
+@pytest.fixture
 def engine(
-    request: pytest.FixtureRequest,
-    spmd_engine: SPMDEngine,
-) -> Iterator[StreamingEngine]:
+    streaming_engine_factory: Callable[..., StreamingEngine],
+) -> StreamingEngine:
     """Yield each supported streaming engine with statistics enabled."""
-    backend = request.param
-    rapidsmpf_options = Options({"statistics": "True"})
-    executor_options = {"max_rows_per_partition": 10}
-
-    if backend == "spmd":
-        with SPMDEngine(
-            comm=spmd_engine.comm,
-            rapidsmpf_options=rapidsmpf_options,
-            executor_options=executor_options,
-        ) as engine:
-            yield engine
-        return
-
-    if is_running_with_rrun():
-        pytest.skip(f"{backend}Engine must not be created from within an rrun cluster")
-
-    if backend == "ray":
-        pytest.importorskip("ray", reason="ray is not installed")
-        from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine
-
-        with RayEngine(
-            rapidsmpf_options=rapidsmpf_options,
-            executor_options=executor_options,
-            ray_init_options={"include_dashboard": False},
-        ) as engine:
-            yield engine
-        return
-
-    assert backend == "dask"
-    pytest.importorskip("distributed", reason="distributed is not installed")
-    from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine
-
-    with DaskEngine(
-        rapidsmpf_options=rapidsmpf_options,
-        executor_options=executor_options,
-    ) as engine:
-        yield engine
+    return streaming_engine_factory(
+        StreamingOptions(statistics=True, max_rows_per_partition=10),
+    )
 
 
 def test_statistics(engine: StreamingEngine) -> None:
diff --git a/python/cudf_polars/tests/experimental/test_unique.py b/python/cudf_polars/tests/experimental/test_unique.py
index 6bb30624cb6..1a157c3fe21 100644
--- a/python/cudf_polars/tests/experimental/test_unique.py
+++ b/python/cudf_polars/tests/experimental/test_unique.py
@@ -10,13 +10,7 @@
 
 from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions
 from cudf_polars.testing.asserts import assert_gpu_result_equal
-
-
-@pytest.fixture
-def engine(streaming_engine_factory):
-    return streaming_engine_factory(
-        StreamingOptions(fallback_mode="warn"),
-    )
+from cudf_polars.testing.engine_utils import warns_on_spmd
 
 
 @pytest.fixture(scope="module")
@@ -77,11 +71,12 @@ def test_unique_head_tail(keep, zlice, streaming_engine_factory):
     )
 
 
-def test_unique_complex_slice_fallback(df, engine):
+def test_unique_complex_slice_fallback(df, streaming_engine_factory):
     """Test that unique with complex slice (offset >= 1) falls back correctly."""
+    engine = streaming_engine_factory(StreamingOptions(fallback_mode="warn"))
     # unique().slice(offset=5, length=10) has zlice[0] >= 1, triggering fallback
     q = df.unique(subset=("y",), keep="any").slice(5, 10)
-    with pytest.warns(UserWarning, match="Complex slice not supported"):
+    with warns_on_spmd(engine, UserWarning, match="Complex slice not supported"):
         result = q.collect(engine=engine)
     # Just verify the fallback produces valid output with expected shape
     assert result.shape == (10, 3)