From 08e00c9bf0802f8c8638f6545f2553a812fd02da Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 6 May 2026 13:22:57 -0700 Subject: [PATCH 01/12] Correctly handle blocks with "block byte size" fields in the Avro reader (#22387) When the number of elements in the Avro block is stored as a negative number, the block also includes its size in bytes. This PR allows the reader to correctly parse such files. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/22387 --- cpp/src/io/avro/avro.cpp | 14 ++++++-- .../cudf/cudf/tests/data/avro/hang_input.avro | Bin 0 -> 101 bytes .../cudf/cudf/tests/input_output/test_avro.py | 34 +++++++++++++++++- 3 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 python/cudf/cudf/tests/data/avro/hang_input.avro diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index bf7d983d481..4639ea6ba23 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -64,8 +64,16 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row) sig4 |= get_raw() << 24; if (sig4 != avro_magic) { return false; } for (;;) { - auto num_md_items = static_cast(get_encoded()); - if (num_md_items == 0) { break; } + auto md_items_signed = get_encoded(); + if (md_items_signed == 0) { break; } + if (md_items_signed < 0) { + // A negative count means a block's byte size follows. Read it and discard it. 
+ [[maybe_unused]] auto const md_block_size = get_encoded(); + md_items_signed = -md_items_signed; + } + // Check that the claimed item count can fit in the remaining input + if (md_items_signed > (m_end - m_cur) / 2) { return false; } + auto const num_md_items = static_cast(md_items_signed); for (uint32_t i = 0; i < num_md_items; i++) { auto const key = get_encoded(); auto const value = get_encoded(); diff --git a/python/cudf/cudf/tests/data/avro/hang_input.avro b/python/cudf/cudf/tests/data/avro/hang_input.avro new file mode 100644 index 0000000000000000000000000000000000000000..b26cb797fe8e1343a7560135f4d79e31902ca1d8 GIT binary patch literal 101 zcmeZI%3@>^ODrqO*DFrWNX<>`VyspwsVqoUvQjEaP0lY$QPNS$OUwoHfy}hb)SQ%J pC9CLam}psIPH8Gorlis(G_Aa2CKFT0s@-SqzWdK0sALQg0sxX>BQF2| literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/input_output/test_avro.py b/python/cudf/cudf/tests/input_output/test_avro.py index 8664851f2e3..d0f31828b3d 100644 --- a/python/cudf/cudf/tests/input_output/test_avro.py +++ b/python/cudf/cudf/tests/input_output/test_avro.py @@ -1,10 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from __future__ import annotations import datetime import io import pathlib +import subprocess +import sys +import textwrap import fastavro import numpy as np @@ -644,3 +647,32 @@ def test_avro_reader_multiblock( actual_df = cudf.read_avro(buffer, skiprows=skip_rows, num_rows=num_rows) assert_eq(expected_df, actual_df) + + +def test_avro_reader_no_hang_on_truncated_schema(datadir): + path = datadir / "avro" / "hang_input.avro" + assert path.is_file(), path + + script = textwrap.dedent( + f""" + import cudf + try: + cudf.read_avro({str(path)!r}) + except Exception: + pass + """ + ) + + timeout_s = 10 + try: + subprocess.run( + [sys.executable, "-c", script], + timeout=timeout_s, + check=False, + capture_output=True, + ) + except subprocess.TimeoutExpired: + pytest.fail( + f"cudf.read_avro hung on malformed input {path.name!r} " + f"(no completion within {timeout_s}s)" + ) From 112830f01f3cf492a1d25c25999b4dedc44acd32 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Wed, 6 May 2026 13:29:07 -0700 Subject: [PATCH 02/12] Use `token.rapids.nvidia.com` when issuing S3 bucket creds in devcontainers (#22338) Set AWS_IDP_URL and update AWS_ROLE_ARN to use `token.rapids.nvidia.com` Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/cudf/pull/22338 --- .devcontainer/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 4be36d4402c..b4b2ecb69e0 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -37,7 +37,8 @@ ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAG ### # sccache configuration ### -ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" +ENV AWS_IDP_URL="https://token.rapids.nvidia.com" +ENV 
AWS_ROLE_ARN="arn:aws:iam::279114543810:role/rapids-token-sccache-devs" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" ENV SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true From d24f7703fa7a086bd75a6a7886bd5044768e1c0a Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 6 May 2026 16:41:36 -0400 Subject: [PATCH 03/12] Use static cudart by default (#22397) Issue: https://github.com/rapidsai/build-planning/issues/235 Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/22397 --- .agents/skills/build-test-cudf-java/SKILL.md | 2 +- conda/recipes/cudf/recipe.yaml | 1 - cpp/CMakeLists.txt | 16 ++++++---------- cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 2 +- cpp/tests/CMakeLists.txt | 2 +- java/README.md | 7 ------- java/ci/build-in-docker.sh | 6 +----- java/pom.xml | 2 -- java/src/main/native/CMakeLists.txt | 10 +--------- python/libcudf/CMakeLists.txt | 4 +--- 10 files changed, 12 insertions(+), 40 deletions(-) diff --git a/.agents/skills/build-test-cudf-java/SKILL.md b/.agents/skills/build-test-cudf-java/SKILL.md index 6284a5e4230..ca9eb575c37 100644 --- a/.agents/skills/build-test-cudf-java/SKILL.md +++ b/.agents/skills/build-test-cudf-java/SKILL.md @@ -51,7 +51,7 @@ export MAVEN_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java. Export `MVN_COMMON_OPTS` to match the CI build configuration in `java/ci/build-in-docker.sh`. 
For example: ```bash -export MVN_COMMON_OPTS="-DCUDF_CPP_BUILD_DIR=$CUDF_CPP_BUILD_DIR -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON -DCUDA_STATIC_CUFILE=ON -DCUDA_STATIC_RUNTIME=ON -DCUDF_JNI_LIBCUDF_STATIC=ON" +export MVN_COMMON_OPTS="-DCUDF_CPP_BUILD_DIR=$CUDF_CPP_BUILD_DIR -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON -DCUDA_STATIC_CUFILE=ON -DCUDF_JNI_LIBCUDF_STATIC=ON" ``` ## Building cudf-java diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index 4d8a9f2f241..d4c8b5edb9d 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -98,7 +98,6 @@ requirements: - pylibcudf =${{ version }} - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - - cuda-cudart - if: cuda_major == "12" then: cuda-python >=12.9.2,<13.0 else: cuda-python >=13.0.1,<14.0 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c2485171c71..6d684af8d99 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -58,9 +58,6 @@ option(CUDA_ENABLE_LINEINFO ) option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) -# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking -option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) - set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) if(NOT BUILD_SHARED_LIBS) @@ -97,7 +94,6 @@ message( VERBOSE "CUDF: Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler): ${CUDA_ENABLE_LINEINFO}" ) -message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUDF: Build with remote IO (e.g. 
AWS S3) support through KvikIO: ${CUDF_KVIKIO_REMOTE_IO}" ) @@ -1012,7 +1008,7 @@ if(TARGET conda_env) target_link_libraries(cudf PRIVATE conda_env) endif() -rapids_cuda_set_runtime(cudf USE_STATIC ${CUDA_STATIC_RUNTIME}) +rapids_cuda_set_runtime(cudf USE_STATIC ON) file( WRITE "${CUDF_BINARY_DIR}/fatbin.ld" @@ -1059,7 +1055,7 @@ if(CUDF_BUILD_TESTUTIL) PUBLIC cudf PRIVATE $ ) - rapids_cuda_set_runtime(cudftest_default_stream USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(cudftest_default_stream USE_STATIC ON) add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream) @@ -1090,7 +1086,7 @@ if(CUDF_BUILD_TESTUTIL) cudftestutil INTERFACE "$" "$" ) - rapids_cuda_set_runtime(cudftestutil USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(cudftestutil USE_STATIC ON) add_library(cudf::cudftestutil ALIAS cudftestutil) add_library(cudftestutil_impl INTERFACE) @@ -1151,7 +1147,7 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) endif() set(sanitizer_relative_genex - "$,$>" + "$,$>" ) set_target_properties( ${_tgt} @@ -1166,9 +1162,9 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) ${_tgt} PRIVATE "$:${CUDF_CXX_FLAGS}>>" ) target_include_directories(${_tgt} PRIVATE "$") - target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm CUDA::sanitizer) + target_link_libraries(${_tgt} PUBLIC CUDA::cudart_static rmm::rmm CUDA::sanitizer) - rapids_cuda_set_runtime(${_tgt} USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(${_tgt} USE_STATIC ON) add_library(cudf::${_tgt} ALIAS ${_tgt}) if("${_mode}" STREQUAL "testing") diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index 10ba33eb397..d035e1ea6ab 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -9,7 +9,7 @@ add_executable(jitify_preprocess "${JITIFY_INCLUDE_DIR}/jitify2_preprocess.cpp") target_compile_definitions(jitify_preprocess PRIVATE "_FILE_OFFSET_BITS=64") 
-rapids_cuda_set_runtime(jitify_preprocess USE_STATIC ${CUDA_STATIC_RUNTIME}) +rapids_cuda_set_runtime(jitify_preprocess USE_STATIC ON) target_link_libraries(jitify_preprocess PUBLIC ${CMAKE_DL_LIBS}) # Take a list of files to JIT-compile and run them through jitify_preprocess. diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 68cde65c57b..a45b7280127 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -61,7 +61,7 @@ function(ConfigureTest CMAKE_TEST_NAME) ${CMAKE_TEST_NAME} PRIVATE cudf::cudftestutil_objects nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIBS}" ) - rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME} diff --git a/java/README.md b/java/README.md index 7b33f303cf3..e1552712587 100644 --- a/java/README.md +++ b/java/README.md @@ -79,13 +79,6 @@ If you decide to build without Docker and the build script, examining the cmake settings in the [Java CI build script](ci/build-in-docker.sh) can be helpful if you are encountering difficulties during the build. -## Statically Linking the CUDA Runtime - -If you use the default cmake options libcudart will be dynamically linked to libcudf and libcudfjni. -To build with a static CUDA runtime, build libcudf with the `-DCUDA_STATIC_RUNTIME=ON` as a cmake -parameter, and similarly build with `-DCUDA_STATIC_RUNTIME=ON` when building the Java bindings -with Maven. - ### Building with a libcudf Archive When statically linking the CUDA runtime, it is recommended to build cuDF as an archive rather than diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh index 66140f387fd..e15536c8b6b 100755 --- a/java/ci/build-in-docker.sh +++ b/java/ci/build-in-docker.sh @@ -1,7 +1,7 @@ #!/bin/bash # -# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # @@ -10,7 +10,6 @@ gcc --version SKIP_JAVA_TESTS=${SKIP_JAVA_TESTS:-true} BUILD_CPP_TESTS=${BUILD_CPP_TESTS:-OFF} -ENABLE_CUDA_STATIC_RUNTIME=${ENABLE_CUDA_STATIC_RUNTIME:-ON} ENABLE_PTDS=${ENABLE_PTDS:-ON} RMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL:-OFF} ENABLE_NVTX=${ENABLE_NVTX:-ON} @@ -27,7 +26,6 @@ OUT_PATH="$WORKSPACE/$OUT" echo "SIGN_FILE: $SIGN_FILE,\ SKIP_JAVA_TESTS: $SKIP_JAVA_TESTS,\ BUILD_CPP_TESTS: $BUILD_CPP_TESTS,\ - ENABLE_CUDA_STATIC_RUNTIME: $ENABLE_CUDA_STATIC_RUNTIME,\ ENABLED_PTDS: $ENABLE_PTDS,\ ENABLE_NVTX: $ENABLE_NVTX,\ ENABLE_GDS: $ENABLE_GDS,\ @@ -47,7 +45,6 @@ mkdir -p "$LIBCUDF_BUILD_PATH" cd "$LIBCUDF_BUILD_PATH" cmake .. -G"${CMAKE_GENERATOR}" \ -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \ - -DCUDA_STATIC_RUNTIME="$ENABLE_CUDA_STATIC_RUNTIME" \ -DUSE_NVTX="$ENABLE_NVTX" \ -DCUDF_LARGE_STRINGS_DISABLED=ON \ -DCUDF_USE_ARROW_STATIC=ON \ @@ -70,7 +67,6 @@ BUILD_ARG=( "-Dmaven.repo.local=$WORKSPACE/.m2" "-DskipTests=$SKIP_JAVA_TESTS" "-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS" - "-DCUDA_STATIC_RUNTIME=$ENABLE_CUDA_STATIC_RUNTIME" "-DCUDF_JNI_LIBCUDF_STATIC=ON" "-DUSE_GDS=$ENABLE_GDS" "-Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest" diff --git a/java/pom.xml b/java/pom.xml index 5df61ec4352..12af51eba71 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -153,7 +153,6 @@ false OFF - OFF OFF OFF RAPIDS @@ -484,7 +483,6 @@ - diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 1e7df3802b9..208bc4035c9 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -29,7 +29,6 @@ option(USE_NVTX "Build with NVTX support" ON) option(BUILD_SHARED_LIBS "Build cuDF JNI shared libraries" ON) option(BUILD_TESTS "Configure CMake to build tests" ON) option(CUDF_USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) 
-option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF) option(CUDF_JNI_LIBCUDF_STATIC "Link with libcudf.a" OFF) option(CUDF_JNI_ENABLE_PROFILING "Build with profiling support" ON) @@ -41,7 +40,6 @@ message(VERBOSE "CUDF_JNI: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF_JNI: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}" ) -message(VERBOSE "CUDF_JNI: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUDF_JNI: Build with GPUDirect Storage support: ${USE_GDS}") message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}") @@ -279,13 +277,7 @@ target_link_libraries( # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic # linking -if(CUDA_STATIC_RUNTIME) - # Tell CMake what CUDA language runtime to use - set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Static) -else() - # Tell CMake what CUDA language runtime to use - set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Shared) -endif() +set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Static) # ################################################################################################## # * install shared libraries ---------------------------------------------------------------------- diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index 6feea8e8ba6..7f5176048ad 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -63,8 +63,6 @@ if(NOT USE_NVCOMP_RUNTIME_WHEEL) endif() endif() -set(CUDA_STATIC_RUNTIME ON) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) add_subdirectory(../../cpp cudf-cpp) From 41be3965f343dc82df3fb49cb39fe9b9ac27f232 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 6 May 2026 17:33:21 -0500 Subject: [PATCH 04/12] Fix `to_array` to return non-corrupted data (#22342) Fixes #22136 This PR guards the homogeneous numeric `DataFrame.to_cupy` fast path so it only uses `table_to_array` when `dtype` is `None` or exactly matches the source column `dtype`. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - https://github.com/apps/pre-commit-ci Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/22342 --- python/cudf/cudf/core/frame.py | 1 + .../cudf/tests/dataframe/methods/test_to_cupy.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index fc634cadf43..05bd1be095f 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -826,6 +826,7 @@ def to_cupy( self._num_columns > 1 and na_value is None and self._columns[0].dtype.kind in {"i", "u", "f", "b"} + and (dtype is None or dtype == self._columns[0].dtype) and all( not col.nullable and col.dtype == self._columns[0].dtype for col in self._columns diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py index 44ee7a4278d..3eb69e0e928 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import cupy as cp @@ -64,6 +64,18 @@ def test_dataframe_to_cupy(): np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i]) +@pytest.mark.parametrize("in_dtype", ["int32", "int64", "float32", "float64"]) +@pytest.mark.parametrize("out_dtype", ["int32", "int64", "float32", "float64"]) +def test_dataframe_to_cupy_dtype(in_dtype, out_dtype): + data = np.arange(12, dtype=in_dtype).reshape(3, 4) + df = cudf.DataFrame(data) + + result = df.to_cupy(dtype=out_dtype) + + assert result.dtype == np.dtype(out_dtype) + np.testing.assert_allclose(result.get(), data.astype(out_dtype)) + + @pytest.mark.parametrize("has_nulls", [False, True]) @pytest.mark.parametrize("use_na_value", [False, True]) def test_dataframe_to_cupy_single_column(has_nulls, use_na_value): From 05ab76205f33e8e45ed88e9bfc15cb9d20cbb923 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 May 2026 16:21:33 -0700 Subject: [PATCH 05/12] Use cudaStream_t instead of cuda_stream_view in pylibcudf Cython (#22368) Contributes to https://github.com/rapidsai/rmm/issues/2359 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/22368 --- .../cudf_polars/utils/cuda_stream.py | 5 +- python/pylibcudf/pylibcudf/binaryop.pxd | 5 +- python/pylibcudf/pylibcudf/binaryop.pyi | 6 +- python/pylibcudf/pylibcudf/binaryop.pyx | 16 +- python/pylibcudf/pylibcudf/column.pxd | 19 +- python/pylibcudf/pylibcudf/column.pyi | 32 ++- python/pylibcudf/pylibcudf/column.pyx | 118 ++++---- .../pylibcudf/pylibcudf/column_factories.pxd | 15 +- .../pylibcudf/pylibcudf/column_factories.pyi | 16 +- .../pylibcudf/pylibcudf/column_factories.pyx | 58 ++-- python/pylibcudf/pylibcudf/concatenate.pxd | 5 +- python/pylibcudf/pylibcudf/concatenate.pyi | 6 +- python/pylibcudf/pylibcudf/concatenate.pyx | 16 +- 
.../pylibcudf/pylibcudf/contiguous_split.pxd | 8 +- .../pylibcudf/pylibcudf/contiguous_split.pyi | 14 +- .../pylibcudf/pylibcudf/contiguous_split.pyx | 46 ++-- python/pylibcudf/pylibcudf/copying.pxd | 27 +- python/pylibcudf/pylibcudf/copying.pyi | 32 ++- python/pylibcudf/pylibcudf/copying.pyx | 116 ++++---- python/pylibcudf/pylibcudf/datetime.pxd | 23 +- python/pylibcudf/pylibcudf/datetime.pyi | 24 +- python/pylibcudf/pylibcudf/datetime.pyx | 93 ++++--- .../pylibcudf/experimental/_join_streams.pxd | 5 +- .../pylibcudf/experimental/_join_streams.pyi | 8 +- .../pylibcudf/experimental/_join_streams.pyx | 20 +- python/pylibcudf/pylibcudf/filling.pxd | 13 +- python/pylibcudf/pylibcudf/filling.pyi | 17 +- python/pylibcudf/pylibcudf/filling.pyx | 48 ++-- python/pylibcudf/pylibcudf/groupby.pxd | 15 +- python/pylibcudf/pylibcudf/groupby.pyi | 14 +- python/pylibcudf/pylibcudf/groupby.pyx | 68 +++-- python/pylibcudf/pylibcudf/hashing.pxd | 23 +- python/pylibcudf/pylibcudf/hashing.pyi | 24 +- python/pylibcudf/pylibcudf/hashing.pyx | 93 ++++--- python/pylibcudf/pylibcudf/interop.pxd | 7 +- python/pylibcudf/pylibcudf/interop.pyi | 12 +- python/pylibcudf/pylibcudf/interop.pyx | 17 +- python/pylibcudf/pylibcudf/io/avro.pxd | 5 +- python/pylibcudf/pylibcudf/io/avro.pyi | 6 +- python/pylibcudf/pylibcudf/io/avro.pyx | 8 +- python/pylibcudf/pylibcudf/io/csv.pxd | 7 +- python/pylibcudf/pylibcudf/io/csv.pyi | 8 +- python/pylibcudf/pylibcudf/io/csv.pyx | 13 +- .../pylibcudf/io/experimental/hybrid_scan.pxd | 2 +- .../pylibcudf/io/experimental/hybrid_scan.pyi | 20 +- .../pylibcudf/io/experimental/hybrid_scan.pyx | 66 ++--- python/pylibcudf/pylibcudf/io/json.pxd | 11 +- python/pylibcudf/pylibcudf/io/json.pyi | 10 +- python/pylibcudf/pylibcudf/io/json.pyx | 31 ++- python/pylibcudf/pylibcudf/io/orc.pxd | 9 +- python/pylibcudf/pylibcudf/io/orc.pyi | 10 +- python/pylibcudf/pylibcudf/io/orc.pyx | 24 +- python/pylibcudf/pylibcudf/io/parquet.pxd | 8 +- python/pylibcudf/pylibcudf/io/parquet.pyi | 
11 +- python/pylibcudf/pylibcudf/io/parquet.pyx | 26 +- python/pylibcudf/pylibcudf/io/text.pxd | 5 +- python/pylibcudf/pylibcudf/io/text.pyi | 6 +- python/pylibcudf/pylibcudf/io/text.pyx | 14 +- python/pylibcudf/pylibcudf/io/timezone.pxd | 6 +- python/pylibcudf/pylibcudf/io/timezone.pyi | 6 +- python/pylibcudf/pylibcudf/io/timezone.pyx | 14 +- python/pylibcudf/pylibcudf/io/types.pxd | 5 +- python/pylibcudf/pylibcudf/io/types.pyx | 3 +- python/pylibcudf/pylibcudf/join.pxd | 35 ++- python/pylibcudf/pylibcudf/join.pyi | 40 +-- python/pylibcudf/pylibcudf/join.pyx | 211 ++++++++------ python/pylibcudf/pylibcudf/json.pxd | 5 +- python/pylibcudf/pylibcudf/json.pyi | 6 +- python/pylibcudf/pylibcudf/json.pyx | 12 +- python/pylibcudf/pylibcudf/labeling.pxd | 5 +- python/pylibcudf/pylibcudf/labeling.pyi | 6 +- python/pylibcudf/pylibcudf/labeling.pyx | 12 +- .../pylibcudf/pylibcudf/libcudf/binaryop.pxd | 10 +- .../pylibcudf/libcudf/column/column.pxd | 6 +- .../libcudf/column/column_factories.pxd | 28 +- .../pylibcudf/libcudf/concatenate.pxd | 6 +- .../pylibcudf/libcudf/contiguous_split.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/copying.pxd | 40 +-- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 24 +- .../libcudf/detail/utilities/stream_pool.pxd | 29 +- .../pylibcudf/libcudf/distinct_count.pxd | 6 +- .../pylibcudf/pylibcudf/libcudf/filling.pxd | 14 +- .../pylibcudf/pylibcudf/libcudf/groupby.pxd | 12 +- python/pylibcudf/pylibcudf/libcudf/hash.pxd | 22 +- .../pylibcudf/pylibcudf/libcudf/interop.pxd | 24 +- .../pylibcudf/pylibcudf/libcudf/io/avro.pxd | 4 +- python/pylibcudf/pylibcudf/libcudf/io/csv.pxd | 6 +- .../pylibcudf/libcudf/io/hybrid_scan.pxd | 20 +- .../pylibcudf/pylibcudf/libcudf/io/json.pxd | 6 +- python/pylibcudf/pylibcudf/libcudf/io/orc.pxd | 8 +- .../pylibcudf/libcudf/io/orc_metadata.pxd | 6 +- .../pylibcudf/libcudf/io/parquet.pxd | 12 +- .../pylibcudf/pylibcudf/libcudf/io/text.pxd | 4 +- .../pylibcudf/libcudf/io/timezone.pxd | 4 +- 
python/pylibcudf/pylibcudf/libcudf/join.pxd | 52 ++-- python/pylibcudf/pylibcudf/libcudf/json.pxd | 4 +- .../pylibcudf/pylibcudf/libcudf/labeling.pxd | 4 +- .../pylibcudf/libcudf/lists/combine.pxd | 8 +- .../pylibcudf/libcudf/lists/contains.pxd | 12 +- .../libcudf/lists/count_elements.pxd | 4 +- .../pylibcudf/libcudf/lists/explode.pxd | 4 +- .../pylibcudf/libcudf/lists/extract.pxd | 6 +- .../pylibcudf/libcudf/lists/filling.pxd | 6 +- .../pylibcudf/libcudf/lists/gather.pxd | 4 +- .../libcudf/lists/lists_column_view.pxd | 4 +- .../pylibcudf/libcudf/lists/reverse.pxd | 4 +- .../libcudf/lists/set_operations.pxd | 10 +- .../pylibcudf/libcudf/lists/sorting.pxd | 6 +- .../libcudf/lists/stream_compaction.pxd | 6 +- python/pylibcudf/pylibcudf/libcudf/merge.pxd | 4 +- .../pylibcudf/pylibcudf/libcudf/null_mask.pxd | 16 +- .../libcudf/nvtext/byte_pair_encode.pxd | 6 +- .../pylibcudf/libcudf/nvtext/deduplicate.pxd | 8 +- .../libcudf/nvtext/edit_distance.pxd | 6 +- .../libcudf/nvtext/generate_ngrams.pxd | 8 +- .../pylibcudf/libcudf/nvtext/jaccard.pxd | 4 +- .../pylibcudf/libcudf/nvtext/minhash.pxd | 10 +- .../libcudf/nvtext/ngrams_tokenize.pxd | 4 +- .../pylibcudf/libcudf/nvtext/normalize.pxd | 8 +- .../pylibcudf/libcudf/nvtext/replace.pxd | 6 +- .../pylibcudf/libcudf/nvtext/stemmer.pxd | 8 +- .../pylibcudf/libcudf/nvtext/tokenize.pxd | 18 +- .../libcudf/nvtext/wordpiece_tokenize.pxd | 6 +- .../pylibcudf/libcudf/partitioning.pxd | 10 +- .../pylibcudf/pylibcudf/libcudf/quantiles.pxd | 6 +- python/pylibcudf/pylibcudf/libcudf/reduce.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/replace.pxd | 18 +- .../pylibcudf/pylibcudf/libcudf/reshape.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/rolling.pxd | 10 +- python/pylibcudf/pylibcudf/libcudf/round.pxd | 6 +- .../pylibcudf/libcudf/scalar/scalar.pxd | 18 +- .../libcudf/scalar/scalar_factories.pxd | 18 +- python/pylibcudf/pylibcudf/libcudf/search.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/sorting.pxd | 26 +- 
.../pylibcudf/libcudf/stream_compaction.pxd | 18 +- .../pylibcudf/libcudf/strings/attributes.pxd | 8 +- .../pylibcudf/libcudf/strings/capitalize.pxd | 8 +- .../pylibcudf/libcudf/strings/case.pxd | 8 +- .../pylibcudf/libcudf/strings/char_types.pxd | 6 +- .../pylibcudf/libcudf/strings/combine.pxd | 12 +- .../pylibcudf/libcudf/strings/contains.pxd | 12 +- .../strings/convert/convert_booleans.pxd | 6 +- .../strings/convert/convert_datetime.pxd | 8 +- .../strings/convert/convert_durations.pxd | 6 +- .../strings/convert/convert_fixed_point.pxd | 8 +- .../strings/convert/convert_floats.pxd | 8 +- .../strings/convert/convert_integers.pxd | 16 +- .../libcudf/strings/convert/convert_ipv4.pxd | 8 +- .../libcudf/strings/convert/convert_lists.pxd | 4 +- .../libcudf/strings/convert/convert_urls.pxd | 6 +- .../pylibcudf/libcudf/strings/extract.pxd | 8 +- .../pylibcudf/libcudf/strings/find.pxd | 20 +- .../libcudf/strings/find_multiple.pxd | 6 +- .../pylibcudf/libcudf/strings/findall.pxd | 6 +- .../pylibcudf/libcudf/strings/padding.pxd | 8 +- .../pylibcudf/libcudf/strings/repeat.pxd | 6 +- .../pylibcudf/libcudf/strings/replace.pxd | 8 +- .../pylibcudf/libcudf/strings/replace_re.pxd | 8 +- .../pylibcudf/libcudf/strings/reverse.pxd | 4 +- .../libcudf/strings/split/partition.pxd | 6 +- .../pylibcudf/libcudf/strings/split/split.pxd | 20 +- .../libcudf/strings/strings_column_view.pxd | 6 +- .../pylibcudf/libcudf/strings/strip.pxd | 4 +- .../pylibcudf/libcudf/strings/substring.pxd | 6 +- .../pylibcudf/libcudf/strings/translate.pxd | 6 +- .../pylibcudf/libcudf/strings/wrap.pxd | 4 +- .../libcudf/structs/structs_column_view.pxd | 4 +- .../pylibcudf/libcudf/table/table.pxd | 6 +- .../pylibcudf/pylibcudf/libcudf/transform.pxd | 20 +- .../pylibcudf/pylibcudf/libcudf/transpose.pxd | 4 +- python/pylibcudf/pylibcudf/libcudf/unary.pxd | 14 +- .../pylibcudf/libcudf/unique_count.pxd | 6 +- .../libcudf/utilities/default_stream.pxd | 7 +- python/pylibcudf/pylibcudf/lists.pxd | 39 ++- 
python/pylibcudf/pylibcudf/lists.pyi | 40 +-- python/pylibcudf/pylibcudf/lists.pyx | 167 ++++++----- python/pylibcudf/pylibcudf/merge.pxd | 5 +- python/pylibcudf/pylibcudf/merge.pyi | 6 +- python/pylibcudf/pylibcudf/merge.pyx | 12 +- python/pylibcudf/pylibcudf/null_mask.pxd | 17 +- python/pylibcudf/pylibcudf/null_mask.pyi | 16 +- python/pylibcudf/pylibcudf/null_mask.pyx | 64 +++-- .../pylibcudf/nvtext/byte_pair_encode.pxd | 5 +- .../pylibcudf/nvtext/byte_pair_encode.pyi | 8 +- .../pylibcudf/nvtext/byte_pair_encode.pyx | 23 +- .../pylibcudf/nvtext/deduplicate.pxd | 9 +- .../pylibcudf/nvtext/deduplicate.pyi | 10 +- .../pylibcudf/nvtext/deduplicate.pyx | 34 +-- .../pylibcudf/nvtext/edit_distance.pxd | 7 +- .../pylibcudf/nvtext/edit_distance.pyi | 8 +- .../pylibcudf/nvtext/edit_distance.pyx | 19 +- .../pylibcudf/nvtext/generate_ngrams.pxd | 9 +- .../pylibcudf/nvtext/generate_ngrams.pyi | 10 +- .../pylibcudf/nvtext/generate_ngrams.pyx | 30 +- python/pylibcudf/pylibcudf/nvtext/jaccard.pxd | 5 +- python/pylibcudf/pylibcudf/nvtext/jaccard.pyi | 6 +- python/pylibcudf/pylibcudf/nvtext/jaccard.pyx | 12 +- python/pylibcudf/pylibcudf/nvtext/minhash.pxd | 11 +- python/pylibcudf/pylibcudf/nvtext/minhash.pyi | 12 +- python/pylibcudf/pylibcudf/nvtext/minhash.pyx | 39 +-- .../pylibcudf/nvtext/ngrams_tokenize.pxd | 5 +- .../pylibcudf/nvtext/ngrams_tokenize.pyi | 6 +- .../pylibcudf/nvtext/ngrams_tokenize.pyx | 12 +- .../pylibcudf/pylibcudf/nvtext/normalize.pxd | 9 +- .../pylibcudf/pylibcudf/nvtext/normalize.pyi | 10 +- .../pylibcudf/pylibcudf/nvtext/normalize.pyx | 28 +- python/pylibcudf/pylibcudf/nvtext/replace.pxd | 7 +- python/pylibcudf/pylibcudf/nvtext/replace.pyi | 8 +- python/pylibcudf/pylibcudf/nvtext/replace.pyx | 27 +- python/pylibcudf/pylibcudf/nvtext/stemmer.pxd | 7 +- python/pylibcudf/pylibcudf/nvtext/stemmer.pyi | 8 +- python/pylibcudf/pylibcudf/nvtext/stemmer.pyx | 21 +- .../pylibcudf/pylibcudf/nvtext/tokenize.pxd | 17 +- .../pylibcudf/pylibcudf/nvtext/tokenize.pyi | 20 
+- .../pylibcudf/pylibcudf/nvtext/tokenize.pyx | 79 +++--- .../pylibcudf/nvtext/wordpiece_tokenize.pxd | 5 +- .../pylibcudf/nvtext/wordpiece_tokenize.pyi | 8 +- .../pylibcudf/nvtext/wordpiece_tokenize.pyx | 19 +- python/pylibcudf/pylibcudf/partitioning.pxd | 7 +- python/pylibcudf/pylibcudf/partitioning.pyi | 8 +- python/pylibcudf/pylibcudf/partitioning.pyx | 30 +- python/pylibcudf/pylibcudf/quantiles.pxd | 7 +- python/pylibcudf/pylibcudf/quantiles.pyi | 8 +- python/pylibcudf/pylibcudf/quantiles.pyx | 21 +- python/pylibcudf/pylibcudf/reduce.pxd | 11 +- python/pylibcudf/pylibcudf/reduce.pyi | 12 +- python/pylibcudf/pylibcudf/reduce.pyx | 36 +-- python/pylibcudf/pylibcudf/replace.pxd | 11 +- python/pylibcudf/pylibcudf/replace.pyi | 12 +- python/pylibcudf/pylibcudf/replace.pyx | 51 ++-- python/pylibcudf/pylibcudf/reshape.pxd | 9 +- python/pylibcudf/pylibcudf/reshape.pyi | 10 +- python/pylibcudf/pylibcudf/reshape.pyx | 28 +- python/pylibcudf/pylibcudf/rolling.pxd | 9 +- python/pylibcudf/pylibcudf/rolling.pyi | 10 +- python/pylibcudf/pylibcudf/rolling.pyx | 34 ++- python/pylibcudf/pylibcudf/round.pxd | 5 +- python/pylibcudf/pylibcudf/round.pyi | 6 +- python/pylibcudf/pylibcudf/round.pyx | 19 +- python/pylibcudf/pylibcudf/scalar.pxd | 7 +- python/pylibcudf/pylibcudf/scalar.pyi | 19 +- python/pylibcudf/pylibcudf/scalar.pyx | 259 +++++++++++------- python/pylibcudf/pylibcudf/search.pxd | 9 +- python/pylibcudf/pylibcudf/search.pyi | 10 +- python/pylibcudf/pylibcudf/search.pyx | 30 +- python/pylibcudf/pylibcudf/sorting.pxd | 27 +- python/pylibcudf/pylibcudf/sorting.pyi | 28 +- python/pylibcudf/pylibcudf/sorting.pyx | 109 ++++---- .../pylibcudf/pylibcudf/stream_compaction.pxd | 17 +- .../pylibcudf/pylibcudf/stream_compaction.pyi | 18 +- .../pylibcudf/pylibcudf/stream_compaction.pyx | 73 ++--- .../pylibcudf/strings/attributes.pxd | 9 +- .../pylibcudf/strings/attributes.pyi | 10 +- .../pylibcudf/strings/attributes.pyx | 30 +- .../pylibcudf/strings/capitalize.pxd | 9 +- 
.../pylibcudf/strings/capitalize.pyi | 10 +- .../pylibcudf/strings/capitalize.pyx | 32 ++- python/pylibcudf/pylibcudf/strings/case.pxd | 9 +- python/pylibcudf/pylibcudf/strings/case.pyi | 10 +- python/pylibcudf/pylibcudf/strings/case.pyx | 30 +- .../pylibcudf/strings/char_types.pxd | 7 +- .../pylibcudf/strings/char_types.pyi | 8 +- .../pylibcudf/strings/char_types.pyx | 21 +- .../pylibcudf/pylibcudf/strings/combine.pxd | 9 +- .../pylibcudf/pylibcudf/strings/combine.pyi | 10 +- .../pylibcudf/pylibcudf/strings/combine.pyx | 38 +-- .../pylibcudf/pylibcudf/strings/contains.pxd | 11 +- .../pylibcudf/pylibcudf/strings/contains.pyi | 12 +- .../pylibcudf/pylibcudf/strings/contains.pyx | 43 +-- .../strings/convert/convert_booleans.pxd | 7 +- .../strings/convert/convert_booleans.pyi | 8 +- .../strings/convert/convert_booleans.pyx | 21 +- .../strings/convert/convert_datetime.pxd | 9 +- .../strings/convert/convert_datetime.pyi | 10 +- .../strings/convert/convert_datetime.pyx | 30 +- .../strings/convert/convert_durations.pxd | 7 +- .../strings/convert/convert_durations.pyi | 8 +- .../strings/convert/convert_durations.pyx | 21 +- .../strings/convert/convert_fixed_point.pxd | 9 +- .../strings/convert/convert_fixed_point.pyi | 10 +- .../strings/convert/convert_fixed_point.pyx | 30 +- .../strings/convert/convert_floats.pxd | 9 +- .../strings/convert/convert_floats.pyi | 10 +- .../strings/convert/convert_floats.pyx | 32 ++- .../strings/convert/convert_integers.pxd | 15 +- .../strings/convert/convert_integers.pyi | 16 +- .../strings/convert/convert_integers.pyx | 59 ++-- .../strings/convert/convert_ipv4.pxd | 9 +- .../strings/convert/convert_ipv4.pyi | 10 +- .../strings/convert/convert_ipv4.pyx | 30 +- .../strings/convert/convert_lists.pxd | 5 +- .../strings/convert/convert_lists.pyi | 6 +- .../strings/convert/convert_lists.pyx | 14 +- .../strings/convert/convert_urls.pxd | 7 +- .../strings/convert/convert_urls.pyi | 8 +- .../strings/convert/convert_urls.pyx | 25 +- 
.../pylibcudf/pylibcudf/strings/extract.pxd | 9 +- .../pylibcudf/pylibcudf/strings/extract.pyi | 10 +- .../pylibcudf/pylibcudf/strings/extract.pyx | 30 +- python/pylibcudf/pylibcudf/strings/find.pxd | 13 +- python/pylibcudf/pylibcudf/strings/find.pyi | 14 +- python/pylibcudf/pylibcudf/strings/find.pyx | 56 ++-- .../pylibcudf/strings/find_multiple.pxd | 7 +- .../pylibcudf/strings/find_multiple.pyi | 8 +- .../pylibcudf/strings/find_multiple.pyx | 21 +- .../pylibcudf/pylibcudf/strings/findall.pxd | 7 +- .../pylibcudf/pylibcudf/strings/findall.pyi | 8 +- .../pylibcudf/pylibcudf/strings/findall.pyx | 21 +- .../pylibcudf/pylibcudf/strings/padding.pxd | 9 +- .../pylibcudf/pylibcudf/strings/padding.pyi | 10 +- .../pylibcudf/pylibcudf/strings/padding.pyx | 30 +- python/pylibcudf/pylibcudf/strings/repeat.pxd | 5 +- python/pylibcudf/pylibcudf/strings/repeat.pyi | 6 +- python/pylibcudf/pylibcudf/strings/repeat.pyx | 14 +- .../pylibcudf/pylibcudf/strings/replace.pxd | 9 +- .../pylibcudf/pylibcudf/strings/replace.pyi | 10 +- .../pylibcudf/pylibcudf/strings/replace.pyx | 32 ++- .../pylibcudf/strings/replace_re.pxd | 7 +- .../pylibcudf/strings/replace_re.pyi | 10 +- .../pylibcudf/strings/replace_re.pyx | 27 +- .../pylibcudf/pylibcudf/strings/reverse.pyi | 6 +- .../pylibcudf/pylibcudf/strings/reverse.pyx | 12 +- python/pylibcudf/pylibcudf/strings/slice.pxd | 5 +- python/pylibcudf/pylibcudf/strings/slice.pyi | 6 +- python/pylibcudf/pylibcudf/strings/slice.pyx | 20 +- .../pylibcudf/strings/split/partition.pxd | 7 +- .../pylibcudf/strings/split/partition.pyi | 8 +- .../pylibcudf/strings/split/partition.pyx | 25 +- .../pylibcudf/strings/split/split.pxd | 19 +- .../pylibcudf/strings/split/split.pyi | 20 +- .../pylibcudf/strings/split/split.pyx | 82 +++--- python/pylibcudf/pylibcudf/strings/strip.pxd | 5 +- python/pylibcudf/pylibcudf/strings/strip.pyi | 6 +- python/pylibcudf/pylibcudf/strings/strip.pyx | 14 +- .../pylibcudf/pylibcudf/strings/translate.pxd | 7 +- 
.../pylibcudf/pylibcudf/strings/translate.pyi | 8 +- .../pylibcudf/pylibcudf/strings/translate.pyx | 21 +- python/pylibcudf/pylibcudf/strings/wrap.pxd | 5 +- python/pylibcudf/pylibcudf/strings/wrap.pyi | 6 +- python/pylibcudf/pylibcudf/strings/wrap.pyx | 12 +- python/pylibcudf/pylibcudf/table.pxd | 7 +- python/pylibcudf/pylibcudf/table.pyi | 10 +- python/pylibcudf/pylibcudf/table.pyx | 29 +- python/pylibcudf/pylibcudf/transform.pxd | 19 +- python/pylibcudf/pylibcudf/transform.pyi | 20 +- python/pylibcudf/pylibcudf/transform.pyx | 92 ++++--- python/pylibcudf/pylibcudf/transpose.pxd | 5 +- python/pylibcudf/pylibcudf/transpose.pyi | 6 +- python/pylibcudf/pylibcudf/transpose.pyx | 12 +- python/pylibcudf/pylibcudf/unary.pxd | 15 +- python/pylibcudf/pylibcudf/unary.pyi | 16 +- python/pylibcudf/pylibcudf/unary.pyx | 59 ++-- python/pylibcudf/pylibcudf/utils.pxd | 6 +- python/pylibcudf/pylibcudf/utils.pyi | 9 +- python/pylibcudf/pylibcudf/utils.pyx | 6 +- python/pylibcudf/tests/test_experimental.py | 23 +- .../pylibcudf/tests/test_stream_protocol.py | 74 +++++ 357 files changed, 3470 insertions(+), 2967 deletions(-) create mode 100644 python/pylibcudf/tests/test_stream_protocol.py diff --git a/python/cudf_polars/cudf_polars/utils/cuda_stream.py b/python/cudf_polars/cudf_polars/utils/cuda_stream.py index a42252157b4..c0708d3bea8 100644 --- a/python/cudf_polars/cudf_polars/utils/cuda_stream.py +++ b/python/cudf_polars/cudf_polars/utils/cuda_stream.py @@ -13,6 +13,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Sequence + from pylibcudf.utils import CudaStreamLike from rmm.pylibrmm.stream import Stream @@ -27,7 +28,7 @@ def get_cuda_stream() -> Stream: def join_cuda_streams( - *, downstreams: Sequence[Stream], upstreams: Sequence[Stream] + *, downstreams: Sequence[CudaStreamLike], upstreams: Sequence[CudaStreamLike] ) -> None: """ Join multiple CUDA streams. 
@@ -46,7 +47,7 @@ def join_cuda_streams( def get_joined_cuda_stream( - get_cuda_stream: Callable[[], Stream], *, upstreams: Sequence[Stream] + get_cuda_stream: Callable[[], Stream], *, upstreams: Sequence[CudaStreamLike] ) -> Stream: """ Return a CUDA stream that is joined to the given streams. diff --git a/python/pylibcudf/pylibcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/binaryop.pxd index 29c9f3d98ea..a34a02b2191 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/binaryop.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.binaryop cimport binary_operator from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -25,7 +24,7 @@ cpdef Column binary_operation( RightBinaryOperand rhs, binary_operator op, DataType output_type, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi index 52263440db3..1f3c9a2cb64 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyi +++ b/python/pylibcudf/pylibcudf/binaryop.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class BinaryOperator(IntEnum): ADD = ... 
@@ -52,7 +52,7 @@ def binary_operation( rhs: Column | Scalar, op: BinaryOperator, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_supported_operation( diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index a46b6aaaa81..20a69d60727 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator import dereference @@ -20,6 +20,7 @@ from .column cimport Column from .scalar cimport Scalar from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["BinaryOperator", "binary_operation", "is_supported_operation"] @@ -28,7 +29,7 @@ cpdef Column binary_operation( RightBinaryOperand rhs, binary_operator op, DataType output_type, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a binary operation between a column and another column or scalar. 
@@ -61,7 +62,8 @@ cpdef Column binary_operation( The result of the binary operation """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if LeftBinaryOperand is Column and RightBinaryOperand is Column: @@ -71,7 +73,7 @@ cpdef Column binary_operation( rhs.view(), op, output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) elif LeftBinaryOperand is Column and RightBinaryOperand is Scalar: @@ -81,7 +83,7 @@ cpdef Column binary_operation( dereference(rhs.c_obj), op, output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) elif LeftBinaryOperand is Scalar and RightBinaryOperand is Column: @@ -91,13 +93,13 @@ cpdef Column binary_operation( rhs.view(), op, output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid arguments {lhs} and {rhs}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef bool is_supported_operation( diff --git a/python/pylibcudf/pylibcudf/column.pxd b/python/pylibcudf/pylibcudf/column.pxd index 7348d68f6de..429f85f39b0 100644 --- a/python/pylibcudf/pylibcudf/column.pxd +++ b/python/pylibcudf/pylibcudf/column.pxd @@ -6,7 +6,6 @@ from libcpp.vector cimport vector from libc.stdint cimport uint64_t from rmm.librmm.device_buffer cimport device_buffer -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport ( @@ -27,7 +26,7 @@ cdef class OwnerWithCAI: cdef dict cai @staticmethod - cdef create(column_view cv, object owner, Stream stream) + cdef create(column_view cv, object owner, object stream) cdef class OwnerMaskWithCAI: @@ -38,7 +37,7 @@ cdef class OwnerMaskWithCAI: cdef create(column_view cv, object owner) -cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=*) 
+cdef gpumemoryview _copy_array_to_device(object buf, object stream=*) cdef class Column: @@ -61,7 +60,7 @@ cdef class Column: @staticmethod cdef Column from_libcudf( unique_ptr[column] libcudf_col, - Stream stream, + object stream, DeviceMemoryResource mr ) @@ -72,7 +71,7 @@ cdef class Column: cdef Column from_column_view_of_arbitrary( const column_view& cv, object owner, - Stream stream, + object stream, ) @staticmethod @@ -81,10 +80,10 @@ cdef class Column: tuple shape, DataType dtype, Column base=*, - Stream stream=*, + object stream=*, ) - cpdef Scalar to_scalar(self, Stream stream=*, DeviceMemoryResource mr=*) + cpdef Scalar to_scalar(self, object stream=*, DeviceMemoryResource mr=*) cpdef DataType type(self) cpdef Column child(self, size_type index) cpdef size_type num_children(self) @@ -95,7 +94,7 @@ cdef class Column: cpdef object data(self) cpdef object null_mask(self) cpdef list children(self) - cpdef Column copy(self, Stream stream=*, DeviceMemoryResource mr=*) + cpdef Column copy(self, object stream=*, DeviceMemoryResource mr=*) cpdef uint64_t device_buffer_size(self) cpdef Column with_mask(self, object, size_type, bint validate=*) @@ -108,10 +107,10 @@ cdef class ListsColumnView: cpdef child(self) cpdef offsets(self) cdef lists_column_view view(self) nogil - cpdef Column get_sliced_child(self, Stream stream=*) + cpdef Column get_sliced_child(self, object stream=*) cdef class StructsColumnView: cdef Column _column cdef structs_column_view view(self) nogil - cpdef Column get_sliced_child(self, int index, Stream stream=*) + cpdef Column get_sliced_child(self, int index, object stream=*) diff --git a/python/pylibcudf/pylibcudf/column.pyi b/python/pylibcudf/pylibcudf/column.pyi index 3ac4641ac13..3ff7f53f356 100644 --- a/python/pylibcudf/pylibcudf/column.pyi +++ b/python/pylibcudf/pylibcudf/column.pyi @@ -6,12 +6,12 @@ from typing import Any, Protocol, TypedDict from rmm.pylibrmm.device_buffer import DeviceBuffer from rmm.pylibrmm.memory_resource import 
DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf._interop_helpers import ArrowLike, ColumnMetadata from pylibcudf.scalar import Scalar from pylibcudf.span import Span from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class ArrayInterfaceBase(TypedDict): shape: tuple[int, ...] @@ -64,7 +64,7 @@ class Column: def num_children(self) -> int: ... def copy( self, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def device_buffer_size(self) -> int: ... @@ -77,19 +77,19 @@ class Column: def from_scalar( scalar: Scalar, size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def to_scalar( self, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Scalar: ... @staticmethod def all_null_like( like: Column, size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @staticmethod @@ -99,32 +99,34 @@ class Column: def to_arrow( self, metadata: ColumnMetadata | str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> ArrowLike: ... # Private methods below are included because polars is currently using them, # but we want to remove stubs for these private methods eventually def _to_schema(self, metadata: Any = None) -> Any: ... - def _to_host_array(self, stream: Stream) -> Any: ... + def _to_host_array(self, stream: CudaStreamLike) -> Any: ... @staticmethod def from_arrow( obj: ArrowLike, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
@classmethod def from_cuda_array_interface( - cls, obj: SupportsCudaArrayInterface, stream: Stream | None = None + cls, + obj: SupportsCudaArrayInterface, + stream: CudaStreamLike | None = None, ) -> Column: ... @classmethod def from_array_interface( - cls, obj: SupportsArrayInterface, stream: Stream | None = None + cls, obj: SupportsArrayInterface, stream: CudaStreamLike | None = None ) -> Column: ... @classmethod def from_array( cls, obj: SupportsCudaArrayInterface | SupportsArrayInterface, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... @staticmethod def struct_from_children(children: Sequence[Column]) -> Column: ... @@ -132,21 +134,23 @@ class Column: def from_iterable_of_py( obj: Iterable, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... class ListsColumnView: def __init__(self, column: Column): ... def child(self) -> Column: ... def offsets(self) -> Column: ... - def get_sliced_child(self, stream: Stream | None = None) -> Column: ... + def get_sliced_child( + self, stream: CudaStreamLike | None = None + ) -> Column: ... class StructsColumnView: def __init__(self, column: Column): ... def child(self) -> Column: ... def offsets(self) -> Column: ... def get_sliced_child( - self, index: int, stream: Stream | None = None + self, index: int, stream: CudaStreamLike | None = None ) -> Column: ... 
def is_c_contiguous( diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index 96137f96256..fc8745dae26 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -67,6 +67,7 @@ from itertools import accumulate import functools import operator from typing import Iterable +from cuda.bindings.cyruntime cimport cudaStream_t try: import pyarrow as pa @@ -96,7 +97,7 @@ cdef class _ArrowColumnHolder: cdef class OwnerWithCAI: """An interface for column view's data with gpumemoryview via CAI.""" @staticmethod - cdef create(column_view cv, object owner, Stream stream): + cdef create(column_view cv, object owner, object stream): obj = OwnerWithCAI() obj.owner = owner # The default size of 0 will be applied for any type that stores data in the @@ -108,7 +109,7 @@ cdef class OwnerWithCAI: # Cast to Python integers before multiplying to avoid overflow. size = int(cv.size()) * int(cpp_size_of(cv.type())) elif cv.type().id() == type_id.STRING: - size = strings_column_view(cv).chars_size(stream.view()) + size = strings_column_view(cv).chars_size((stream).view().value()) obj.cai = { "shape": (size,), @@ -156,7 +157,7 @@ class ArrayInterfaceWrapper: self.__array_interface__ = iface -cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=None): +cdef gpumemoryview _copy_array_to_device(object buf, object stream=None): """ Copy a host-side array.array buffer to device memory. 
@@ -175,11 +176,11 @@ cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=None): cdef memoryview mv = memoryview(buf) cdef uintptr_t ptr = mv.obj.buffer_info()[0] cdef size_t nbytes = len(mv) * mv.itemsize - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) return gpumemoryview(DeviceBuffer.to_device( ptr, - stream + _stream )) @@ -401,7 +402,7 @@ cdef class Column: def from_arrow( obj: ArrowLike, dtype: DataType | None = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ) -> ArrowLike: """ @@ -453,7 +454,8 @@ cdef class Column: cdef _ArrowColumnHolder result cdef unique_ptr[arrow_column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if hasattr(obj, "__arrow_c_device_array__"): @@ -469,7 +471,7 @@ cdef class Column: c_result = make_unique[arrow_column]( move(dereference(c_schema)), move(dereference(c_device_array)), - stream.view(), + _cs, result.mr.get_mr(), ) result.col.swap(c_result) @@ -477,7 +479,7 @@ cdef class Column: return Column.from_column_view_of_arbitrary( result.col.get().view(), result, - stream, + _stream, ) elif hasattr(obj, "__arrow_c_array__"): schema, h_array = obj.__arrow_c_array__() @@ -490,7 +492,7 @@ cdef class Column: c_result = make_unique[arrow_column]( move(dereference(c_schema)), move(dereference(c_array)), - stream.view(), + _cs, result.mr.get_mr(), ) result.col.swap(c_result) @@ -498,7 +500,7 @@ cdef class Column: return Column.from_column_view_of_arbitrary( result.col.get().view(), result, - stream, + _stream, ) elif hasattr(obj, "__arrow_c_stream__"): arrow_stream = obj.__arrow_c_stream__() @@ -514,7 +516,7 @@ cdef class Column: with nogil: c_result = make_unique[arrow_column]( move(dereference(c_arrow_stream)), - stream.view(), + _cs, result.mr.get_mr(), ) result.col.swap(c_result) @@ -522,7 +524,7 @@ cdef class Column: return 
Column.from_column_view_of_arbitrary( result.col.get().view(), result, - stream, + _stream, ) elif hasattr(obj, "__arrow_c_device_stream__"): # TODO: When we add support for this case, it should be moved above @@ -656,7 +658,7 @@ cdef class Column: @staticmethod cdef Column from_libcudf( unique_ptr[column] libcudf_col, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Column from a libcudf column. @@ -667,6 +669,7 @@ cdef class Column: """ assert stream is not None, "stream cannot be None" assert mr is not None, "mr cannot be None" + cdef Stream _stream = stream cdef DataType dtype = DataType.from_libcudf(libcudf_col.get().type()) cdef size_type size = libcudf_col.get().size() @@ -677,13 +680,13 @@ cdef class Column: # Note that when converting to cudf Column objects we'll need to pull # out the base object. cdef gpumemoryview data = gpumemoryview( - DeviceBuffer.c_from_unique_ptr(move(contents.data), stream, mr) + DeviceBuffer.c_from_unique_ptr(move(contents.data), _stream, mr) ) cdef gpumemoryview mask = None if null_count > 0: mask = gpumemoryview( - DeviceBuffer.c_from_unique_ptr(move(contents.null_mask), stream, mr) + DeviceBuffer.c_from_unique_ptr(move(contents.null_mask), _stream, mr) ) children = [] @@ -772,7 +775,7 @@ cdef class Column: cdef Column from_column_view_of_arbitrary( const column_view& cv, object owner, - Stream stream, + object stream, ): """Create a Column from a libcudf column_view into an arbitrary owner. @@ -818,7 +821,7 @@ cdef class Column: def from_scalar( Scalar slr, size_type size, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a Column from a Scalar. 
@@ -839,18 +842,19 @@ cdef class Column: """ cdef const scalar* c_scalar = slr.get() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = make_column_from_scalar( dereference(c_scalar), size, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) - cpdef Scalar to_scalar(self, Stream stream=None, DeviceMemoryResource mr=None): + cpdef Scalar to_scalar(self, object stream=None, DeviceMemoryResource mr=None): """ Return the first value of 1-element column as a Scalar. @@ -873,11 +877,12 @@ cdef class Column: cdef column_view cv = self.view() cdef unique_ptr[scalar] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = get_element(cv, 0, stream.view(), mr.get_mr()) + result = get_element(cv, 0, _cs, mr.get_mr()) return Scalar.from_libcudf(move(result)) @@ -885,7 +890,7 @@ cdef class Column: def all_null_like( Column like, size_type size, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create an all null column from a template. @@ -904,18 +909,19 @@ cdef class Column: Column An all-null column of `size` rows and type matching `like`. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) - cdef Scalar slr = Scalar.empty_like(like, stream, mr) + cdef Scalar slr = Scalar.empty_like(like, _stream, mr) cdef unique_ptr[column] c_result with nogil: c_result = make_column_from_scalar( dereference(slr.get()), size, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) @staticmethod cdef Column _wrap_nested_list_column( @@ -923,7 +929,7 @@ cdef class Column: tuple shape, DataType dtype, Column base=None, - Stream stream=None, + object stream=None, ): """ Construct a list Column from a gpumemoryview and array @@ -937,7 +943,7 @@ cdef class Column: """ ndim = len(shape) flat_size = functools.reduce(operator.mul, shape) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) if base is None: base = Column( @@ -958,9 +964,9 @@ cdef class Column: offsets_col = sequence( outer_len + 1, - Scalar.from_py(0, int32_dtype, stream=stream), - Scalar.from_py(shape[i], int32_dtype, stream=stream), - stream, + Scalar.from_py(0, int32_dtype, stream=_stream), + Scalar.from_py(shape[i], int32_dtype, stream=_stream), + _stream, ) nested = Column( @@ -976,7 +982,7 @@ cdef class Column: return nested @classmethod - def from_array_interface(cls, obj, Stream stream=None): + def from_array_interface(cls, obj, object stream=None): """ Create a Column from an object implementing the NumPy Array Interface. 
@@ -1016,21 +1022,21 @@ cdef class Column: cdef const unsigned char* ptr cdef const unsigned char[:] view - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) if nbytes > 0: ptr = data_ptr view = ( ptr)[:nbytes] - dbuf = DeviceBuffer.to_device(view, stream) + dbuf = DeviceBuffer.to_device(view, _stream) else: - dbuf = DeviceBuffer(size=0, stream=stream) + dbuf = DeviceBuffer(size=0, stream=_stream) return Column._wrap_nested_list_column( - gpumemoryview(dbuf), shape, dtype, None, stream + gpumemoryview(dbuf), shape, dtype, None, _stream ) @classmethod - def from_cuda_array_interface(cls, obj, Stream stream=None): + def from_cuda_array_interface(cls, obj, object stream=None): """ Create a Column from an object implementing the CUDA Array Interface. @@ -1069,7 +1075,7 @@ cdef class Column: ) @classmethod - def from_array(cls, obj, Stream stream=None): + def from_array(cls, obj, object stream=None): """ Create a Column from any object which supports the NumPy or CUDA array interface. @@ -1115,7 +1121,7 @@ cdef class Column: def from_iterable_of_py( obj: Iterable, dtype: DataType | None = None, - Stream stream=None + object stream=None ) -> Column: """ Create a Column from a Python iterable of scalar values or nested iterables. 
@@ -1364,14 +1370,15 @@ cdef class Column: """The children of the column.""" return self._children - cpdef Column copy(self, Stream stream=None, DeviceMemoryResource mr=None): + cpdef Column copy(self, object stream=None, DeviceMemoryResource mr=None): """Create a copy of the column.""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = make_unique[column](self.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = make_unique[column](self.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef uint64_t device_buffer_size(self): """ @@ -1419,10 +1426,12 @@ cdef class Column: return PyCapsule_New(raw_schema_ptr, 'arrow_schema', _release_schema) - def _to_host_array(self, Stream stream): + def _to_host_array(self, object stream): cdef ArrowArray* raw_host_array_ptr + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: - raw_host_array_ptr = to_arrow_host_raw(self.view(), stream.view()) + raw_host_array_ptr = to_arrow_host_raw(self.view(), _cs) return PyCapsule_New(raw_host_array_ptr, "arrow_array", _release_array) @@ -1484,7 +1493,7 @@ cdef class ListsColumnView: """ return lists_column_view(self._column.view()) - cpdef Column get_sliced_child(self, Stream stream=None): + cpdef Column get_sliced_child(self, object stream=None): """ Get the list elements child properly sliced to match parent's view. 
@@ -1498,9 +1507,9 @@ cdef class ListsColumnView: Column The sliced elements column """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) - cdef column_view c_child = self.view().get_sliced_child(stream.view()) + cdef column_view c_child = self.view().get_sliced_child(_stream.view().value()) return Column.from_column_view(c_child, self._column.child(1)) @@ -1522,7 +1531,7 @@ cdef class StructsColumnView: """ return structs_column_view(self._column.view()) - cpdef Column get_sliced_child(self, int index, Stream stream=None): + cpdef Column get_sliced_child(self, int index, object stream=None): """ Get the struct elements child properly sliced to match parent's view. @@ -1538,9 +1547,10 @@ cdef class StructsColumnView: Column The sliced elements column """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) - cdef column_view c_child = self.view().get_sliced_child(index, stream.view()) + cdef cudaStream_t _cs = _stream.view().value() + cdef column_view c_child = self.view().get_sliced_child(index, _cs) return Column.from_column_view(c_child, self._column.child(index)) diff --git a/python/pylibcudf/pylibcudf/column_factories.pxd b/python/pylibcudf/pylibcudf/column_factories.pxd index d26b3396e30..3f9841c045d 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/column_factories.pxd @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from pylibcudf.libcudf.types cimport mask_state from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .types cimport DataType, size_type, type_id @@ -20,7 +19,7 @@ cpdef Column make_numeric_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -28,7 +27,7 @@ cpdef Column make_fixed_point_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, 
DeviceMemoryResource mr = *, ) @@ -36,7 +35,7 @@ cpdef Column make_timestamp_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -44,7 +43,7 @@ cpdef Column make_duration_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -52,18 +51,18 @@ cpdef Column make_fixed_width_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column make_empty_column( MakeEmptyColumnOperand type_or_id, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column make_empty_lists_column( DataType child_type, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/column_factories.pyi b/python/pylibcudf/pylibcudf/column_factories.pyi index 66d46d88949..a9e92c5f823 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyi +++ b/python/pylibcudf/pylibcudf/column_factories.pyi @@ -1,53 +1,53 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType, MaskState, TypeId +from pylibcudf.utils import CudaStreamLike def make_numeric_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_fixed_point_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def make_timestamp_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_duration_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_fixed_width_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_empty_column( type_or_id: DataType | TypeId, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_empty_lists_column( child_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx index 0848f1aff03..45d590f4106 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyx +++ b/python/pylibcudf/pylibcudf/column_factories.pyx @@ -20,6 +20,7 @@ from .types cimport DataType, type_id from .types import MaskState, TypeId from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -34,7 +35,7 @@ __all__ = [ cpdef Column make_empty_column( MakeEmptyColumnOperand type_or_id, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Creates an empty column of the specified type. 
@@ -53,7 +54,7 @@ cpdef Column make_empty_column( """ cdef unique_ptr[column] result cdef type_id id - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) if MakeEmptyColumnOperand is object: @@ -75,14 +76,14 @@ cpdef Column make_empty_column( raise TypeError( "Must pass a TypeId or DataType" ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_numeric_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Creates an empty numeric column. @@ -102,7 +103,8 @@ cpdef Column make_numeric_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -110,17 +112,17 @@ cpdef Column make_numeric_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_fixed_point_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -136,7 +138,8 @@ cpdef Column make_fixed_point_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -144,18 +147,18 @@ cpdef Column make_fixed_point_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_timestamp_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource 
mr=None, ): @@ -171,7 +174,8 @@ cpdef Column make_timestamp_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -179,18 +183,18 @@ cpdef Column make_timestamp_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_duration_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -206,7 +210,8 @@ cpdef Column make_duration_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -214,18 +219,18 @@ cpdef Column make_duration_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_fixed_width_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -241,7 +246,8 @@ cpdef Column make_fixed_width_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -249,16 +255,16 @@ cpdef Column make_fixed_width_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_empty_lists_column( DataType child_type, - Stream stream=None, + object stream=None, 
DeviceMemoryResource mr=None, ): """Creates an empty column of the specified type. @@ -276,10 +282,10 @@ cpdef Column make_empty_lists_column( An empty Column """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) with nogil: result = cpp_make_empty_lists_column(child_type.c_obj) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/concatenate.pxd index 60adf27c9a3..60189ba4406 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/concatenate.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from .table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -11,4 +10,4 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource # unify the column and table paths without using runtime dispatch instead. In this case # we choose to prioritize API consistency over performance, so we use the same function # with a bit of runtime dispatch overhead. -cpdef concatenate(list objects, Stream stream=*, DeviceMemoryResource mr=*) +cpdef concatenate(list objects, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/concatenate.pyi b/python/pylibcudf/pylibcudf/concatenate.pyi index 18e8bff2e2f..59379e01c46 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pyi +++ b/python/pylibcudf/pylibcudf/concatenate.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def concatenate[ColumnOrTable: (Column, Table)]( objects: list[ColumnOrTable], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> ColumnOrTable: ... diff --git a/python/pylibcudf/pylibcudf/concatenate.pyx b/python/pylibcudf/pylibcudf/concatenate.pyx index 36fa0984a68..9921d5b1a39 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pyx +++ b/python/pylibcudf/pylibcudf/concatenate.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -16,10 +16,11 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["concatenate"] -cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None): +cpdef concatenate(list objects, object stream=None, DeviceMemoryResource mr=None): """Concatenate columns or tables. 
Parameters @@ -41,7 +42,8 @@ cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None cdef vector[column_view] c_columns cdef vector[table_view] c_tables - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef unique_ptr[column] c_col_result @@ -53,17 +55,17 @@ cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None with nogil: c_tbl_result = cpp_concatenate.concatenate( - c_tables, stream.view(), mr.get_mr() + c_tables, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_tbl_result), stream, mr) + return Table.from_libcudf(move(c_tbl_result), _stream, mr) elif isinstance(objects[0], Column): for column in objects: c_columns.push_back((column).view()) with nogil: c_col_result = cpp_concatenate.concatenate( - c_columns, stream.view(), mr.get_mr() + c_columns, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_col_result), stream, mr) + return Column.from_libcudf(move(c_col_result), _stream, mr) else: raise ValueError("input must be a list of Columns or Tables") diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/contiguous_split.pxd index a294e70a4a6..95259723dfa 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/contiguous_split.pxd @@ -32,13 +32,13 @@ cdef class HostBuffer: cdef class PackedColumns: cdef unique_ptr[packed_columns] c_obj - cdef Stream stream + cdef object stream cdef DeviceMemoryResource mr @staticmethod cdef PackedColumns from_libcudf( unique_ptr[packed_columns] data, - Stream stream, + object stream, DeviceMemoryResource mr ) cpdef tuple release(self) @@ -58,10 +58,10 @@ cdef class ChunkedPack: cpdef PackedColumns pack(Table input) -cpdef Table unpack(PackedColumns input, Stream stream=*) +cpdef Table unpack(PackedColumns input, object stream = *) cpdef Table unpack_from_memoryviews( memoryview metadata, object 
gpu_data, - Stream stream=*, + object stream = *, ) diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyi b/python/pylibcudf/pylibcudf/contiguous_split.pyi index df241c079ae..6e0e653b5bb 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyi +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyi @@ -2,28 +2,30 @@ # SPDX-License-Identifier: Apache-2.0 from rmm.mr import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.span import Span from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class PackedColumns: def __init__(self): ... def release( - self, stream: Stream | None = None + self, stream: CudaStreamLike | None = None ) -> tuple[memoryview[bytes], gpumemoryview]: ... def pack( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> PackedColumns: ... -def unpack(input: PackedColumns, stream: Stream | None = None) -> Table: ... +def unpack( + input: PackedColumns, stream: CudaStreamLike | None = None +) -> Table: ... def unpack_from_memoryviews( metadata: memoryview[bytes], gpu_data: Span, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Table: ... class ChunkedPack: @@ -32,7 +34,7 @@ class ChunkedPack: def create( input: Table, user_buffer_size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, temp_mr: DeviceMemoryResource | None = None, ) -> ChunkedPack: ... def has_next(self) -> bool: ... 
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx index 6b24def5dc8..239d89d6470 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyx +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx @@ -15,6 +15,8 @@ from cuda.bindings.cyruntime cimport ( cudaError_t, cudaMemcpyAsync, cudaMemcpyKind, + cudaStream_t, + cudaStreamSynchronize, ) from pylibcudf.libcudf.contiguous_split cimport ( @@ -27,7 +29,6 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.utilities.span cimport device_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view from rmm.pylibrmm.device_buffer cimport DeviceBuffer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream @@ -36,6 +37,7 @@ from .gpumemoryview cimport gpumemoryview from .table cimport Table from .span import is_span from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -105,7 +107,7 @@ cdef class PackedColumns: @staticmethod cdef PackedColumns from_libcudf( unique_ptr[packed_columns] data, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Python PackedColumns from a libcudf packed_columns.""" @@ -163,7 +165,7 @@ cdef class ChunkedPack: def create( Table input, size_t user_buffer_size, - Stream stream=None, + object stream=None, DeviceMemoryResource temp_mr=None, ): """ @@ -184,16 +186,16 @@ cdef class ChunkedPack: ------- New ChunkedPack object. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) temp_mr = _get_memory_resource(temp_mr) cdef unique_ptr[chunked_pack] obj = chunked_pack.create( - input.view(), user_buffer_size, stream.view(), temp_mr.get_mr() + input.view(), user_buffer_size, _stream.view().value(), temp_mr.get_mr() ) cdef ChunkedPack out = ChunkedPack.__new__(ChunkedPack) out.table = input out.mr = temp_mr - out.stream = stream + out.stream = _stream out.c_obj = move(obj) return out @@ -292,7 +294,8 @@ cdef class ChunkedPack: dereference(self.c_obj).get_total_contiguous_size() ) ) - cdef cuda_stream_view stream = self.stream.view() + cdef Stream py_stream = self.stream + cdef cudaStream_t stream = py_stream.view().value() with nogil: while dereference(self.c_obj).has_next(): size = dereference(self.c_obj).next(d_span) @@ -301,22 +304,22 @@ cdef class ChunkedPack: d_span.data(), size, cudaMemcpyKind.cudaMemcpyDeviceToHost, - stream.value(), + stream, ) offset += size if err != cudaError.cudaSuccess: - stream.synchronize() + cudaStreamSynchronize(stream) raise RuntimeError( f"Memcpy in pack_to_host failed error: {err}" ) - stream.synchronize() + cudaStreamSynchronize(stream) return ( self.build_metadata(), memoryview(HostBuffer.from_unique_ptr(move(h_buf))), ) -cpdef PackedColumns pack(Table input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef PackedColumns pack(Table input, object stream=None, DeviceMemoryResource mr=None): """Deep-copy a table into a serialized contiguous memory format. Later use `unpack` or `unpack_from_memoryviews` to unpack the serialized @@ -346,16 +349,17 @@ cpdef PackedColumns pack(Table input, Stream stream=None, DeviceMemoryResource m For details, see :cpp:func:`pack`. 
""" cdef unique_ptr[packed_columns] pack - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: pack = move(make_unique[packed_columns]( - cpp_pack(input.view(), stream.view(), mr.get_mr()) + cpp_pack(input.view(), _cs, mr.get_mr()) )) - return PackedColumns.from_libcudf(move(pack), stream, mr) + return PackedColumns.from_libcudf(move(pack), _stream, mr) -cpdef Table unpack(PackedColumns input, Stream stream=None): +cpdef Table unpack(PackedColumns input, object stream=None): """Deserialize the result of `pack`. Copies the result of a serialized table into a table. @@ -375,16 +379,16 @@ cpdef Table unpack(PackedColumns input, Stream stream=None): Copy of the packed columns. """ cdef table_view v - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) with nogil: v = cpp_unpack(dereference(input.c_obj)) - return Table.from_table_view_of_arbitrary(v, input, stream) + return Table.from_table_view_of_arbitrary(v, input, _stream) cpdef Table unpack_from_memoryviews( memoryview metadata, object gpu_data, - Stream stream=None, + object stream=None, ): """Deserialize the result of `pack`. @@ -406,7 +410,7 @@ cpdef Table unpack_from_memoryviews( Table Copy of the packed columns. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) cdef device_span[uint8_t] d_span = _get_device_span(gpu_data) if metadata.nbytes == 0: @@ -416,7 +420,7 @@ cpdef Table unpack_from_memoryviews( # used for any operations. 
return Table.from_libcudf( make_unique[table](table_view()), - stream, + _stream, _get_memory_resource(), ) @@ -428,4 +432,4 @@ cpdef Table unpack_from_memoryviews( cdef table_view v with nogil: v = cpp_unpack(metadata_ptr, gpu_data_ptr) - return Table.from_table_view_of_arbitrary(v, gpu_data, stream) + return Table.from_table_view_of_arbitrary(v, gpu_data, _stream) diff --git a/python/pylibcudf/pylibcudf/copying.pxd b/python/pylibcudf/pylibcudf/copying.pxd index caaa590de15..4143e846994 100644 --- a/python/pylibcudf/pylibcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/copying.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool as cbool @@ -9,7 +9,6 @@ from pylibcudf.libcudf.copying cimport ( from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -40,7 +39,7 @@ cpdef Table gather( Table source_table, Column gather_map, out_of_bounds_policy bounds_policy, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -48,19 +47,19 @@ cpdef Table scatter( TableOrListOfScalars source, Column scatter_map, Table target_table, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef ColumnOrTable empty_like( - ColumnOrTable input, Stream stream=*, DeviceMemoryResource mr=* + ColumnOrTable input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column allocate_like( Column input_column, mask_allocation_policy policy, size=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -70,7 +69,7 @@ cpdef Column copy_range_in_place( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=*, + object stream = *, ) cpdef Column copy_range( @@ -79,7 +78,7 @@ cpdef Column 
copy_range( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -87,19 +86,19 @@ cpdef Column shift( Column input, size_type offset, Scalar fill_value, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) -cpdef list slice(ColumnOrTable input, list indices, Stream stream=*) +cpdef list slice(ColumnOrTable input, list indices, object stream = *) -cpdef list split(ColumnOrTable input, list splits, Stream stream=*) +cpdef list split(ColumnOrTable input, list splits, object stream = *) cpdef Column copy_if_else( LeftCopyIfElseOperand lhs, RightCopyIfElseOperand rhs, Column boolean_mask, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -107,13 +106,13 @@ cpdef Table boolean_mask_scatter( TableOrListOfScalars input, Table target, Column boolean_mask, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Scalar get_element( Column input_column, size_type index, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi index 04acecc2f1b..bdff6cddad5 100644 --- a/python/pylibcudf/pylibcudf/copying.pyi +++ b/python/pylibcudf/pylibcudf/copying.pyi @@ -1,15 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from typing import TypeVar from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class MaskAllocationPolicy(IntEnum): NEVER = ... 
@@ -26,26 +26,26 @@ def gather( source_table: Table, gather_map: Column, bounds_policy: OutOfBoundsPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def scatter( source: Table | list[Scalar], scatter_map: Column, target_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def empty_like( input: ColumnOrTable, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> ColumnOrTable: ... def allocate_like( input_column: Column, policy: MaskAllocationPolicy, size: int | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def copy_range_in_place( @@ -54,7 +54,7 @@ def copy_range_in_place( input_begin: int, input_end: int, target_begin: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... def copy_range( input_column: Column, @@ -62,39 +62,43 @@ def copy_range( input_begin: int, input_end: int, target_begin: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def shift( input: Column, offset: int, fill_value: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def slice( - input: ColumnOrTable, indices: list[int], stream: Stream | None = None + input: ColumnOrTable, + indices: list[int], + stream: CudaStreamLike | None = None, ) -> list[ColumnOrTable]: ... def split( - input: ColumnOrTable, splits: list[int], stream: Stream | None = None + input: ColumnOrTable, + splits: list[int], + stream: CudaStreamLike | None = None, ) -> list[ColumnOrTable]: ... 
def copy_if_else( lhs: Column | Scalar, rhs: Column | Scalar, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def boolean_mask_scatter( input: Table | list[Scalar], target: Table, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def get_element( input_column: Column, index: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Scalar: ... diff --git a/python/pylibcudf/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx index f8f44e03938..30be1ea7d0a 100644 --- a/python/pylibcudf/pylibcudf/copying.pyx +++ b/python/pylibcudf/pylibcudf/copying.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator import dereference @@ -40,6 +40,7 @@ from .column cimport Column from .scalar cimport Scalar from .table cimport Table from .utils cimport _as_vector, _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -64,7 +65,7 @@ cpdef Table gather( Table source_table, Column gather_map, out_of_bounds_policy bounds_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Select rows from source_table according to the provided gather_map. @@ -94,7 +95,8 @@ cpdef Table gather( If the gather_map contains nulls. 
""" cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -102,18 +104,18 @@ cpdef Table gather( source_table.view(), gather_map.view(), bounds_policy, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table scatter( TableOrListOfScalars source, Column scatter_map, Table target_table, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Scatter from source into target_table according to scatter_map. @@ -155,7 +157,8 @@ cpdef Table scatter( """ cdef unique_ptr[table] c_result cdef vector[reference_wrapper[const scalar]] source_scalars - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if TableOrListOfScalars is Table: @@ -164,7 +167,7 @@ cpdef Table scatter( source.view(), scatter_map.view(), target_table.view(), - stream.view(), + _cs, mr.get_mr() ) else: @@ -174,14 +177,14 @@ cpdef Table scatter( source_scalars, scatter_map.view(), target_table.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef ColumnOrTable empty_like( - ColumnOrTable input, Stream stream=None, DeviceMemoryResource mr=None + ColumnOrTable input, object stream=None, DeviceMemoryResource mr=None ): """Create an empty column or table with the same type as ``input``. 
@@ -201,23 +204,23 @@ cpdef ColumnOrTable empty_like( """ cdef unique_ptr[table] c_tbl_result cdef unique_ptr[column] c_col_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) if ColumnOrTable is Column: with nogil: c_col_result = cpp_copying.empty_like(input.view()) - return Column.from_libcudf(move(c_col_result), stream, mr) + return Column.from_libcudf(move(c_col_result), _stream, mr) else: with nogil: c_tbl_result = cpp_copying.empty_like(input.view()) - return Table.from_libcudf(move(c_tbl_result), stream, mr) + return Table.from_libcudf(move(c_tbl_result), _stream, mr) cpdef Column allocate_like( Column input_column, mask_allocation_policy policy, size=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Allocate a column with the same type as input_column. @@ -244,7 +247,8 @@ cpdef Column allocate_like( cdef unique_ptr[column] c_result cdef size_type c_size = size if size is not None else input_column.size() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -252,11 +256,11 @@ cpdef Column allocate_like( input_column.view(), c_size, policy, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column copy_range_in_place( @@ -265,7 +269,7 @@ cpdef Column copy_range_in_place( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=None + object stream=None ): """Copy a range of elements from input_column to target_column. 
@@ -301,7 +305,8 @@ cpdef Column copy_range_in_place( """ cdef mutable_column_view target_view = target_column.mutable_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: cpp_copying.copy_range_in_place( @@ -310,7 +315,7 @@ cpdef Column copy_range_in_place( input_begin, input_end, target_begin, - stream.view() + _cs ) target_column.set_null_count(target_view.null_count()) @@ -321,7 +326,7 @@ cpdef Column copy_range( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copy a range of elements from input_column to target_column. @@ -357,7 +362,8 @@ cpdef Column copy_range( If target and source have different types. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -367,18 +373,18 @@ cpdef Column copy_range( input_begin, input_end, target_begin, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column shift( Column input, size_type offset, Scalar fill_value, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Shift the elements of input by offset. @@ -409,7 +415,8 @@ cpdef Column shift( of fixed width or string type. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -417,13 +424,13 @@ cpdef Column shift( input.view(), offset, dereference(fill_value.c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): +cpdef list slice(ColumnOrTable input, list indices, object stream=None): """Slice input according to indices. For details on the implementation, see :cpp:func:`slice`. @@ -454,11 +461,12 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): cdef vector[column_view] c_col_result cdef vector[table_view] c_tbl_result cdef int i - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() if ColumnOrTable is Column: with nogil: - c_col_result = cpp_copying.slice(input.view(), c_indices, stream.view()) + c_col_result = cpp_copying.slice(input.view(), c_indices, _cs) return [ Column.from_column_view(c_col_result[i], input) @@ -466,7 +474,7 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): ] else: with nogil: - c_tbl_result = cpp_copying.slice(input.view(), c_indices, stream.view()) + c_tbl_result = cpp_copying.slice(input.view(), c_indices, _cs) return [ Table.from_table_view(c_tbl_result[i], input) @@ -474,7 +482,7 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): ] -cpdef list split(ColumnOrTable input, list splits, Stream stream=None): +cpdef list split(ColumnOrTable input, list splits, object stream=None): """Split input into multiple. For details on the implementation, see :cpp:func:`split`. 
@@ -497,11 +505,12 @@ cpdef list split(ColumnOrTable input, list splits, Stream stream=None): cdef vector[column_view] c_col_result cdef vector[table_view] c_tbl_result cdef int i - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() if ColumnOrTable is Column: with nogil: - c_col_result = cpp_copying.split(input.view(), c_splits, stream.view()) + c_col_result = cpp_copying.split(input.view(), c_splits, _cs) return [ Column.from_column_view(c_col_result[i], input) @@ -509,7 +518,7 @@ cpdef list split(ColumnOrTable input, list splits, Stream stream=None): ] else: with nogil: - c_tbl_result = cpp_copying.split(input.view(), c_splits, stream.view()) + c_tbl_result = cpp_copying.split(input.view(), c_splits, _cs) return [ Table.from_table_view(c_tbl_result[i], input) @@ -521,7 +530,7 @@ cpdef Column copy_if_else( LeftCopyIfElseOperand lhs, RightCopyIfElseOperand rhs, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copy elements from lhs or rhs into a new column according to boolean_mask. @@ -556,7 +565,8 @@ cpdef Column copy_if_else( columns), or if lhs and rhs are not of the same length (if both are columns). 
""" cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Column: @@ -565,7 +575,7 @@ cpdef Column copy_if_else( lhs.view(), rhs.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) elif LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Scalar: @@ -574,7 +584,7 @@ cpdef Column copy_if_else( lhs.view(), dereference(rhs.c_obj), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) elif LeftCopyIfElseOperand is Scalar and RightCopyIfElseOperand is Column: @@ -583,7 +593,7 @@ cpdef Column copy_if_else( dereference(lhs.c_obj), rhs.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) else: @@ -592,18 +602,18 @@ cpdef Column copy_if_else( dereference(lhs.c_obj), dereference(rhs.c_obj), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Table boolean_mask_scatter( TableOrListOfScalars input, Table target, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Scatter rows from input into target according to boolean_mask. 
@@ -641,7 +651,8 @@ cpdef Table boolean_mask_scatter( """ cdef unique_ptr[table] result cdef vector[reference_wrapper[const scalar]] source_scalars - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if TableOrListOfScalars is Table: @@ -650,7 +661,7 @@ cpdef Table boolean_mask_scatter( input.view(), target.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) else: @@ -660,17 +671,17 @@ cpdef Table boolean_mask_scatter( source_scalars, target.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef Scalar get_element( Column input_column, size_type index, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Get the element at index from input_column. @@ -697,12 +708,13 @@ cpdef Scalar get_element( If index is out of bounds. """ cdef unique_ptr[scalar] c_output - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_output = cpp_copying.get_element( - input_column.view(), index, stream.view(), mr.get_mr() + input_column.view(), index, _cs, mr.get_mr() ) return Scalar.from_libcudf(move(c_output)) diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 1a93ee62c43..d7d15f0c19f 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -14,54 +13,54 @@ ctypedef fused ColumnOrScalar: cpdef Column extract_datetime_component( Column input, datetime_component component, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column ceil_datetimes( Column input, rounding_frequency freq, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column floor_datetimes( Column input, rounding_frequency freq, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column round_datetimes( Column input, rounding_frequency freq, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column add_calendrical_months( Column timestamps, ColumnOrScalar months, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column day_of_year( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column is_leap_year( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column last_day_of_month( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column extract_quarter( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column days_in_month( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi 
index abcc608daa4..e671d2d18cf 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyi +++ b/python/pylibcudf/pylibcudf/datetime.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class DatetimeComponent(IntEnum): YEAR = ... @@ -33,55 +33,55 @@ class RoundingFrequency(IntEnum): def extract_datetime_component( input: Column, component: DatetimeComponent, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def ceil_datetimes( input: Column, freq: RoundingFrequency, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def floor_datetimes( input: Column, freq: RoundingFrequency, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def round_datetimes( input: Column, freq: RoundingFrequency, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def add_calendrical_months( input: Column, months: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def day_of_year( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_leap_year( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def last_day_of_month( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def extract_quarter( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def days_in_month( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index 2a837c5b749..1e5270bad92 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -30,6 +30,7 @@ from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "DatetimeComponent", @@ -49,7 +50,7 @@ __all__ = [ cpdef Column extract_datetime_component( Column input, datetime_component component, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -73,19 +74,20 @@ cpdef Column extract_datetime_component( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_extract_datetime_component( - input.view(), component, stream.view(), mr.get_mr() + input.view(), component, _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column ceil_datetimes( Column input, 
rounding_frequency freq, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -109,17 +111,18 @@ cpdef Column ceil_datetimes( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_ceil_datetimes(input.view(), freq, stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_ceil_datetimes(input.view(), freq, _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column floor_datetimes( Column input, rounding_frequency freq, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -143,17 +146,18 @@ cpdef Column floor_datetimes( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_floor_datetimes(input.view(), freq, stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_floor_datetimes(input.view(), freq, _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column round_datetimes( Column input, rounding_frequency freq, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -177,17 +181,18 @@ cpdef Column round_datetimes( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_round_datetimes(input.view(), freq, stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_round_datetimes(input.view(), freq, _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column add_calendrical_months( Column input, 
ColumnOrScalar months, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -216,7 +221,8 @@ cpdef Column add_calendrical_months( cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -224,13 +230,13 @@ cpdef Column add_calendrical_months( input.view(), months.view() if ColumnOrScalar is Column else dereference(months.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column day_of_year( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Computes the day number since the start of @@ -253,15 +259,16 @@ cpdef Column day_of_year( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_day_of_year(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_day_of_year(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column is_leap_year( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Check if the year of the given date is a leap year. 
@@ -283,15 +290,16 @@ cpdef Column is_leap_year( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_is_leap_year(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_is_leap_year(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column last_day_of_month( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Computes the last day of the month. @@ -313,15 +321,16 @@ cpdef Column last_day_of_month( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_last_day_of_month(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_last_day_of_month(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column extract_quarter( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns the quarter (ie. 
a value from {1, 2, 3, 4}) @@ -343,15 +352,16 @@ cpdef Column extract_quarter( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_extract_quarter(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_extract_quarter(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column days_in_month( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Extract the number of days in the month. @@ -372,12 +382,13 @@ cpdef Column days_in_month( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_days_in_month(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_days_in_month(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) DatetimeComponent.__str__ = DatetimeComponent.__repr__ RoundingFrequency.__str__ = RoundingFrequency.__repr__ diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd b/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd index db9ca865197..832d572b467 100644 --- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd +++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd @@ -1,6 +1,5 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream -cpdef void join_streams(list streams, Stream stream) +cpdef void join_streams(list streams, object stream) diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi index 522239c6a80..c9c2ba79e36 100644 --- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi +++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi @@ -1,6 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream import Stream +from pylibcudf.utils import CudaStreamLike -def join_streams(streams: list[Stream], stream: Stream) -> None: ... +def join_streams( + streams: list[CudaStreamLike], stream: CudaStreamLike +) -> None: ... diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx index 7f3d2f228fb..d9efcb19ed9 100644 --- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx +++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx @@ -1,21 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from cuda.bindings.cyruntime cimport cudaStream_t from libcpp.vector cimport vector from pylibcudf.libcudf.detail.utilities cimport stream_pool as cpp_stream_pool +from pylibcudf.libcudf.detail.utilities.stream_pool cimport const_cudaStream_t from pylibcudf.libcudf.utilities.span cimport host_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view from rmm.pylibrmm.stream cimport Stream -ctypedef const cuda_stream_view const_cuda_stream_view +from ..utils cimport _get_stream __all__ = ["join_streams"] -cpdef void join_streams(list streams, Stream stream): +cpdef void join_streams(list streams, object stream): """Synchronize a stream to an event on a set of streams. This function synchronizes the joined stream with the waited-on streams @@ -42,15 +43,16 @@ cpdef void join_streams(list streams, Stream stream): >>> plc.experimental.join_streams([stream1, stream2], join_stream) >>> # ... continue work on join_stream ... """ - cdef Stream c_stream = stream - cdef vector[cuda_stream_view] c_streams + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + cdef vector[cudaStream_t] c_streams c_streams.reserve(len(streams)) for s in streams: - c_streams.push_back((s).view()) + c_streams.push_back((_get_stream(s)).view().value()) with nogil: cpp_stream_pool.join_streams( - host_span[const_cuda_stream_view](c_streams.data(), c_streams.size()), - c_stream.view() + host_span[const_cudaStream_t](c_streams.data(), c_streams.size()), + _cs ) diff --git a/python/pylibcudf/pylibcudf/filling.pxd b/python/pylibcudf/pylibcudf/filling.pxd index b90d567b2c2..acb92e0212a 100644 --- a/python/pylibcudf/pylibcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/filling.pxd @@ -1,7 +1,6 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.libcudf.types cimport size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -17,7 +16,7 @@ cpdef Column fill( size_type begin, size_type end, Scalar value, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -26,21 +25,21 @@ cpdef void fill_in_place( size_type c_begin, size_type c_end, Scalar value, - Stream stream = *, + object stream = *, ) cpdef Column sequence( size_type size, Scalar init, Scalar step, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Table repeat( Table input_table, ColumnOrSize count, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -48,6 +47,6 @@ cpdef Column calendrical_month_sequence( size_type n, Scalar init, size_type months, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi index a1023f8016c..2789ecd5aca 100644 --- a/python/pylibcudf/pylibcudf/filling.pyi +++ b/python/pylibcudf/pylibcudf/filling.pyi @@ -1,32 +1,33 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream import Stream - from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def fill( destination: Column, begin: int, end: int, value: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... def fill_in_place( destination: Column, begin: int, end: int, value: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> None: ... 
def sequence( - size: int, init: Scalar, step: Scalar, stream: Stream | None = None + size: int, init: Scalar, step: Scalar, stream: CudaStreamLike | None = None ) -> Column: ... def repeat( - input_table: Table, count: Column | int, stream: Stream | None = None + input_table: Table, + count: Column | int, + stream: CudaStreamLike | None = None, ) -> Table: ... def calendrical_month_sequence( - n: int, init: Scalar, months: int, stream: Stream | None = None + n: int, init: Scalar, months: int, stream: CudaStreamLike | None = None ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index 68e4862dfb8..ce6002eb24e 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -22,6 +22,7 @@ from .column cimport Column from .scalar cimport Scalar from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -37,7 +38,7 @@ cpdef Column fill( size_type begin, size_type end, Scalar value, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -68,7 +69,8 @@ cpdef Column fill( cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -77,17 +79,17 @@ cpdef Column fill( begin, end, dereference(( value).c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef void fill_in_place( Column destination, size_type begin, size_type end, Scalar value, - Stream stream=None, + object stream=None, ): """Fill 
destination column in place from begin to end with value. @@ -112,7 +114,8 @@ cpdef void fill_in_place( None """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() cdef mutable_column_view c_destination = destination.mutable_view() with nogil: @@ -121,7 +124,7 @@ cpdef void fill_in_place( begin, end, dereference(value.c_obj), - stream.view() + _cs ) destination.set_null_count(c_destination.null_count()) @@ -129,7 +132,7 @@ cpdef Column sequence( size_type size, Scalar init, Scalar step, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a sequence column of size ``size`` with initial value ``init`` and step @@ -157,7 +160,8 @@ cpdef Column sequence( cdef unique_ptr[column] result cdef size_type c_size = size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -165,16 +169,16 @@ cpdef Column sequence( c_size, dereference(init.c_obj), dereference(step.c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Table repeat( Table input_table, ColumnOrSize count, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Repeat rows of a Table. 
@@ -203,7 +207,8 @@ cpdef Table repeat( cdef unique_ptr[table] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrSize is Column: @@ -211,7 +216,7 @@ cpdef Table repeat( result = cpp_repeat( input_table.view(), count.view(), - stream.view(), + _cs, mr.get_mr() ) if ColumnOrSize is size_type: @@ -219,17 +224,17 @@ cpdef Table repeat( result = cpp_repeat( input_table.view(), count, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef Column calendrical_month_sequence( size_type n, Scalar init, size_type months, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -256,7 +261,8 @@ cpdef Column calendrical_month_sequence( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -264,7 +270,7 @@ cpdef Column calendrical_month_sequence( n, dereference(init.c_obj), months, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/groupby.pxd b/python/pylibcudf/pylibcudf/groupby.pxd index b5654ff6df8..a46146a145a 100644 --- a/python/pylibcudf/pylibcudf/groupby.pxd +++ b/python/pylibcudf/pylibcudf/groupby.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -19,7 +19,6 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport null_order, order from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -46,31 +45,31 @@ cdef class GroupBy: cdef unique_ptr[vector[null_order]] _null_precedence cpdef tuple aggregate( - self, list requests, Stream stream=*, DeviceMemoryResource mr=* + self, list requests, object stream = *, DeviceMemoryResource mr=* ) - cpdef tuple scan(self, list requests, Stream stream=*, DeviceMemoryResource mr=*) + cpdef tuple scan(self, list requests, object stream = *, DeviceMemoryResource mr=*) cpdef tuple shift( self, Table values, list offset, list fill_values, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef tuple replace_nulls( self, Table values, list replace_policies, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef tuple get_groups( - self, Table values=*, Stream stream=*, DeviceMemoryResource mr=* + self, Table values=*, object stream = *, DeviceMemoryResource mr=* ) @staticmethod cdef tuple _parse_outputs( pair[unique_ptr[table], vector[aggregation_result]] c_res, - Stream stream, + object stream, DeviceMemoryResource mr, ) diff --git a/python/pylibcudf/pylibcudf/groupby.pyi b/python/pylibcudf/pylibcudf/groupby.pyi index 75322706187..01c732175f4 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyi +++ b/python/pylibcudf/pylibcudf/groupby.pyi @@ -1,8 +1,7 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column @@ -10,6 +9,7 @@ from pylibcudf.replace import ReplacePolicy from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import NullOrder, NullPolicy, Order, Sorted +from pylibcudf.utils import CudaStreamLike class GroupByRequest: def __init__( @@ -28,13 +28,13 @@ class GroupBy: def aggregate( self, requests: list[GroupByRequest], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[Table]]: ... def scan( self, requests: list[GroupByRequest], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[Table]]: ... def shift( @@ -42,19 +42,19 @@ class GroupBy: values: Table, offset: list[int], fill_values: list[Scalar], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, Table]: ... def replace_nulls( self, value: Table, replace_policies: list[ReplacePolicy], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, Table]: ... def get_groups( self, values: Table | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[list[int], Table, Table]: ... diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx index 94a292996a0..4b2f842a360 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyx +++ b/python/pylibcudf/pylibcudf/groupby.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -28,6 +28,7 @@ from .column cimport Column from .table cimport Table from .types cimport null_order, null_policy, order, sorted from .utils cimport _as_vector, _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["GroupBy", "GroupByRequest"] @@ -141,12 +142,13 @@ cdef class GroupBy: @staticmethod cdef tuple _parse_outputs( pair[unique_ptr[table], vector[aggregation_result]] c_res, - Stream stream, + object stream, DeviceMemoryResource mr, ): # Convert libcudf aggregation/scan outputs into pylibcudf objects. # This function is for internal use only. - cdef Table group_keys = Table.from_libcudf(move(c_res.first), stream, mr) + cdef Stream _stream = stream + cdef Table group_keys = Table.from_libcudf(move(c_res.first), _stream, mr) cdef int i, j cdef list results = [] @@ -155,13 +157,13 @@ cdef class GroupBy: inner_results = [] for j in range(c_res.second[i].results.size()): inner_results.append( - Column.from_libcudf(move(c_res.second[i].results[j]), stream, mr) + Column.from_libcudf(move(c_res.second[i].results[j]), _stream, mr) ) results.append(Table(inner_results)) return group_keys, results cpdef tuple aggregate( - self, list requests, Stream stream=None, DeviceMemoryResource mr=None + self, list requests, object stream=None, DeviceMemoryResource mr=None ): """Compute aggregations on columns. @@ -189,19 +191,20 @@ cdef class GroupBy: c_requests.push_back(move(request._to_libcudf_agg_request())) cdef pair[unique_ptr[table], vector[aggregation_result]] c_res - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # TODO: Need to capture C++ exceptions indicating that an invalid type was used. # We rely on libcudf to tell us this rather than checking the types beforehand # ourselves. 
with nogil: c_res = dereference(self.c_obj).aggregate( - c_requests, stream.view(), mr.get_mr() + c_requests, _cs, mr.get_mr() ) - return GroupBy._parse_outputs(move(c_res), stream, mr) + return GroupBy._parse_outputs(move(c_res), _stream, mr) cpdef tuple scan( - self, list requests, Stream stream=None, DeviceMemoryResource mr=None + self, list requests, object stream=None, DeviceMemoryResource mr=None ): """Compute scans on columns. @@ -229,18 +232,23 @@ cdef class GroupBy: c_requests.push_back(move(request._to_libcudf_scan_request())) cdef pair[unique_ptr[table], vector[aggregation_result]] c_res - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_res = dereference(self.c_obj).scan(c_requests, stream.view(), mr.get_mr()) - return GroupBy._parse_outputs(move(c_res), stream, mr) + c_res = dereference(self.c_obj).scan( + c_requests, + _cs, + mr.get_mr(), + ) + return GroupBy._parse_outputs(move(c_res), _stream, mr) cpdef tuple shift( self, Table values, list offset, list fill_values, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Compute shifts on columns. 
@@ -269,26 +277,27 @@ cdef class GroupBy: cdef vector[size_type] c_offset = offset cdef pair[unique_ptr[table], unique_ptr[table]] c_res - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_res = dereference(self.c_obj).shift( values.view(), c_offset, c_fill_values, - stream.view(), + _cs, mr.get_mr() ) return ( - Table.from_libcudf(move(c_res.first), stream, mr), - Table.from_libcudf(move(c_res.second), stream, mr), + Table.from_libcudf(move(c_res.first), _stream, mr), + Table.from_libcudf(move(c_res.second), _stream, mr), ) cpdef tuple replace_nulls( self, Table value, list replace_policies, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replace nulls in columns. @@ -312,22 +321,23 @@ cdef class GroupBy: """ cdef pair[unique_ptr[table], unique_ptr[table]] c_res cdef vector[replace_policy] c_replace_policies = replace_policies - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_res = dereference(self.c_obj).replace_nulls( value.view(), c_replace_policies, - stream.view(), + _cs, mr.get_mr() ) return ( - Table.from_libcudf(move(c_res.first), stream, mr), - Table.from_libcudf(move(c_res.second), stream, mr), + Table.from_libcudf(move(c_res.first), _stream, mr), + Table.from_libcudf(move(c_res.second), _stream, mr), ) cpdef tuple get_groups( - self, Table values=None, Stream stream=None, DeviceMemoryResource mr=None + self, Table values=None, object stream=None, DeviceMemoryResource mr=None ): """Get the grouped keys and values labels for each row. 
@@ -352,24 +362,24 @@ cdef class GroupBy: cdef groups c_groups cdef table_view empty_view - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) if values: c_groups = dereference(self.c_obj).get_groups( - values.view(), stream.view(), mr.get_mr() + values.view(), _stream.view().value(), mr.get_mr() ) return ( c_groups.offsets, - Table.from_libcudf(move(c_groups.keys), stream, mr), - Table.from_libcudf(move(c_groups.values), stream, mr), + Table.from_libcudf(move(c_groups.keys), _stream, mr), + Table.from_libcudf(move(c_groups.values), _stream, mr), ) else: # c_groups.values is nullptr - call get_groups with empty table view c_groups = dereference(self.c_obj).get_groups( - empty_view, stream.view(), mr.get_mr() + empty_view, _stream.view().value(), mr.get_mr() ) return ( c_groups.offsets, - Table.from_libcudf(move(c_groups.keys), stream, mr), + Table.from_libcudf(move(c_groups.keys), _stream, mr), None, ) diff --git a/python/pylibcudf/pylibcudf/hashing.pxd b/python/pylibcudf/pylibcudf/hashing.pxd index 4febd6e4949..b824f2dbcb8 100644 --- a/python/pylibcudf/pylibcudf/hashing.pxd +++ b/python/pylibcudf/pylibcudf/hashing.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -12,34 +11,34 @@ from .table cimport Table cpdef Column murmurhash3_x86_32( Table input, uint32_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Table murmurhash3_x64_128( Table input, uint64_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column xxhash_32( Table input, uint32_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column xxhash_64( Table input, uint64_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) -cpdef Column md5(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha1(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha224(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha256(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha384(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha512(Table input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column md5(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha1(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha224(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha256(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha384(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha512(Table input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi index 1b8d055368a..dae03796b9c 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyi +++ b/python/pylibcudf/pylibcudf/hashing.pyi @@ -1,67 +1,67 @@ -# SPDX-FileCopyrightText: 
Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from typing import Final from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike LIBCUDF_DEFAULT_HASH_SEED: Final[int] def murmurhash3_x86_32( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def murmurhash3_x64_128( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def xxhash_32( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def xxhash_64( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def md5( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha1( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha224( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha256( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha384( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def sha512( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx index d9db52720bf..941393cf949 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyx +++ b/python/pylibcudf/pylibcudf/hashing.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t from libcpp.memory cimport unique_ptr @@ -24,6 +24,7 @@ from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "LIBCUDF_DEFAULT_HASH_SEED", @@ -44,7 +45,7 @@ LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED cpdef Column murmurhash3_x86_32( Table input, uint32_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the MurmurHash3 32-bit hash value of each row in the given table. @@ -65,24 +66,25 @@ cpdef Column murmurhash3_x86_32( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_murmurhash3_x86_32( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table murmurhash3_x64_128( Table input, uint64_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the MurmurHash3 64-bit hash value of each row in the given table. 
@@ -103,24 +105,25 @@ cpdef Table murmurhash3_x64_128( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_murmurhash3_x64_128( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column xxhash_32( Table input, uint32_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the xxHash 32-bit hash value of each row in the given table. @@ -142,24 +145,25 @@ cpdef Column xxhash_32( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_xxhash_32( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column xxhash_64( Table input, uint64_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the xxHash 64-bit hash value of each row in the given table. @@ -181,23 +185,24 @@ cpdef Column xxhash_64( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_xxhash_64( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column md5( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the MD5 hash value of each row in the given table. 
@@ -220,16 +225,17 @@ cpdef Column md5( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_md5(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_md5(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha1( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-1 hash value of each row in the given table. @@ -250,17 +256,18 @@ cpdef Column sha1( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha1(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha1(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha224( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-224 hash value of each row in the given table. @@ -281,17 +288,18 @@ cpdef Column sha224( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha224(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha224(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha256( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-256 hash value of each row in the given table. 
@@ -312,17 +320,18 @@ cpdef Column sha256( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha256(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha256(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha384( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-384 hash value of each row in the given table. @@ -343,17 +352,18 @@ cpdef Column sha384( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha384(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha384(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha512( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-512 hash value of each row in the given table. 
@@ -374,9 +384,10 @@ cpdef Column sha512( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha512(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha512(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/interop.pxd b/python/pylibcudf/pylibcudf/interop.pxd index dfa62233541..942b9e806bc 100644 --- a/python/pylibcudf/pylibcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/interop.pxd @@ -1,12 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cpdef Table from_dlpack( - object managed_tensor, Stream stream=*, DeviceMemoryResource mr=* + object managed_tensor, object stream = *, DeviceMemoryResource mr=* ) -cpdef object to_dlpack(Table input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef object to_dlpack(Table input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi index 0c10d71ec4f..34fe9394f7d 100644 --- a/python/pylibcudf/pylibcudf/interop.pyi +++ b/python/pylibcudf/pylibcudf/interop.pyi @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from collections.abc import Iterable, Mapping @@ -8,12 +8,12 @@ from typing import Any, overload import pyarrow as pa from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike @dataclass class ColumnMetadata: @@ -33,14 +33,14 @@ def from_arrow( obj: pa.Array[Any], *, data_type: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @overload def from_arrow( obj: pa.Table, *, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... @overload @@ -67,11 +67,11 @@ def to_arrow( ) -> pa.Scalar[Any]: ... def from_dlpack( managed_tensor: Any, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def to_dlpack( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Any: ... 
diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx index ffc14415470..23c47bb090f 100644 --- a/python/pylibcudf/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -23,6 +23,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .table cimport Table from .utils cimport _get_stream, _get_memory_resource from ._interop_helpers import ColumnMetadata +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -35,7 +36,7 @@ __all__ = [ cpdef Table from_dlpack( - object managed_tensor, Stream stream=None, DeviceMemoryResource mr=None + object managed_tensor, object stream=None, DeviceMemoryResource mr=None ): """ Convert a DLPack DLTensor into a cudf table. @@ -65,7 +66,8 @@ cpdef Table from_dlpack( if dlpack_tensor is NULL: raise ValueError("PyCapsule object contained a NULL pointer") PyCapsule_SetName(managed_tensor, "used_dltensor") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # Note: A copy is always performed when converting the dlpack @@ -74,14 +76,14 @@ cpdef Table from_dlpack( # TODO: https://github.com/rapidsai/cudf/issues/10874 # TODO: https://github.com/rapidsai/cudf/issues/10849 with nogil: - c_result = cpp_from_dlpack(dlpack_tensor, stream.view(), mr.get_mr()) + c_result = cpp_from_dlpack(dlpack_tensor, _cs, mr.get_mr()) - cdef Table result = Table.from_libcudf(move(c_result), stream, mr) + cdef Table result = Table.from_libcudf(move(c_result), _stream, mr) dlpack_tensor.deleter(dlpack_tensor) return result -cpdef object to_dlpack(Table input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef object to_dlpack(Table input, object stream=None, DeviceMemoryResource mr=None): """ Convert a cudf table into a DLPack DLTensor. 
@@ -109,11 +111,12 @@ cpdef object to_dlpack(Table input, Stream stream=None, DeviceMemoryResource mr= "Input is required to have null count as zero." ) cdef DLManagedTensor *dlpack_tensor - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - dlpack_tensor = cpp_to_dlpack(input.view(), stream.view(), mr.get_mr()) + dlpack_tensor = cpp_to_dlpack(input.view(), _cs, mr.get_mr()) return PyCapsule_New( dlpack_tensor, diff --git a/python/pylibcudf/pylibcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/io/avro.pxd index d76f2c1e628..0e8cb7ee283 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pxd +++ b/python/pylibcudf/pylibcudf/io/avro.pxd @@ -1,6 +1,5 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SourceInfo, TableWithMetadata @@ -29,5 +28,5 @@ cdef class AvroReaderOptionsBuilder: cpdef AvroReaderOptions build(self) cpdef TableWithMetadata read_avro( - AvroReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + AvroReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/io/avro.pyi b/python/pylibcudf/pylibcudf/io/avro.pyi index d7b6c87d388..7e41c39a2be 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pyi +++ b/python/pylibcudf/pylibcudf/io/avro.pyi @@ -1,9 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.io.types import SourceInfo, TableWithMetadata +from pylibcudf.utils import CudaStreamLike __all__ = ["AvroReaderOptions", "AvroReaderOptionsBuilder", "read_avro"] @@ -21,6 +21,6 @@ class AvroReaderOptionsBuilder: def read_avro( options: AvroReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... diff --git a/python/pylibcudf/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx index 9c5e2c05b11..f2bd021cdde 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pyx +++ b/python/pylibcudf/pylibcudf/io/avro.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string @@ -6,6 +6,7 @@ from libcpp.utility cimport move from libcpp.vector cimport vector from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SourceInfo, TableWithMetadata @@ -152,7 +153,7 @@ cdef class AvroReaderOptionsBuilder: cpdef TableWithMetadata read_avro( AvroReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr=None, ): """ @@ -173,8 +174,9 @@ cpdef TableWithMetadata read_avro( Device memory resource used to allocate the returned table's device memory. 
""" cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_avro(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_avro(options.c_obj, _cs, mr.get_mr())) return TableWithMetadata.from_libcudf(c_result, s, mr) diff --git a/python/pylibcudf/pylibcudf/io/csv.pxd b/python/pylibcudf/pylibcudf/io/csv.pxd index 2f138e3aaa1..4293452311d 100644 --- a/python/pylibcudf/pylibcudf/io/csv.pxd +++ b/python/pylibcudf/pylibcudf/io/csv.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from libcpp.string cimport string from libcpp.vector cimport vector -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SinkInfo, SourceInfo, TableWithMetadata @@ -74,7 +73,7 @@ cdef class CsvReaderOptionsBuilder: cpdef CsvReaderOptions build(self) cpdef TableWithMetadata read_csv( - CsvReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + CsvReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) cdef class CsvWriterOptions: @@ -98,6 +97,6 @@ cdef class CsvWriterOptionsBuilder: cpdef CsvWriterOptions build(self) -cpdef void write_csv(CsvWriterOptions options, Stream stream = *) +cpdef void write_csv(CsvWriterOptions options, object stream = *) cpdef bool is_supported_write_csv(DataType type) diff --git a/python/pylibcudf/pylibcudf/io/csv.pyi b/python/pylibcudf/pylibcudf/io/csv.pyi index ade964da509..41465b3ba43 100644 --- a/python/pylibcudf/pylibcudf/io/csv.pyi +++ b/python/pylibcudf/pylibcudf/io/csv.pyi @@ -4,7 +4,6 @@ from typing import Self from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.io.types import ( CompressionType, @@ -15,6 +14,7 
@@ from pylibcudf.io.types import ( ) from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class CsvReaderOptions: def __init__(self): ... @@ -61,10 +61,12 @@ class CsvReaderOptionsBuilder: def read_csv( options: CsvReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... -def write_csv(options: CsvWriterOptions, stream: Stream | None = None): ... +def write_csv( + options: CsvWriterOptions, stream: CudaStreamLike | None = None +): ... class CsvWriterOptions: def __init__(self): ... diff --git a/python/pylibcudf/pylibcudf/io/csv.pyx b/python/pylibcudf/pylibcudf/io/csv.pyx index 749cd45fcb5..1c3ae9cb0bf 100644 --- a/python/pylibcudf/pylibcudf/io/csv.pyx +++ b/python/pylibcudf/pylibcudf/io/csv.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -8,6 +8,7 @@ from libcpp.utility cimport move from libcpp.vector cimport vector from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SourceInfo, SinkInfo, TableWithMetadata @@ -672,7 +673,7 @@ cdef class CsvReaderOptionsBuilder: cpdef TableWithMetadata read_csv( CsvReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr=None, ): """ @@ -694,9 +695,10 @@ cpdef TableWithMetadata read_csv( """ cdef table_with_metadata c_result cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_csv(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_csv(options.c_obj, _cs, mr.get_mr())) cdef TableWithMetadata tbl_meta = TableWithMetadata.from_libcudf(c_result, s, mr) return tbl_meta @@ -882,7 +884,7 @@ cdef class CsvWriterOptionsBuilder: cpdef void write_csv( CsvWriterOptions options, - Stream stream = None, + object stream = None, ): """ Write to CSV format. 
@@ -900,8 +902,9 @@ cpdef void write_csv( CUDA stream used for device memory operations and kernel launches """ cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() with nogil: - cpp_write_csv(move(options.c_obj), s.view()) + cpp_write_csv(move(options.c_obj), _cs) cpdef bool is_supported_write_csv(DataType type): diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd index 298b36651c3..8c471831823 100644 --- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd +++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd @@ -32,5 +32,5 @@ cdef class FileMetaData: cdef class HybridScanReader: cdef unique_ptr[cpp_hybrid_scan_reader] c_obj - cdef Stream stream + cdef Stream _stream cdef DeviceMemoryResource mr diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi index 0f0429a66db..6f1fbc250d8 100644 --- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi +++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi @@ -4,13 +4,13 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.io.parquet import ParquetReaderOptions from pylibcudf.io.text import ByteRangeInfo from pylibcudf.io.types import TableWithMetadata from pylibcudf.span import Span +from pylibcudf.utils import CudaStreamLike class UseDataPageMask(IntEnum): YES: int @@ -44,7 +44,7 @@ class HybridScanReader: self, row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> list[int]: ... 
def secondary_filters_byte_ranges( self, row_group_indices: list[int], options: ParquetReaderOptions @@ -54,20 +54,20 @@ class HybridScanReader: dictionary_page_data: list[Span], row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> list[int]: ... def filter_row_groups_with_bloom_filters( self, bloom_filter_data: list[Span], row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> list[int]: ... def build_row_mask_with_page_index_stats( self, row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def filter_column_chunks_byte_ranges( @@ -80,7 +80,7 @@ class HybridScanReader: row_mask: Column, mask_data_pages: UseDataPageMask, options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... def payload_column_chunks_byte_ranges( @@ -93,7 +93,7 @@ class HybridScanReader: row_mask: Column, mask_data_pages: UseDataPageMask, options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... def all_column_chunks_byte_ranges( @@ -104,7 +104,7 @@ class HybridScanReader: row_group_indices: list[int], column_chunk_data: list[Span], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... 
def setup_chunking_for_filter_columns( @@ -116,7 +116,7 @@ class HybridScanReader: mask_data_pages: UseDataPageMask, column_chunk_data: list[Span], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> None: ... def materialize_filter_columns_chunk( @@ -132,7 +132,7 @@ class HybridScanReader: mask_data_pages: UseDataPageMask, column_chunk_data: list[Span], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> None: ... def materialize_payload_columns_chunk( diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx index beb28f6a1b0..4d25a05d362 100644 --- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx +++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx @@ -225,7 +225,7 @@ cdef class HybridScanReader: self, list row_group_indices, ParquetReaderOptions options, - Stream stream=None + object stream=None ): """Filter row groups using column chunk statistics. @@ -243,7 +243,7 @@ cdef class HybridScanReader: list[int] Filtered row group indices """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) cdef vector[size_type] indices_vec = row_group_indices cdef vector[size_type] filtered = ( self.c_obj.get()[0].filter_row_groups_with_stats( @@ -251,7 +251,7 @@ cdef class HybridScanReader: indices_vec.data(), indices_vec.size() ), options.c_obj, - stream.view() + _stream.view().value() ) ) return list(filtered) @@ -295,7 +295,7 @@ cdef class HybridScanReader: list dictionary_page_data, list row_group_indices, ParquetReaderOptions options, - Stream stream=None + object stream=None ): """Filter row groups using column chunk dictionary pages. 
@@ -316,7 +316,7 @@ cdef class HybridScanReader: Filtered row group indices """ cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) for span in dictionary_page_data: spans_vec.push_back(_get_device_span(span)) @@ -329,7 +329,7 @@ cdef class HybridScanReader: ), host_span[const_size_type](indices_vec.data(), indices_vec.size()), options.c_obj, - stream.view() + _stream.view().value() ) return list(filtered) @@ -338,7 +338,7 @@ cdef class HybridScanReader: list bloom_filter_data, list row_group_indices, ParquetReaderOptions options, - Stream stream=None + object stream=None ): """Filter row groups using column chunk bloom filters. @@ -359,7 +359,7 @@ cdef class HybridScanReader: Filtered row group indices """ cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) for span in bloom_filter_data: spans_vec.push_back(_get_device_span(span)) @@ -372,7 +372,7 @@ cdef class HybridScanReader: ), host_span[const_size_type](indices_vec.data(), indices_vec.size()), options.c_obj, - stream.view() + _stream.view().value() ) return list(filtered) @@ -380,7 +380,7 @@ cdef class HybridScanReader: self, list row_group_indices, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Build a boolean column indicating surviving rows from page stats. 
@@ -402,16 +402,16 @@ cdef class HybridScanReader: Boolean column indicating surviving rows """ cdef vector[size_type] indices_vec = row_group_indices - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) cdef unique_ptr[column] c_result = \ self.c_obj.get()[0].build_row_mask_with_page_index_stats( host_span[const_size_type](indices_vec.data(), indices_vec.size()), options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) def filter_column_chunks_byte_ranges( self, @@ -447,7 +447,7 @@ cdef class HybridScanReader: Column row_mask, cpp_use_data_page_mask mask_data_pages, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Materialize filter columns and update the row mask. @@ -477,7 +477,7 @@ cdef class HybridScanReader: cdef vector[size_type] indices_vec = row_group_indices cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) @@ -492,10 +492,10 @@ cdef class HybridScanReader: mask_view, mask_data_pages, options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) def payload_column_chunks_byte_ranges( self, @@ -531,7 +531,7 @@ cdef class HybridScanReader: Column row_mask, cpp_use_data_page_mask mask_data_pages, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Materialize payload columns and apply the row mask. 
@@ -561,7 +561,7 @@ cdef class HybridScanReader: cdef vector[size_type] indices_vec = row_group_indices cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) @@ -576,10 +576,10 @@ cdef class HybridScanReader: mask_view, mask_data_pages, options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) def all_column_chunks_byte_ranges( self, @@ -613,7 +613,7 @@ cdef class HybridScanReader: list row_group_indices, list column_chunk_data, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Materialize all columns. @@ -639,7 +639,7 @@ cdef class HybridScanReader: cdef vector[size_type] indices_vec = row_group_indices cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) @@ -650,10 +650,10 @@ cdef class HybridScanReader: spans_vec.data(), spans_vec.size() ), options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) def setup_chunking_for_filter_columns( self, @@ -664,7 +664,7 @@ cdef class HybridScanReader: cpp_use_data_page_mask mask_data_pages, list column_chunk_data, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Setup chunking information for filter columns. 
@@ -696,7 +696,7 @@ cdef class HybridScanReader: for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) - self.stream = _get_stream(stream) + self._stream = _get_stream(stream) self.mr = _get_memory_resource(mr) cdef column_view mask_view = row_mask.view() @@ -710,7 +710,7 @@ cdef class HybridScanReader: spans_vec.data(), spans_vec.size() ), options.c_obj, - self.stream.view(), + self._stream.view().value(), self.mr.get_mr() ) @@ -735,7 +735,7 @@ cdef class HybridScanReader: mask_view ) return TableWithMetadata.from_libcudf( - c_result, self.stream, self.mr + c_result, self._stream, self.mr ) def setup_chunking_for_payload_columns( @@ -747,7 +747,7 @@ cdef class HybridScanReader: cpp_use_data_page_mask mask_data_pages, list column_chunk_data, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Setup chunking information for payload columns. @@ -779,7 +779,7 @@ cdef class HybridScanReader: for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) - self.stream = _get_stream(stream) + self._stream = _get_stream(stream) self.mr = _get_memory_resource(mr) cdef column_view mask_view = row_mask.view() @@ -793,7 +793,7 @@ cdef class HybridScanReader: spans_vec.data(), spans_vec.size() ), options.c_obj, - self.stream.view(), + self._stream.view().value(), self.mr.get_mr() ) @@ -818,7 +818,7 @@ cdef class HybridScanReader: mask_view ) return TableWithMetadata.from_libcudf( - c_result, self.stream, self.mr + c_result, self._stream, self.mr ) def construct_row_group_passes( diff --git a/python/pylibcudf/pylibcudf/io/json.pxd b/python/pylibcudf/pylibcudf/io/json.pxd index 96bc102ef0b..e46942ea14b 100644 --- a/python/pylibcudf/pylibcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/io/json.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from libcpp.map cimport map from libcpp.vector cimport vector -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport ( @@ -83,7 +82,7 @@ cdef class JsonReaderOptionsBuilder: cpdef build(self) cpdef TableWithMetadata read_json( - JsonReaderOptions options, Stream stream = *, DeviceMemoryResource mr = * + JsonReaderOptions options, object stream = *, DeviceMemoryResource mr = * ) cpdef TableWithMetadata read_json_from_string_column( @@ -93,7 +92,7 @@ cpdef TableWithMetadata read_json_from_string_column( list dtypes = *, compression_type compression = *, json_recovery_mode_t recovery_mode = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *) cdef class JsonWriterOptions: @@ -117,13 +116,13 @@ cdef class JsonWriterOptionsBuilder: cpdef JsonWriterOptionsBuilder utf8_escaped(self, bool val) cpdef JsonWriterOptions build(self) -cpdef void write_json(JsonWriterOptions options, Stream stream = *) +cpdef void write_json(JsonWriterOptions options, object stream = *) cpdef bool is_supported_write_json(DataType type) cpdef tuple chunked_read_json( JsonReaderOptions options, int chunk_size= *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi index f19da874a0d..a03d8ef407c 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyi +++ b/python/pylibcudf/pylibcudf/io/json.pyi @@ -4,7 +4,6 @@ from collections.abc import Mapping from typing import Self, TypeAlias from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.io.types import ( @@ -17,6 +16,7 @@ from pylibcudf.io.types import ( from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils 
import CudaStreamLike ChildNameToTypeMap: TypeAlias = Mapping[str, ChildNameToTypeMap] @@ -73,7 +73,7 @@ class JsonReaderOptionsBuilder: def read_json( options: JsonReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... def read_json_from_string_column( @@ -83,7 +83,7 @@ def read_json_from_string_column( dtypes: list | None = None, compression: CompressionType = CompressionType.NONE, recovery_mode: JSONRecoveryMode = JSONRecoveryMode.RECOVER_WITH_NULL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... @@ -105,12 +105,12 @@ class JsonWriterOptionsBuilder: def build(self) -> JsonWriterOptions: ... def write_json( - options: JsonWriterOptions, stream: Stream | None = None + options: JsonWriterOptions, stream: CudaStreamLike | None = None ) -> None: ... def chunked_read_json( options: JsonReaderOptions, chunk_size: int = 100_000_000, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[list[Column], list[str], ChildNameToTypeMap]: ... def is_supported_write_json(type: DataType) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx index aa66c6fe5c2..1bce364fdd8 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyx +++ b/python/pylibcudf/pylibcudf/io/json.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from libcpp.map cimport map @@ -49,6 +49,7 @@ from pylibcudf.utils cimport _get_stream from cython.operator import dereference from rmm.pylibrmm.device_buffer cimport DeviceBuffer +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "chunked_read_json", @@ -704,7 +705,7 @@ cdef class JsonReaderOptionsBuilder: cpdef tuple chunked_read_json( JsonReaderOptions options, int chunk_size=100_000_000, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None, ): """ @@ -735,6 +736,7 @@ cpdef tuple chunked_read_json( child_names = None i = 0 cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) while True: options.enable_lines(True) @@ -743,7 +745,7 @@ cpdef tuple chunked_read_json( try: with nogil: - c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr())) except (ValueError, OverflowError): break if meta_names is None: @@ -772,7 +774,7 @@ cpdef tuple chunked_read_json( cpdef TableWithMetadata read_json( JsonReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None ): """ @@ -797,9 +799,10 @@ cpdef TableWithMetadata read_json( """ cdef table_with_metadata c_result cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr())) return TableWithMetadata.from_libcudf(c_result, s, mr) @@ -810,7 +813,7 @@ cpdef TableWithMetadata read_json_from_string_column( list dtypes = None, compression_type compression = compression_type.NONE, json_recovery_mode_t recovery_mode = json_recovery_mode_t.RECOVER_WITH_NULL, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None ): """ @@ -852,7 +855,8 @@ cpdef 
TableWithMetadata read_json_from_string_column( cdef unique_ptr[column] c_join_string_column cdef column_contents c_contents cdef table_with_metadata c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # Join the string column into a single string @@ -862,7 +866,7 @@ cpdef TableWithMetadata read_json_from_string_column( input.view(), dereference(c_separator), dereference(c_narep), - stream.view(), + _cs, mr.get_mr() ) ) @@ -870,7 +874,7 @@ cpdef TableWithMetadata read_json_from_string_column( # Create a new source from the joined string data cdef SourceInfo joined_source = SourceInfo( - [DeviceBuffer.c_from_unique_ptr(move(c_contents.data), stream, mr)]) + [DeviceBuffer.c_from_unique_ptr(move(c_contents.data), _stream, mr)]) # Create new options using the joined string as source cdef JsonReaderOptions options = ( @@ -886,9 +890,9 @@ cpdef TableWithMetadata read_json_from_string_column( # Read JSON from the joined string with nogil: - c_result = move(cpp_read_json(options.c_obj, stream.view(), mr.get_mr())) + c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr())) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) cdef class JsonWriterOptions: """ @@ -1090,7 +1094,7 @@ cdef class JsonWriterOptionsBuilder: return json_options -cpdef void write_json(JsonWriterOptions options, Stream stream = None): +cpdef void write_json(JsonWriterOptions options, object stream = None): """ Writes a set of columns to JSON format. 
@@ -1106,8 +1110,9 @@ cpdef void write_json(JsonWriterOptions options, Stream stream = None): None """ cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() with nogil: - cpp_write_json(options.c_obj, s.view()) + cpp_write_json(options.c_obj, _cs) cpdef bool is_supported_write_json(DataType type): """Check if the dtype is supported for JSON writing diff --git a/python/pylibcudf/pylibcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/io/orc.pxd index 24221163917..72ad5aac534 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pxd +++ b/python/pylibcudf/pylibcudf/io/orc.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint64_t, int64_t @@ -9,7 +9,6 @@ from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport ( @@ -65,7 +64,7 @@ cdef class OrcReaderOptionsBuilder: cpdef OrcReaderOptions build(self) cpdef TableWithMetadata read_orc( - OrcReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + OrcReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) cdef class OrcColumnStatistics: @@ -89,7 +88,7 @@ cdef class ParsedOrcStatistics: cpdef ParsedOrcStatistics read_parsed_orc_statistics( SourceInfo source_info, - Stream stream=* + object stream = * ) cdef class OrcWriterOptions: @@ -110,7 +109,7 @@ cdef class OrcWriterOptionsBuilder: cpdef OrcWriterOptionsBuilder metadata(self, TableInputMetadata meta) cpdef OrcWriterOptions build(self) -cpdef void write_orc(OrcWriterOptions options, Stream stream = *) +cpdef void write_orc(OrcWriterOptions options, object stream = *) cdef class OrcChunkedWriter: cdef unique_ptr[orc_chunked_writer] c_obj diff --git 
a/python/pylibcudf/pylibcudf/io/orc.pyi b/python/pylibcudf/pylibcudf/io/orc.pyi index dcf2b731bac..3cb6daff240 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pyi +++ b/python/pylibcudf/pylibcudf/io/orc.pyi @@ -4,7 +4,6 @@ from typing import Any, Self from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.io.types import ( CompressionType, @@ -16,6 +15,7 @@ from pylibcudf.io.types import ( ) from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class OrcReaderOptions: def set_num_rows(self, nrows: int) -> None: ... @@ -34,7 +34,7 @@ class OrcReaderOptionsBuilder: def read_orc( options: OrcReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... @@ -59,7 +59,7 @@ class ParsedOrcStatistics: def read_parsed_orc_statistics( source_info: SourceInfo, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> ParsedOrcStatistics: ... class OrcWriterOptions: @@ -79,7 +79,7 @@ class OrcWriterOptionsBuilder: def build(self) -> OrcWriterOptions: ... def write_orc( - options: OrcWriterOptions, stream: Stream | None = None + options: OrcWriterOptions, stream: CudaStreamLike | None = None ) -> None: ... def is_supported_read_orc(compression: CompressionType) -> bool: ... def is_supported_write_orc(compression: CompressionType) -> bool: ... @@ -90,7 +90,7 @@ class OrcChunkedWriter: def write(self, table: Table) -> None: ... @staticmethod def from_options( - options: ChunkedOrcWriterOptions, stream: Stream | None = None + options: ChunkedOrcWriterOptions, stream: CudaStreamLike | None = None ) -> OrcChunkedWriter: ... 
class ChunkedOrcWriterOptions: diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx index 8c3687ec232..3a2fabc5683 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pyx +++ b/python/pylibcudf/pylibcudf/io/orc.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from libcpp.string cimport string @@ -8,6 +8,7 @@ from libcpp.vector cimport vector import datetime from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SourceInfo, TableWithMetadata, SinkInfo @@ -444,7 +445,7 @@ cdef class OrcReaderOptionsBuilder: cpdef TableWithMetadata read_orc( - OrcReaderOptions options, Stream stream = None, DeviceMemoryResource mr=None + OrcReaderOptions options, object stream = None, DeviceMemoryResource mr=None ): """ Read from ORC format. @@ -465,17 +466,17 @@ cpdef TableWithMetadata read_orc( """ cdef table_with_metadata c_result cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) - with nogil: - c_result = move(cpp_read_orc(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_orc(options.c_obj, _cs, mr.get_mr())) return TableWithMetadata.from_libcudf(c_result, s, mr) cpdef ParsedOrcStatistics read_parsed_orc_statistics( SourceInfo source_info, - Stream stream=None + object stream=None ): """ Read ORC statistics from a source. 
@@ -494,8 +495,9 @@ cpdef ParsedOrcStatistics read_parsed_orc_statistics( """ cdef Stream s = _get_stream(stream) cdef parsed_orc_statistics parsed + cdef cudaStream_t _cs = s.view().value() with nogil: - parsed = cpp_read_parsed_orc_statistics(source_info.c_obj, s.view()) + parsed = cpp_read_parsed_orc_statistics(source_info.c_obj, _cs) return ParsedOrcStatistics.from_libcudf(parsed) @@ -667,7 +669,7 @@ cdef class OrcWriterOptionsBuilder: return orc_options -cpdef void write_orc(OrcWriterOptions options, Stream stream = None): +cpdef void write_orc(OrcWriterOptions options, object stream = None): """ Write to ORC format. @@ -688,8 +690,9 @@ cpdef void write_orc(OrcWriterOptions options, Stream stream = None): None """ cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() with nogil: - cpp_write_orc(move(options.c_obj), s.view()) + cpp_write_orc(move(options.c_obj), _cs) cdef class OrcChunkedWriter: @@ -721,7 +724,7 @@ cdef class OrcChunkedWriter: self.c_obj.get()[0].write(table.view()) @staticmethod - def from_options(ChunkedOrcWriterOptions options, Stream stream = None): + def from_options(ChunkedOrcWriterOptions options, object stream = None): """ Creates a chunked ORC writer from options @@ -740,7 +743,8 @@ cdef class OrcChunkedWriter: OrcChunkedWriter ) cdef Stream s = _get_stream(stream) - orc_writer.c_obj.reset(new orc_chunked_writer(options.c_obj, s.view())) + cdef cudaStream_t _cs = s.view().value() + orc_writer.c_obj.reset(new orc_chunked_writer(options.c_obj, _cs)) return orc_writer diff --git a/python/pylibcudf/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd index d9350f77721..c98a90dd692 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/io/parquet.pxd @@ -6,8 +6,8 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource 
+from rmm.pylibrmm.stream cimport Stream from pylibcudf.expressions cimport Expression @@ -74,7 +74,7 @@ cdef class ParquetReaderOptionsBuilder: cdef class ChunkedParquetReader: - cdef readonly Stream stream + cdef Stream _stream cdef DeviceMemoryResource mr cdef unique_ptr[cpp_chunked_parquet_reader] reader @@ -83,7 +83,7 @@ cdef class ChunkedParquetReader: cpdef read_parquet( - ParquetReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + ParquetReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) @@ -180,7 +180,7 @@ cdef class ParquetWriterOptionsBuilder: cpdef ParquetWriterOptions build(self) -cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = *) +cpdef memoryview write_parquet(ParquetWriterOptions options, object stream = *) cpdef bool is_supported_read_parquet(compression_type compression) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyi b/python/pylibcudf/pylibcudf/io/parquet.pyi index c0c31e22007..f0a092f63e0 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyi +++ b/python/pylibcudf/pylibcudf/io/parquet.pyi @@ -5,7 +5,6 @@ from collections.abc import Mapping, Sequence from typing import Self from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.expressions import Expression from pylibcudf.io.types import ( @@ -20,6 +19,7 @@ from pylibcudf.io.types import ( ) from pylibcudf.table import Table from pylibcudf.types import TypeId +from pylibcudf.utils import CudaStreamLike class ParquetReaderOptions: def __init__(self): ... @@ -53,7 +53,7 @@ class ChunkedParquetReader: def __init__( self, options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, chunk_read_limit: int = 0, pass_read_limit: int = 1024000000, ) -> None: ... 
@@ -62,7 +62,7 @@ class ChunkedParquetReader: def read_parquet( options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... @@ -101,7 +101,7 @@ class ParquetWriterOptionsBuilder: def build(self) -> ParquetWriterOptions: ... def write_parquet( - options: ParquetWriterOptions, stream: Stream | None = None + options: ParquetWriterOptions, stream: CudaStreamLike | None = None ) -> memoryview: ... def is_supported_read_parquet(compression: CompressionType) -> bool: ... def is_supported_write_parquet(compression: CompressionType) -> bool: ... @@ -112,7 +112,8 @@ class ChunkedParquetWriter: def write(self, table: Table, partitions_info: object = None) -> None: ... @staticmethod def from_options( - options: ChunkedParquetWriterOptions, stream: Stream | None = None + options: ChunkedParquetWriterOptions, + stream: CudaStreamLike | None = None, ) -> ChunkedParquetWriter: ... class ChunkedParquetWriterOptions: diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index c4bad082304..86904513cfa 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -46,6 +46,7 @@ from pylibcudf.libcudf.io.types cimport ( from pylibcudf.libcudf.types cimport size_type, type_id from pylibcudf.table cimport Table from pylibcudf.utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ChunkedParquetReader", @@ -507,20 +508,21 @@ cdef class ChunkedParquetReader: def __init__( self, ParquetReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None, size_t chunk_read_limit=0, size_t pass_read_limit=1024000000, ): - self.stream = _get_stream(stream) + self._stream = _get_stream(stream) self.mr = _get_memory_resource(mr) + cdef cudaStream_t stream_view = self._stream.view().value() with nogil: 
self.reader.reset( new cpp_chunked_parquet_reader( chunk_read_limit, pass_read_limit, options.c_obj, - self.stream.view(), + stream_view, self.mr.get_mr() ) ) @@ -560,11 +562,11 @@ cdef class ChunkedParquetReader: with nogil: c_result = move(self.reader.get()[0].read_chunk()) - return TableWithMetadata.from_libcudf(c_result, self.stream, mr) + return TableWithMetadata.from_libcudf(c_result, self._stream, mr) cpdef read_parquet( - ParquetReaderOptions options, Stream stream = None, DeviceMemoryResource mr=None + ParquetReaderOptions options, object stream = None, DeviceMemoryResource mr=None ): """ Read from Parquet format. @@ -584,9 +586,10 @@ cpdef read_parquet( Device memory resource used to allocate the returned table's device memory. """ cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_parquet(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_parquet(options.c_obj, _cs, mr.get_mr())) return TableWithMetadata.from_libcudf(c_result, s, mr) @@ -640,7 +643,7 @@ cdef class ChunkedParquetWriter: self.c_obj.get()[0].write(table.view(), partitions) @staticmethod - def from_options(ChunkedParquetWriterOptions options, Stream stream = None): + def from_options(ChunkedParquetWriterOptions options, object stream = None): """ Creates a chunked Parquet writer from options @@ -659,8 +662,9 @@ cdef class ChunkedParquetWriter: ChunkedParquetWriter ) cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() parquet_writer.c_obj.reset( - new cpp_chunked_parquet_writer(options.c_obj, s.view()) + new cpp_chunked_parquet_writer(options.c_obj, _cs) ) return parquet_writer @@ -1235,7 +1239,7 @@ cdef class ParquetWriterOptionsBuilder: return parquet_options -cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = None): +cpdef memoryview write_parquet(ParquetWriterOptions options, object stream = None): """ Writes a set of columns 
to parquet format. @@ -1255,9 +1259,9 @@ cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = Non """ cdef unique_ptr[vector[uint8_t]] c_result cdef Stream s = _get_stream(stream) - + cdef cudaStream_t _cs = s.view().value() with nogil: - c_result = cpp_write_parquet(move(options.c_obj), s.view()) + c_result = cpp_write_parquet(move(options.c_obj), _cs) return memoryview(HostBuffer.from_unique_ptr(move(c_result))) diff --git a/python/pylibcudf/pylibcudf/io/text.pxd b/python/pylibcudf/pylibcudf/io/text.pxd index 7623c8da26b..5276f9ffaba 100644 --- a/python/pylibcudf/pylibcudf/io/text.pxd +++ b/python/pylibcudf/pylibcudf/io/text.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.string cimport string from pylibcudf.column cimport Column -from pylibcudf.io.types cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.libcudf.io.text cimport parse_options, data_chunk_source, byte_range_info @@ -23,7 +22,7 @@ cpdef Column multibyte_split( DataChunkSource source, str delimiter, ParseOptions options=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/io/text.pyi b/python/pylibcudf/pylibcudf/io/text.pyi index 66406c94dd2..581e45c3194 100644 --- a/python/pylibcudf/pylibcudf/io/text.pyi +++ b/python/pylibcudf/pylibcudf/io/text.pyi @@ -1,10 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class ByteRangeInfo: def __init__(self, offset: int, size: int) -> None: ... @@ -35,6 +35,6 @@ def multibyte_split( source: DataChunkSource, delimiter: str, options: ParseOptions | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/io/text.pyx b/python/pylibcudf/pylibcudf/io/text.pyx index 9fb220b0a37..be15701a4d8 100644 --- a/python/pylibcudf/pylibcudf/io/text.pyx +++ b/python/pylibcudf/pylibcudf/io/text.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -9,10 +9,11 @@ from libcpp.utility cimport move from pylibcudf.column cimport Column from pylibcudf.utils cimport _get_stream, _get_memory_resource -from pylibcudf.io.types cimport Stream +from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.io cimport text as cpp_text +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ByteRangeInfo", @@ -193,7 +194,7 @@ cpdef Column multibyte_split( DataChunkSource source, str delimiter, ParseOptions options=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -224,7 +225,8 @@ cpdef Column multibyte_split( cdef unique_ptr[column] c_result cdef unique_ptr[data_chunk_source] c_source = move(source.c_source) cdef string c_delimiter = delimiter.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() 
mr = _get_memory_resource(mr) if options is None: @@ -237,8 +239,8 @@ cpdef Column multibyte_split( dereference(c_source), c_delimiter, c_options, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/io/timezone.pxd index a2fa33d102d..9a12be928b2 100644 --- a/python/pylibcudf/pylibcudf/io/timezone.pxd +++ b/python/pylibcudf/pylibcudf/io/timezone.pxd @@ -1,11 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from ..table cimport Table -from .types cimport Stream + from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cpdef Table make_timezone_transition_table( - str tzif_dir, str timezone_name, Stream stream=*, DeviceMemoryResource mr=* + str tzif_dir, str timezone_name, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyi b/python/pylibcudf/pylibcudf/io/timezone.pyi index d83f68424b4..f87dda70f70 100644 --- a/python/pylibcudf/pylibcudf/io/timezone.pyi +++ b/python/pylibcudf/pylibcudf/io/timezone.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def make_timezone_transition_table( tzif_dir: str, timezone_name: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyx b/python/pylibcudf/pylibcudf/io/timezone.pyx index 0416df1cf0b..033ed15a1ba 100644 --- a/python/pylibcudf/pylibcudf/io/timezone.pyx +++ b/python/pylibcudf/pylibcudf/io/timezone.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,13 +12,14 @@ from pylibcudf.libcudf.table.table cimport table from ..utils cimport _get_stream, _get_memory_resource from ..table cimport Table -from .types cimport Stream +from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["make_timezone_transition_table"] cpdef Table make_timezone_transition_table( - str tzif_dir, str timezone_name, Stream stream=None, DeviceMemoryResource mr=None, + str tzif_dir, str timezone_name, object stream=None, DeviceMemoryResource mr=None, ): """ Creates a transition table to convert ORC timestamps to UTC. 
@@ -42,15 +43,16 @@ cpdef Table make_timezone_transition_table( cdef unique_ptr[table] c_result cdef string c_tzdir = tzif_dir.encode() cdef string c_tzname = timezone_name.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_make_timezone_transition_table( make_optional[string](c_tzdir), c_tzname, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/io/types.pxd b/python/pylibcudf/pylibcudf/io/types.pxd index db7e2ad95c5..1e52f4faa05 100644 --- a/python/pylibcudf/pylibcudf/io/types.pxd +++ b/python/pylibcudf/pylibcudf/io/types.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint8_t, int32_t @@ -29,7 +29,6 @@ from pylibcudf.libcudf.utilities.span cimport host_span from pylibcudf.table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cdef class PartitionInfo: @@ -86,7 +85,7 @@ cdef class TableWithMetadata: @staticmethod cdef TableWithMetadata from_libcudf( - table_with_metadata& tbl, Stream stream, DeviceMemoryResource mr + table_with_metadata& tbl, object stream, DeviceMemoryResource mr ) cdef class SourceInfo: diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 1c4a7f49268..27c3bb47caf 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -33,7 +33,6 @@ from pylibcudf.libcudf.utilities.span cimport device_span, host_span from pylibcudf.span import is_span from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport 
Stream import codecs import errno @@ -396,7 +395,7 @@ cdef class TableWithMetadata: @staticmethod cdef TableWithMetadata from_libcudf( table_with_metadata& tbl_with_meta, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Python TableWithMetadata from a libcudf table_with_metadata""" diff --git a/python/pylibcudf/pylibcudf/join.pxd b/python/pylibcudf/pylibcudf/join.pxd index 31a998029e3..f0b69a42621 100644 --- a/python/pylibcudf/pylibcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/join.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from pylibcudf.libcudf cimport join as cpp_join from pylibcudf.libcudf.types cimport null_equality -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -16,7 +15,7 @@ cpdef tuple inner_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -24,7 +23,7 @@ cpdef tuple left_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -32,7 +31,7 @@ cpdef tuple full_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -40,7 +39,7 @@ cpdef Column left_semi_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -48,19 +47,19 @@ cpdef Column left_anti_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Table cross_join( - Table left, Table right, Stream stream=*, DeviceMemoryResource mr=* + Table left, Table right, object stream = *, 
DeviceMemoryResource mr=* ) cpdef tuple conditional_inner_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -68,7 +67,7 @@ cpdef tuple conditional_left_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -76,7 +75,7 @@ cpdef tuple conditional_full_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -84,7 +83,7 @@ cpdef Column conditional_left_semi_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -92,7 +91,7 @@ cpdef Column conditional_left_anti_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -103,7 +102,7 @@ cpdef tuple mixed_inner_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -114,7 +113,7 @@ cpdef tuple mixed_left_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -125,7 +124,7 @@ cpdef tuple mixed_full_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -136,7 +135,7 @@ cpdef Column mixed_left_semi_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -147,7 +146,7 @@ cpdef Column mixed_left_anti_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/join.pyi b/python/pylibcudf/pylibcudf/join.pyi 
index 615eb914618..1cf86c7c704 100644 --- a/python/pylibcudf/pylibcudf/join.pyi +++ b/python/pylibcudf/pylibcudf/join.pyi @@ -4,12 +4,12 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.table import Table from pylibcudf.types import NullEquality +from pylibcudf.utils import CudaStreamLike class SetAsBuildTable(IntEnum): LEFT = ... @@ -19,76 +19,76 @@ def inner_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def left_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def full_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def left_semi_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def left_anti_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def cross_join( left: Table, right: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def conditional_inner_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def conditional_left_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def conditional_full_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def conditional_left_semi_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def conditional_left_anti_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def mixed_inner_join( @@ -98,7 +98,7 @@ def mixed_inner_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def mixed_left_join( @@ -108,7 +108,7 @@ def mixed_left_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def mixed_full_join( @@ -118,7 +118,7 @@ def mixed_full_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... 
def mixed_left_semi_join( @@ -128,7 +128,7 @@ def mixed_left_semi_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def mixed_left_anti_join( @@ -138,7 +138,7 @@ def mixed_left_anti_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @@ -148,17 +148,17 @@ class FilteredJoin: build: Table, compare_nulls: NullEquality, load_factor: float = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> None: ... def semi_join( self, probe: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def anti_join( self, probe: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index 61a321b27a8..78a44554dff 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -22,6 +22,7 @@ from .table cimport Table from .utils cimport _get_stream, _get_memory_resource from pylibcudf.libcudf.join import set_as_build_table as SetAsBuildTable # no-cython-lint # noqa: F401, deprecated +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "conditional_full_join", @@ -45,9 +46,10 @@ __all__ = [ ] cdef Column _column_from_gather_map( - cpp_join.gather_map_type gather_map, Stream stream, DeviceMemoryResource mr + cpp_join.gather_map_type gather_map, object stream, DeviceMemoryResource mr ): # helper to convert a gather map to a Column + cdef Stream _stream = _get_stream(stream) return Column.from_libcudf( move( make_unique[column]( @@ -55,9 +57,7 @@ cdef Column _column_from_gather_map( device_buffer(), 0 ) - ), - stream, - mr + ), _stream, mr ) @@ -65,7 +65,7 @@ cpdef tuple inner_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform an inner join between two tables. 
@@ -89,16 +89,21 @@ cpdef tuple inner_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_join.inner_join( - left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr() + left_keys.view(), + right_keys.view(), + nulls_equal, + _cs, + mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -106,7 +111,7 @@ cpdef tuple left_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a left join between two tables. @@ -130,16 +135,21 @@ cpdef tuple left_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_join.left_join( - left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr() + left_keys.view(), + right_keys.view(), + nulls_equal, + _cs, + mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -147,7 +157,7 @@ cpdef tuple full_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a full join between two tables. 
@@ -171,16 +181,21 @@ cpdef tuple full_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_join.full_join( - left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr() + left_keys.view(), + right_keys.view(), + nulls_equal, + _cs, + mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -188,7 +203,7 @@ cpdef Column left_semi_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a left semi join between two tables. @@ -211,7 +226,8 @@ cpdef Column left_semi_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef unique_ptr[cpp_join.filtered_join] join_obj @@ -221,22 +237,22 @@ cpdef Column left_semi_join( new cpp_join.filtered_join( right_keys.view(), nulls_equal, - stream.view() + _cs ) ) c_result = join_obj.get()[0].semi_join( left_keys.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Column left_anti_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a left anti join between two tables. 
@@ -259,7 +275,8 @@ cpdef Column left_anti_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef unique_ptr[cpp_join.filtered_join] join_obj @@ -269,19 +286,19 @@ cpdef Column left_anti_join( new cpp_join.filtered_join( right_keys.view(), nulls_equal, - stream.view() + _cs ) ) c_result = join_obj.get()[0].anti_join( left_keys.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Table cross_join( - Table left, Table right, Stream stream=None, DeviceMemoryResource mr=None + Table left, Table right, object stream=None, DeviceMemoryResource mr=None ): """Perform a cross join on two tables. @@ -305,21 +322,22 @@ cpdef Table cross_join( """ cdef unique_ptr[table] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_join.cross_join( - left.view(), right.view(), stream.view(), mr.get_mr() + left.view(), right.view(), _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef tuple conditional_inner_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional inner join between two tables. 
@@ -344,7 +362,8 @@ cpdef tuple conditional_inner_join( cdef cpp_join.gather_map_pair_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -353,12 +372,12 @@ cpdef tuple conditional_inner_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -366,7 +385,7 @@ cpdef tuple conditional_left_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional left join between two tables. @@ -391,7 +410,8 @@ cpdef tuple conditional_left_join( cdef cpp_join.gather_map_pair_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -400,12 +420,12 @@ cpdef tuple conditional_left_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -413,7 +433,7 @@ cpdef tuple conditional_full_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional full join between two tables. 
@@ -437,7 +457,8 @@ cpdef tuple conditional_full_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -445,12 +466,12 @@ cpdef tuple conditional_full_join( left.view(), right.view(), dereference(binary_predicate.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -458,7 +479,7 @@ cpdef Column conditional_left_semi_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional left semi join between two tables. @@ -482,7 +503,8 @@ cpdef Column conditional_left_semi_join( cdef cpp_join.gather_map_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -491,17 +513,17 @@ cpdef Column conditional_left_semi_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Column conditional_left_anti_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional left anti join between two tables. 
@@ -525,7 +547,8 @@ cpdef Column conditional_left_anti_join( cdef cpp_join.gather_map_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -534,10 +557,10 @@ cpdef Column conditional_left_anti_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef tuple mixed_inner_join( @@ -547,7 +570,7 @@ cpdef tuple mixed_inner_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed inner join between two tables. @@ -578,7 +601,8 @@ cpdef tuple mixed_inner_join( cdef cpp_join.gather_map_pair_type c_result cdef cpp_join.output_size_data_type empty_optional - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -590,12 +614,12 @@ cpdef tuple mixed_inner_join( dereference(binary_predicate.c_obj.get()), nulls_equal, empty_optional, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -606,7 +630,7 @@ cpdef tuple mixed_left_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed left join between two tables. 
@@ -637,7 +661,8 @@ cpdef tuple mixed_left_join( cdef cpp_join.gather_map_pair_type c_result cdef cpp_join.output_size_data_type empty_optional - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -649,12 +674,12 @@ cpdef tuple mixed_left_join( dereference(binary_predicate.c_obj.get()), nulls_equal, empty_optional, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -665,7 +690,7 @@ cpdef tuple mixed_full_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed full join between two tables. @@ -696,7 +721,8 @@ cpdef tuple mixed_full_join( cdef cpp_join.gather_map_pair_type c_result cdef cpp_join.output_size_data_type empty_optional - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -708,12 +734,12 @@ cpdef tuple mixed_full_join( dereference(binary_predicate.c_obj.get()), nulls_equal, empty_optional, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -724,7 +750,7 @@ cpdef Column mixed_left_semi_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed left semi join between two tables. 
@@ -753,7 +779,8 @@ cpdef Column mixed_left_semi_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -764,10 +791,10 @@ cpdef Column mixed_left_semi_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Column mixed_left_anti_join( @@ -777,7 +804,7 @@ cpdef Column mixed_left_anti_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed left anti join between two tables. @@ -806,7 +833,8 @@ cpdef Column mixed_left_anti_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -817,10 +845,10 @@ cpdef Column mixed_left_anti_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cdef class FilteredJoin: @@ -841,7 +869,7 @@ cdef class FilteredJoin: Table build, null_equality compare_nulls, double load_factor=0.5, - Stream stream=None, + object stream=None, ): """ Construct a filtered hash join object for subsequent probe calls. @@ -858,7 +886,8 @@ cdef class FilteredJoin: stream : Stream, optional CUDA stream used for device memory operations and kernel launches. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: self.c_obj.reset( @@ -866,14 +895,14 @@ cdef class FilteredJoin: build.view(), compare_nulls, load_factor, - stream.view() + _cs ) ) def semi_join( self, Table probe, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -898,21 +927,22 @@ cdef class FilteredJoin: """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = self.c_obj.get()[0].semi_join( probe.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) def anti_join( self, Table probe, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -937,13 +967,14 @@ cdef class FilteredJoin: """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = self.c_obj.get()[0].anti_join( probe.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/json.pxd b/python/pylibcudf/pylibcudf/json.pxd index 5489fa26ee8..47cf3b37c63 100644 --- a/python/pylibcudf/pylibcudf/json.pxd +++ b/python/pylibcudf/pylibcudf/json.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -6,7 +6,6 @@ from pylibcudf.libcudf.json cimport get_json_object_options from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class GetJsonObjectOptions: @@ -17,6 +16,6 @@ cpdef Column get_json_object( Column col, Scalar json_path, GetJsonObjectOptions options=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi index fa6bb08d510..a60bcb36f26 100644 --- a/python/pylibcudf/pylibcudf/json.pyi +++ b/python/pylibcudf/pylibcudf/json.pyi @@ -1,11 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class GetJsonObjectOptions: def __init__( @@ -26,6 +26,6 @@ def get_json_object( col: Column, json_path: Scalar, options: GetJsonObjectOptions | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx index b50bd4e7714..a470f6a1cb3 100644 --- a/python/pylibcudf/pylibcudf/json.pyx +++ b/python/pylibcudf/pylibcudf/json.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["GetJsonObjectOptions", "get_json_object"] @@ -120,7 +121,7 @@ cpdef Column get_json_object( Column col, Scalar json_path, GetJsonObjectOptions options=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -155,7 +156,8 @@ cpdef Column get_json_object( options = GetJsonObjectOptions() cdef cpp_json.get_json_object_options c_options = options.options - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -163,8 +165,8 @@ cpdef Column get_json_object( col.view(), dereference(c_json_path), c_options, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/labeling.pxd b/python/pylibcudf/pylibcudf/labeling.pxd index fc93568ed7c..0d8f02d48ce 100644 --- a/python/pylibcudf/pylibcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/labeling.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.labeling cimport inclusive from .column cimport Column -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -15,6 +14,6 @@ cpdef Column label_bins( inclusive left_inclusive, Column right_edges, inclusive right_inclusive, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi index e9ff5c97f0b..272edd43f5f 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyi +++ b/python/pylibcudf/pylibcudf/labeling.pyi @@ -1,12 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class Inclusive(IntEnum): YES = ... @@ -18,6 +18,6 @@ def label_bins( left_inclusive: Inclusive, right_edges: Column, right_inclusive: Inclusive, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx index 878390543cb..e3a052f7cb8 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyx +++ b/python/pylibcudf/pylibcudf/labeling.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["Inclusive", "label_bins"] @@ -23,7 +24,7 @@ cpdef Column label_bins( inclusive left_inclusive, Column right_edges, inclusive right_inclusive, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Labels elements based on membership in the specified bins. @@ -54,7 +55,8 @@ cpdef Column label_bins( according to the specified bins. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -64,10 +66,10 @@ cpdef Column label_bins( left_inclusive, right_edges.view(), right_inclusive, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) Inclusive.__str__ = Inclusive.__repr__ diff --git a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd index 7ec2c6fe31f..303b112f71e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd @@ -10,7 +10,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -57,7 +57,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const column_view& rhs, binary_operator op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -66,7 +66,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const scalar& rhs, binary_operator op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -75,7 +75,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const column_view& rhs, binary_operator op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -84,7 +84,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const column_view& rhs, const string& op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd index daefd24fb7b..b22eeb1dd40 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport ( from pylibcudf.libcudf.types cimport data_type, size_type from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -25,13 +25,13 @@ cdef extern from "cudf/column/column.hpp" namespace "cudf" nogil: column() except +libcudf_exception_handler column( const column& other, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler column( column_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd index 
5e17d3b89bd..f8cf3b38ccb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd @@ -13,7 +13,7 @@ from pylibcudf.libcudf.types cimport ( ) from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,7 +22,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,7 +31,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_point_column( @@ -47,14 +47,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( @@ -62,14 +62,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + 
cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( @@ -77,14 +77,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( @@ -92,27 +92,27 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_column_from_scalar( const scalar& s, size_type size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] make_dictionary_from_scalar( const scalar& s, size_type size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] make_dictionary_column( unique_ptr[column] keys_column, unique_ptr[column] indices_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_empty_column( diff --git a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd index 272f452a0a0..53cadee79c9 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view from pylibcudf.libcudf.utilities.span cimport host_span from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -24,11 +24,11 @@ cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: cdef unique_ptr[column] concatenate( const vector[column_view] columns, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] concatenate( const vector[table_view] tables, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd index 9d839835465..dd439d0d01d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd @@ -10,7 +10,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from pylibcudf.libcudf.utilities.span cimport device_span from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -32,7 +32,7 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil: unique_ptr[chunked_pack] create( const table_view & input, size_t user_buffer_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref temp_mr, ) except +libcudf_exception_handler @@ -43,13 +43,13 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" 
nogil: cdef vector[contiguous_split_result] contiguous_split ( table_view input_table, vector[size_type] splits, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef packed_columns pack ( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd index 2c3741342e9..36c95fa777c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd @@ -17,7 +17,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref ctypedef const scalar constscalar @@ -31,7 +31,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const table_view& source_table, const column_view& gather_map, out_of_bounds_policy policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const column_view& input, size_type offset, const scalar& fill_values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -47,7 +47,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const table_view& source_table, const column_view& scatter_map, const table_view& target_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -55,7 +55,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const vector[reference_wrapper[constscalar]]& 
source_scalars, const column_view& indices, const table_view& target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -71,7 +71,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: cdef unique_ptr[column] allocate_like ( const column_view& input_column, mask_allocation_policy policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -79,7 +79,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const column_view& input_column, size_type size, mask_allocation_policy policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -93,7 +93,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: size_type input_begin, size_type input_end, size_type target_begin, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef unique_ptr[column] copy_range ( @@ -102,39 +102,39 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: size_type input_begin, size_type input_end, size_type target_begin, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef vector[column_view] slice ( const column_view& input_column, vector[size_type] indices, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef vector[table_view] slice ( const table_view& input_table, vector[size_type] indices, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef vector[column_view] split ( const column_view& input_column, vector[size_type] splits, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef vector[table_view] split ( const table_view& input_table, vector[size_type] splits, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef 
unique_ptr[column] copy_if_else ( const column_view& lhs, const column_view& rhs, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -142,7 +142,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const scalar& lhs, const column_view& rhs, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -150,7 +150,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const column_view& lhs, const scalar& rhs, const column_view boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -158,7 +158,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const scalar& lhs, const scalar& rhs, const column_view boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -166,7 +166,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const table_view& input, const table_view& target, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -174,14 +174,14 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const vector[reference_wrapper[constscalar]]& input, const table_view& target, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] get_element ( const column_view& input, size_type index, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index a14932f8910..7db66dc1070 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -27,7 +27,7 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_datetime_component( const column_view& column, datetime_component component, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -42,54 +42,54 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] ceil_datetimes( const column_view& column, rounding_frequency freq, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] floor_datetimes( const column_view& column, rounding_frequency freq, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] round_datetimes( const column_view& column, rounding_frequency freq, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const scalar& months, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef 
unique_ptr[column] day_of_year( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] is_leap_year( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] last_day_of_month( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] extract_quarter( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] days_in_month( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd b/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd index 7aea4aafcd1..399a868db71 100644 --- a/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd @@ -1,14 +1,31 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.utilities.span cimport host_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +ctypedef const cudaStream_t const_cudaStream_t -cdef extern from "cudf/detail/utilities/stream_pool.hpp" namespace "cudf::detail" nogil: - cdef void join_streams( - host_span[const cuda_stream_view] streams, - cuda_stream_view stream +cdef extern from * nogil: + """ + #include + #include + #include + #include + + namespace { + void join_streams_wrapper( + cudf::host_span streams, + cudaStream_t stream + ) { + std::vector stream_views(streams.begin(), streams.end()); + cudf::detail::join_streams(stream_views, stream); + } + } + """ + cdef void join_streams "join_streams_wrapper"( + host_span[const_cudaStream_t] streams, + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd b/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd index 5707f34f578..2cbf79c0c17 100644 --- a/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport ( null_policy, size_type, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/reduction/distinct_count.hpp" namespace "cudf" nogil: @@ -17,9 +17,9 @@ cdef extern from "cudf/reduction/distinct_count.hpp" namespace "cudf" nogil: column_view column, null_policy null_handling, nan_policy nan_handling, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler cdef size_type distinct_count( table_view source_table, null_equality nulls_equal, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler diff 
--git a/python/pylibcudf/pylibcudf/libcudf/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/filling.pxd index ac969cb8822..e9470a828a7 100644 --- a/python/pylibcudf/pylibcudf/libcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/filling.pxd @@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,7 +22,7 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type begin, size_type end, const scalar & value, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,20 +31,20 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type begin, size_type end, const scalar & value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, const column_view & count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, size_type count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -52,7 +52,7 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type size, const scalar & init, const scalar & step, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -60,6 +60,6 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type n, const scalar& init, size_type months, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler 
diff --git a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd index 5ba69a12290..b5ba1031813 100644 --- a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd @@ -24,7 +24,7 @@ from pylibcudf.libcudf.types cimport ( sorted, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref # workaround for https://github.com/cython/cython/issues/3885 @@ -67,7 +67,7 @@ cdef extern from "cudf/groupby.hpp" \ vector[aggregation_result] ] aggregate( const vector[aggregation_request]& requests, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -76,7 +76,7 @@ cdef extern from "cudf/groupby.hpp" \ vector[aggregation_result] ] scan( const vector[scan_request]& requests, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -87,19 +87,19 @@ cdef extern from "cudf/groupby.hpp" \ const table_view values, const vector[size_type] offset, const vector[reference_wrapper[constscalar]] fill_values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler groups get_groups( table_view values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler pair[unique_ptr[table], unique_ptr[table]] replace_nulls( const table_view& values, const vector[replace_policy] replace_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd index 380afc96c58..9610fa2a09f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/hash.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd @@ -7,7 +7,7 @@ 
from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,64 +15,64 @@ cdef extern from "cudf/hashing.hpp" namespace "cudf::hashing" nogil: cdef unique_ptr[column] murmurhash3_x86_32( const table_view& input, const uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] murmurhash3_x64_128( const table_view& input, const uint64_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] md5( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha1( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha224( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha256( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha384( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha512( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] xxhash_32( const table_view& input, const 
uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] xxhash_64( const table_view& input, const uint64_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd index b09524a257b..78fc455dd35 100644 --- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd @@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -40,13 +40,13 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \ nogil: cdef unique_ptr[table] from_dlpack( const DLManagedTensor* managed_tensor, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler DLManagedTensor* to_dlpack( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -65,18 +65,18 @@ cdef extern from "cudf/interop.hpp" namespace "cudf::interop" \ arrow_column( ArrowSchema&& schema, ArrowArray&& array, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler arrow_column( ArrowSchema&& schema, ArrowDeviceArray&& array, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler arrow_column( ArrowArrayStream&& stream, - cuda_stream_view cuda_stream, + cudaStream_t cuda_stream, device_async_resource_ref mr ) except +libcudf_exception_handler column_view view() except 
+libcudf_exception_handler @@ -84,13 +84,13 @@ cdef extern from "cudf/interop.hpp" namespace "cudf::interop" \ cdef cppclass arrow_table: arrow_table( ArrowArrayStream&& stream, - cuda_stream_view cuda_stream, + cudaStream_t cuda_stream, device_async_resource_ref mr ) except +libcudf_exception_handler arrow_table( ArrowSchema&& schema, ArrowDeviceArray&& array, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler table_view view() except +libcudf_exception_handler @@ -135,7 +135,7 @@ cdef extern from *: template ArrowArray* to_arrow_host_raw( ViewType const& obj, - rmm::cuda_stream_view stream, + cudaStream_t stream, rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { ArrowArray *arr = new ArrowArray(); auto device_arr = cudf::to_arrow_host(obj, stream, mr); @@ -175,7 +175,7 @@ cdef extern from *: ArrowDeviceArray* to_arrow_device_raw( ViewType const& obj, PyObject* owner, - rmm::cuda_stream_view stream = cudf::get_default_stream(), + cudaStream_t stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto tmp = cudf::to_arrow_device(obj, stream, mr); @@ -222,11 +222,11 @@ cdef extern from *: ) except +libcudf_exception_handler nogil cdef ArrowArray* to_arrow_host_raw( const table_view& tbl, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler nogil cdef ArrowArray* to_arrow_host_raw( const column_view& tbl, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler nogil cdef void release_arrow_array_raw( ArrowArray * diff --git a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd index ff84ad922fc..521147218bf 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd @@ -5,7 +5,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from 
pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -49,6 +49,6 @@ cdef extern from "cudf/io/avro.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata read_avro( avro_reader_options &options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd index 31f626b7d9d..45987fbedcd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd @@ -10,7 +10,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/io/csv.hpp" \ @@ -263,7 +263,7 @@ cdef extern from "cudf/io/csv.hpp" \ cdef cudf_io_types.table_with_metadata read_csv( csv_reader_options &options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -337,7 +337,7 @@ cdef extern from "cudf/io/csv.hpp" \ cdef void write_csv( csv_writer_options args, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler cdef bool is_supported_write_csv( diff --git a/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd b/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd index 9f7462f6b86..8578908fc43 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd @@ -15,7 
+15,7 @@ from pylibcudf.libcudf.io.text cimport byte_range_info from pylibcudf.libcudf.io.types cimport table_with_metadata from pylibcudf.libcudf.types cimport size_type from pylibcudf.libcudf.utilities.span cimport device_span, host_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref ctypedef const uint8_t const_uint8_t @@ -61,7 +61,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ vector[size_type] filter_row_groups_with_stats( host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler pair[ @@ -75,20 +75,20 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ host_span[const_device_span_const_uint8_t] dictionary_page_data, host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler vector[size_type] filter_row_groups_with_bloom_filters( host_span[const_device_span_const_uint8_t] bloom_filter_data, host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler unique_ptr[column] build_row_mask_with_page_index_stats( host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -103,7 +103,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ mutable_column_view& row_mask, use_data_page_mask mask_data_pages, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -118,7 +118,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ const 
column_view& row_mask, use_data_page_mask mask_data_pages, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -131,7 +131,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ host_span[const_size_type] row_group_indices, host_span[const_device_span_const_uint8_t] column_chunk_data, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -143,7 +143,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ use_data_page_mask mask_data_pages, host_span[const_device_span_const_uint8_t] column_chunk_data, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -159,7 +159,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ use_data_page_mask mask_data_pages, host_span[const_device_span_const_uint8_t] column_chunk_data, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd index 6d5a506d18a..af3b1e59bd1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd @@ -11,7 +11,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -158,7 +158,7 @@ cdef extern from "cudf/io/json.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata read_json( 
json_reader_options &options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -240,7 +240,7 @@ cdef extern from "cudf/io/json.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata write_json( json_writer_options &options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef bool is_supported_write_json( diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd index 0455c0fa1b1..bea5c1e06f0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd @@ -11,7 +11,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -80,7 +80,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata read_orc( orc_reader_options opts, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr, ) except +libcudf_exception_handler @@ -150,7 +150,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil: cdef void write_orc( orc_writer_options options, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler cdef bool is_supported_read_orc( @@ -228,7 +228,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil: orc_chunked_writer() except +libcudf_exception_handler orc_chunked_writer( chunked_orc_writer_options args, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler orc_chunked_writer& write( cudf_table_view.table_view table_, diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd 
b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd index e0c67e14e1d..f365a45b34a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t from libcpp cimport bool @@ -8,7 +8,7 @@ from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.io cimport types as cudf_io_types from pylibcudf.variant cimport monostate, variant -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/io/orc_metadata.hpp" \ @@ -71,5 +71,5 @@ cdef extern from "cudf/io/orc_metadata.hpp" \ cdef parsed_orc_statistics read_parsed_orc_statistics( const cudf_io_types.source_info& src_info, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd index dc0dff818a3..00b62e55514 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd @@ -22,7 +22,7 @@ from pylibcudf.libcudf.io.types cimport ( ) from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type, size_type, type_id -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -124,7 +124,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cdef table_with_metadata read_parquet( parquet_reader_options args, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -256,7 +256,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cdef unique_ptr[vector[uint8_t]] write_parquet( parquet_writer_options options, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler cdef bool is_supported_read_parquet( @@ -288,7 +288,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: chunked_parquet_writer() except +libcudf_exception_handler chunked_parquet_writer( const chunked_parquet_writer_options& args, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler chunked_parquet_writer& write( const table_view& table_, @@ -303,14 +303,14 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: chunked_parquet_reader( size_t chunk_read_limit, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler chunked_parquet_reader( size_t chunk_read_limit, size_t pass_read_limit, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler bool has_next() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd index 77552a80cfd..7152e5d0afb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd @@ -6,7 +6,7 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -63,6 +63,6 @@ cdef extern from "cudf/io/text/multibyte_split.hpp" \ data_chunk_source source, string delimiter, 
parse_options options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd index 557e8856b28..45cfb4f15da 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd @@ -6,7 +6,7 @@ from libcpp.optional cimport optional from libcpp.string cimport string from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,6 +14,6 @@ cdef extern from "cudf/timezone.hpp" namespace "cudf" nogil: unique_ptr[table] make_timezone_transition_table( optional[string] tzif_dir, string timezone_name, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd index 06a7d497ad5..d13bf245119 100644 --- a/python/pylibcudf/pylibcudf/libcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd @@ -13,7 +13,7 @@ from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport null_equality, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref from rmm.librmm.device_uvector cimport device_uvector @@ -28,7 +28,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t 
stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -36,7 +36,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -44,7 +44,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -52,7 +52,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -60,7 +60,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -68,14 +68,14 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] cross_join( const table_view left, const table_view right, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -84,7 +84,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -93,7 +93,7 @@ cdef extern from 
"cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -101,7 +101,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -110,7 +110,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -118,7 +118,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -126,7 +126,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -135,7 +135,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -143,7 +143,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -152,7 +152,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -165,7 +165,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const expression binary_predicate, null_equality compare_nulls, output_size_data_type output_size_data, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -177,7 +177,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const expression binary_predicate, null_equality compare_nulls, output_size_data_type output_size_data, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -189,7 +189,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const expression binary_predicate, null_equality compare_nulls, output_size_data_type output_size_data, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -200,7 +200,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view right_conditional, const expression binary_predicate, null_equality compare_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -211,7 +211,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view right_conditional, const expression binary_predicate, null_equality compare_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -225,21 +225,21 @@ cdef extern from "cudf/join/filtered_join.hpp" namespace "cudf" nogil: filtered_join( const table_view build, null_equality 
compare_nulls, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler filtered_join( const table_view build, null_equality compare_nulls, double load_factor, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler gather_map_type semi_join( const table_view probe, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler gather_map_type anti_join( const table_view probe, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/json.pxd b/python/pylibcudf/pylibcudf/libcudf/json.pxd index 39899490cac..bb606b86b33 100644 --- a/python/pylibcudf/pylibcudf/libcudf/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/json.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -30,6 +30,6 @@ cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil: column_view col, string_scalar json_path, get_json_object_options options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd index ad9611511dd..0b2c1651714 100644 --- a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view 
+from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -21,6 +21,6 @@ cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil: inclusive left_inclusive, const column_view &right_edges, inclusive right_inclusive, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd index 66e90dcd66a..310d166df59 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -21,19 +21,19 @@ cdef extern from "cudf/lists/combine.hpp" namespace \ cdef unique_ptr[column] concatenate_rows( const table_view input_table, concatenate_null_policy null_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] concatenate_list_elements( const table_view input_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] concatenate_list_elements( const column_view input_table, concatenate_null_policy null_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd index efb2d760366..3736e42b32d 
100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,20 +20,20 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] contains( const lists_column_view& lists, const scalar& search_key, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] contains( const lists_column_view& lists, const column_view& search_keys, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] contains_nulls( const lists_column_view& lists, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -41,7 +41,7 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: const lists_column_view& lists, const scalar& search_key, duplicate_find_option find_option, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -49,6 +49,6 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: const lists_column_view& lists, const column_view& search_keys, duplicate_find_option find_option, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd 
index 6203bafdc38..6fa64c8b291 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd @@ -4,13 +4,13 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] count_elements( const lists_column_view&, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd index b31d3a7cdca..fa15fb1eeef 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,6 +13,6 @@ cdef extern from "cudf/lists/explode.hpp" namespace "cudf" nogil: cdef unique_ptr[table] explode_outer( const table_view, size_type explode_column_idx, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd index c82a9029311..66a07f41e38 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column, column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,12 +13,12 @@ cdef extern from "cudf/lists/extract.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] extract_list_element( const lists_column_view&, size_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] extract_list_element( const lists_column_view&, const column_view&, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd index 11cc19b86f9..1e55916d299 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd @@ -4,7 +4,7 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -12,7 +12,7 @@ cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] sequences( const column_view& starts, const column_view& sizes, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref 
mr ) except +libcudf_exception_handler @@ -20,6 +20,6 @@ cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil: const column_view& starts, const column_view& steps, const column_view& sizes, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd index bae67a96b0d..b7212bea51e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.copying cimport out_of_bounds_policy from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil: @@ -13,6 +13,6 @@ cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil: const lists_column_view& source_column, const lists_column_view& gather_map_list, out_of_bounds_policy bounds_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd index fe1630c1728..69a6c80f242 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport ( @@ -26,7 +26,7 @@ cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil: column_view offsets() except +libcudf_exception_handler column_view child() except +libcudf_exception_handler column_view get_sliced_child( - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef enum: diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd index f831024ec82..e60c8acbb38 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd @@ -4,13 +4,13 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/lists/reverse.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] reverse( const lists_column_view& lists_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd index 5e02d11d95a..b56caa9adb5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column 
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport nan_equality, null_equality -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,7 +15,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -24,7 +24,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -33,7 +33,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -42,6 +42,6 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd index 4036ccec6c5..9899591d6d1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from 
pylibcudf.libcudf.types cimport null_order, order -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,7 +14,7 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil: const lists_column_view source_column, order column_order, null_order null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -22,6 +22,6 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil: const lists_column_view source_column, order column_order, null_order null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd index dec32027402..0187642e0c7 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option from pylibcudf.libcudf.types cimport nan_equality, null_equality -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,7 +15,7 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ cdef unique_ptr[column] apply_boolean_mask( const lists_column_view& lists_column, const lists_column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -24,6 +24,6 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ null_equality nulls_equal, 
nan_equality nans_equal, duplicate_keep_option keep_option, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/merge.pxd b/python/pylibcudf/pylibcudf/libcudf/merge.pxd index 860e4263c1c..f4389ac991a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/merge.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/merge.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,6 +17,6 @@ cdef extern from "cudf/merge.hpp" namespace "cudf" nogil: vector[libcudf_types.size_type] key_cols, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd index 1b1b3001981..330c69f0579 100644 --- a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd @@ -8,14 +8,14 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef device_buffer copy_bitmask ( column_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -23,7 +23,7 @@ 
cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: const bitmask_type* null_mask, size_type begin_bit, size_type end_bit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -35,19 +35,19 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef device_buffer create_null_mask ( size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[device_buffer, size_type] bitmask_and( table_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) cdef pair[device_buffer, size_type] bitmask_or( table_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) @@ -55,12 +55,12 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: const bitmask_type * bitmask, size_type start, size_type stop, - cuda_stream_view stream + cudaStream_t stream ) cdef size_type index_of_first_set_bit( const bitmask_type * bitmask, size_type start, size_type stop, - cuda_stream_view stream + cudaStream_t stream ) diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd index eca30faa630..94a7fe3db9d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,7 +17,7 @@ cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil: 
cdef unique_ptr[bpe_merge_pairs] load_merge_pairs( const column_view &merge_pairs, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -25,6 +25,6 @@ cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil: const column_view &strings, const bpe_merge_pairs &merge_pairs, const string_scalar &separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd index 26e39c963d2..82a8581ea0a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref from rmm.librmm.device_uvector cimport device_uvector @@ -19,7 +19,7 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil: cdef suffix_array_type build_suffix_array( column_view source_strings, size_type min_width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -27,7 +27,7 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil: column_view source_strings, column_view indices, size_type min_width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -37,6 +37,6 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil: column_view input2, column_view indices2, size_type min_width, - 
cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd index b7f3e97a4b0..f3c10c11abf 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,12 +15,12 @@ cdef extern from "nvtext/edit_distance.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] edit_distance( const column_view & strings, const column_view & targets, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] edit_distance_matrix( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd index 43619d356f6..3d97aaf93b1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport 
device_async_resource_ref @@ -17,14 +17,14 @@ cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil: const column_view &strings, size_type ngrams, const string_scalar & separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] generate_character_ngrams( const column_view &strings, size_type ngrams, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -32,6 +32,6 @@ cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil: const column_view &strings, size_type ngrams, uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd index de45913fbb5..0a3ba52a3d5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,6 +15,6 @@ cdef extern from "nvtext/jaccard.hpp" namespace "nvtext" nogil: const column_view &input1, const column_view &input2, size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd index eaf0b8c63b1..94083fbafd3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd +++ 
b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -19,7 +19,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const column_view &a, const column_view &b, const size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -29,7 +29,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const column_view &a, const column_view &b, const size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const uint32_t seed, const column_view &a, const column_view &b, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -49,6 +49,6 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const uint64_t seed, const column_view &a, const column_view &b, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd index 41d153b99a0..6e4cc18e17f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from 
pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,6 +17,6 @@ cdef extern from "nvtext/ngrams_tokenize.hpp" namespace "nvtext" nogil: size_type ngrams, const string_scalar & delimiter, const string_scalar & separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd index 25678d12091..0184c1d8785 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd @@ -5,7 +5,7 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,7 +13,7 @@ cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] normalize_spaces( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -23,13 +23,13 @@ cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil: cdef unique_ptr[character_normalizer] create_character_normalizer( bool do_lower_case, const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] normalize_characters( const column_view & strings, const character_normalizer & normalizer, - cuda_stream_view stream, + 
cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd index d14ce40b168..628181b3f89 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,7 +17,7 @@ cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil: const column_view & targets, const column_view & replacements, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -26,6 +26,6 @@ cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil: size_type min_token_length, const string_scalar & replacement, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd index e6e2866008b..2088440749a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t 
from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] porter_stemmer_measure( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -26,12 +26,12 @@ cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil: column_view source_strings, letter_type ltype, size_type character_index, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler cdef unique_ptr[column] is_letter( column_view source_strings, letter_type ltype, column_view indices, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler ctypedef int32_t underlying_type_t_letter_type diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd index 3b7ae2e9b6f..1c6eccb0476 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,34 +15,34 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] tokenize( const column_view & strings, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] tokenize( const column_view & strings, const column_view & delimiters, - cuda_stream_view stream, 
+ cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] count_tokens( const column_view & strings, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] count_tokens( const column_view & strings, const column_view & delimiters, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] character_tokenize( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -50,7 +50,7 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: const column_view & strings, const column_view & row_indices, const string_scalar & separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -59,7 +59,7 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[tokenize_vocabulary] load_vocabulary( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -68,6 +68,6 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: const tokenize_vocabulary & vocabulary, const string_scalar & delimiter, size_type default_id, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd index a4bcde47f80..0c43f0d21ff 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from 
pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,7 +16,7 @@ cdef extern from "nvtext/wordpiece_tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[wordpiece_vocabulary] load_wordpiece_vocabulary( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -24,6 +24,6 @@ cdef extern from "nvtext/wordpiece_tokenize.hpp" namespace "nvtext" nogil: const column_view & strings, const wordpiece_vocabulary & vocabulary, size_type max_tokens_per_row, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd index e7c0f496de8..2e0c978f77d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.hash cimport DEFAULT_HASH_SEED from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: @@ -28,7 +28,7 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: int num_partitions, hash_id hash_function, uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from 
"cudf/partitioning.hpp" namespace "cudf" nogil: int num_partitions, hash_id hash_function, uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -48,7 +48,7 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: const table_view& t, const column_view& partition_map, int num_partitions, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -57,6 +57,6 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: const table_view& input, int num_partitions, int start_partition, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd index 823bd34e4a7..8bc636da998 100644 --- a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd @@ -15,7 +15,7 @@ from pylibcudf.libcudf.types cimport ( order_info, sorted, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -27,7 +27,7 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: interpolation interp, column_view ordered_indices, bool exact, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -38,6 +38,6 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: sorted is_input_sorted, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd index 9da4159d0c1..5fb383149a7 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport data_type, null_policy -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref ctypedef const scalar constscalar @@ -22,7 +22,7 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: const reduce_aggregation& agg, data_type output_type, optional[reference_wrapper[constscalar]] init, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -35,13 +35,13 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: const scan_aggregation& agg, scan_type inclusive, null_policy null_handling, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[scalar], unique_ptr[scalar]] minmax( const column_view& col, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/replace.pxd index 35078b64ee3..4821a13924c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/replace.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column_view cimport ( mutable_column_view, ) from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,47 +22,47 @@ cdef extern from "cudf/replace.hpp" namespace "cudf" nogil: cdef unique_ptr[column] 
replace_nulls( column_view source_column, column_view replacement_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, scalar replacement, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, replace_policy replace_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find_and_replace_all( column_view source_column, column_view values_to_replace, column_view replacement_values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, scalar lo, scalar lo_replace, scalar hi, scalar hi_replace, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, scalar lo, scalar hi, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] normalize_nans_and_zeros( column_view source_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef void normalize_nans_and_zeros( mutable_column_view source_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd index 598e148d643..beda4ec09fc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from 
pylibcudf.libcudf.types cimport size_type, data_type from pylibcudf.libcudf.utilities.span cimport device_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cuda/functional" namespace "cuda::std": @@ -19,17 +19,17 @@ cdef extern from "cuda/functional" namespace "cuda::std": cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil: cdef unique_ptr[column] interleave_columns( table_view source_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] tile( table_view source_table, size_type count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef void table_to_array( table_view input_table, device_span[byte] output, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd index 6ea400f92d3..69cdbd6f396 100644 --- a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd @@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type, null_order, order, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -44,7 +44,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: range_window_type preceding, range_window_type following, vector[rolling_request]& requests, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -54,7 +54,7 
@@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: column_view following_window, size_type min_periods, rolling_aggregation& agg, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rolling_window( @@ -63,7 +63,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: size_type following_window, size_type min_periods, rolling_aggregation& agg, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef pair[unique_ptr[column], unique_ptr[column]] make_range_windows( @@ -73,7 +73,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: null_order null_order, range_window_type preceding, range_window_type following, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/round.pxd b/python/pylibcudf/pylibcudf/libcudf/round.pxd index 39965d025c6..f21987844f3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/round.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,7 +20,7 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil: const column_view& input, int32_t decimal_places, rounding_method method, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -28,6 +28,6 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil: const column_view& input, int32_t decimal_places, rounding_method method, - cuda_stream_view stream, + cudaStream_t stream, 
device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd index 6c3dc71e019..10d3a42c572 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,31 +18,31 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: scalar(scalar other) except +libcudf_exception_handler data_type type() except +libcudf_exception_handler void set_valid_async( - bool is_valid, cuda_stream_view stream + bool is_valid, cudaStream_t stream ) except +libcudf_exception_handler - bool is_valid(cuda_stream_view stream) except +libcudf_exception_handler + bool is_valid(cudaStream_t stream) except +libcudf_exception_handler cdef cppclass numeric_scalar[T](scalar): void set_value( T value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler - T value(cuda_stream_view stream) except +libcudf_exception_handler + T value(cudaStream_t stream) except +libcudf_exception_handler cdef cppclass timestamp_scalar[T](scalar): void set_value( T value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef cppclass duration_scalar[T](scalar): void set_value( T value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef cppclass string_scalar(scalar): - string to_string(cuda_stream_view stream) except +libcudf_exception_handler + string to_string(cudaStream_t stream) except 
+libcudf_exception_handler cdef cppclass list_scalar(scalar): pass @@ -57,4 +57,4 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: scale_type scale, bool is_valid ) except +libcudf_exception_handler - T value(cuda_stream_view stream) except +libcudf_exception_handler + T value(cudaStream_t stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd index 6034b2ecc08..6b1329962cd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd @@ -9,49 +9,49 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type from pylibcudf.libcudf.types cimport int128 as int128_t -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: cdef unique_ptr[scalar] make_string_scalar( const string & _string, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_fixed_width_scalar[T]( T value, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_fixed_point_scalar[T]( int128_t value, scale_type scale, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_numeric_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_timestamp_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) 
except +libcudf_exception_handler cdef unique_ptr[scalar] make_empty_scalar_like( const column_view &, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_duration_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_default_constructed_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/search.pxd b/python/pylibcudf/pylibcudf/libcudf/search.pxd index b369ec05392..c1e41893d2e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/search.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil: table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -27,13 +27,13 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil: table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] contains( column_view haystack, column_view needles, - cuda_stream_view stream, + cudaStream_t 
stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd index 97822e2c374..c8e252ced2c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd @@ -17,7 +17,7 @@ from pylibcudf.libcudf.types cimport ( null_order, size_type ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -26,7 +26,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -34,7 +34,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -45,7 +45,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: null_policy null_handling, null_order null_precedence, bool percentage, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -53,7 +53,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef unique_ptr[table] segmented_sort_by_key( @@ -62,7 +62,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& segment_offsets, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -72,7 +72,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& segment_offsets, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -81,7 +81,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& keys, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -90,7 +90,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& keys, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -98,7 +98,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -106,7 +106,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -114,7 +114,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& col, size_type k, order sort_order, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -122,6 +122,6 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& col, size_type k, order sort_order, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git 
a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd index 0358aa4068c..9f8686da472 100644 --- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd @@ -14,7 +14,7 @@ from pylibcudf.libcudf.types cimport ( null_equality, size_type, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -29,7 +29,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: table_view source_table, vector[size_type] keys, size_type keep_threshold, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -37,14 +37,14 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: table_view source_table, vector[size_type] keys, size_type keep_threshold, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] apply_boolean_mask( table_view source_table, column_view boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -53,7 +53,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: vector[size_type] keys, duplicate_keep_option keep, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -63,7 +63,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equals, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -72,7 +72,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, 
null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -82,7 +82,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -90,6 +90,6 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: table_view predicate_table, const expression& predicate_expr, table_view filter_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd index 06e95c95870..0cee9e43346 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd @@ -4,7 +4,7 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -12,15 +12,15 @@ cdef extern from "cudf/strings/attributes.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] count_characters( column_view source_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] count_bytes( column_view source_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] code_points( column_view source_strings, - cuda_stream_view 
stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd index b615cd984db..7b8ac094311 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.char_types cimport string_character_types -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,18 +14,18 @@ cdef extern from "cudf/strings/capitalize.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] capitalize( const column_view & strings, const string_scalar & delimiters, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] title( const column_view & strings, string_character_types sequence_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] is_title( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd index 463586d9f37..a056f1b4737 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd @@ -4,22 +4,22 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from 
pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/strings/case.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] to_lower( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] to_upper( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] swapcase( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd index 7706498eceb..c6af0fb73d2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -29,7 +29,7 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \ column_view source_strings, string_character_types types, string_character_types verify_types, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] filter_characters_of_type( @@ -37,5 +37,5 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \ 
string_character_types types_to_remove, string_scalar replacement, string_character_types types_to_keep, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd index ef831d3b167..2e2b6656797 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -27,7 +27,7 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar separator, string_scalar narep, separator_on_nulls separate_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] concatenate( @@ -36,14 +36,14 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar separator_narep, string_scalar col_narep, separator_on_nulls separate_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] join_strings( column_view input, string_scalar separator, string_scalar narep, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] join_list_elements( @@ -53,7 +53,7 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar string_narep, separator_on_nulls separate_nulls, output_if_empty_list 
empty_list_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] join_list_elements( @@ -62,5 +62,5 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd index f60782e93b7..cc9a7c6835d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.regex_program cimport regex_program -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,31 +16,31 @@ cdef extern from "cudf/strings/contains.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] contains_re( column_view source_strings, regex_program, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] count_re( column_view source_strings, regex_program, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] matches_re( column_view source_strings, regex_program, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] like( column_view source_strings, string pattern, string escape_character, - cuda_stream_view 
stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] like( column_view source_strings, column_view patterns, string_scalar escape_character, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd index b5b837878f9..8875bc62ed5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,12 +15,12 @@ cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \ cdef unique_ptr[column] to_booleans( column_view input, string_scalar true_string, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_booleans( column_view booleans, string_scalar true_string, string_scalar false_string, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd index 5779839a685..92983f9dc49 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from 
pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,18 +17,18 @@ cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \ column_view input, data_type timestamp_type, string format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_timestamps( column_view timestamps, string format, column_view names, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_timestamp( column_view input_col, string format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd index 2eae8b987b9..4f22b715ef9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,11 +17,11 @@ cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \ const column_view & input, data_type duration_type, const string & format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_durations( const 
column_view & durations, const string & format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd index e5f512c331f..8aaa0ebf4c7 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,17 +15,17 @@ cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \ cdef unique_ptr[column] to_fixed_point( column_view input, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_fixed_point( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_fixed_point( column_view input, data_type decimal_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd index 4ea1cd527f4..5a111c1979d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view 
cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,16 +15,16 @@ cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \ cdef unique_ptr[column] to_floats( column_view strings, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_floats( column_view floats, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_float( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd index 306c4b66758..4d3f4ff758a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,40 +15,40 @@ cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \ cdef unique_ptr[column] to_integers( column_view input, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_integers( column_view integers, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) 
except +libcudf_exception_handler cdef unique_ptr[column] is_integer( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] is_integer( column_view input, data_type int_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] hex_to_integers( column_view input, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_hex( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] integers_to_hex( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd index d12f3992d85..00a64787957 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,16 +13,16 @@ cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] ipv4_to_integers( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] integers_to_ipv4( column_view integers, - 
cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_ipv4( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd index 8ed381e87da..bfae49bae4b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,5 +17,5 @@ cdef extern from "cudf/strings/convert/convert_lists.hpp" namespace \ column_view input, string_scalar na_rep, column_view separators, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd index b20c03f976b..db2d4f4efc0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,10 +13,10 @@ cdef extern from 
"cudf/strings/convert/convert_urls.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] url_encode( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] url_decode( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd index 845de206dbf..d3e0d0fd35a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,18 +16,18 @@ cdef extern from "cudf/strings/extract.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[table] extract( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] extract_all_record( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] extract_single( column_view input, regex_program prog, size_type group, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd index b8934aeb7fe..42752152de8 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,37 +16,37 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] contains( column_view source_strings, string_scalar target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] contains( column_view source_strings, column_view target_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] ends_with( column_view source_strings, string_scalar target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] ends_with( column_view source_strings, column_view target_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] starts_with( column_view source_strings, string_scalar target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] starts_with( column_view source_strings, column_view target_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find( @@ -54,14 +54,14 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil: string_scalar target, size_type start, 
size_type stop, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find( column_view source_strings, column_view target, size_type start, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rfind( @@ -69,5 +69,5 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil: string_scalar target, size_type start, size_type stop, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd index da751990053..1e42a476c13 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table cimport table -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,11 +15,11 @@ cdef extern from "cudf/strings/find_multiple.hpp" namespace "cudf::strings" \ cdef unique_ptr[table] contains_multiple( column_view input, column_view targets, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find_multiple( column_view input, column_view targets, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd index 
02ecbef7095..d72ffd09d8e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.strings.regex_program cimport regex_program -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,11 +14,11 @@ cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] findall( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find_re( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd index 5e3e5c43f61..8b291a22a05 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.side_type cimport side_type from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,17 +20,17 @@ cdef extern from "cudf/strings/padding.hpp" namespace "cudf::strings" nogil: size_type width, side_type side, string fill_char, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) 
except +libcudf_exception_handler cdef unique_ptr[column] zfill( column_view input, size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] zfill_by_widths( column_view input, column_view widths, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd index 05a2954af35..86519de0b90 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,13 +16,13 @@ cdef extern from "cudf/strings/repeat_strings.hpp" namespace "cudf::strings" \ cdef unique_ptr[column] repeat_strings( column_view input, size_type repeat_times, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] repeat_strings( column_view input, column_view repeat_times, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd index 263b91475b8..cf2573af5ed 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport 
string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil: string_scalar repl, size_type start, size_type stop, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace( @@ -26,12 +26,12 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil: string_scalar target, string_scalar repl, int32_t maxrepl, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_multiple( column_view source_strings, column_view target_strings, column_view repl_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd index 5f5cbaeaf55..d3e958841ab 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.strings.regex_flags cimport regex_flags from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,7 +22,7 @@ cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil: regex_program prog, string_scalar replacement, size_type max_replace_count, - cuda_stream_view stream, + cudaStream_t stream, 
device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_re( @@ -30,12 +30,12 @@ cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil: vector[string] patterns, column_view replacements, regex_flags flags, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_with_backrefs( column_view input, regex_program prog, string replacement, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd index 6e6fc2acac4..39a3ac4b769 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd @@ -4,12 +4,12 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/strings/reverse.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] reverse( column_view source_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd index 0c99455ea33..6c9031482ca 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport 
column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.table.table cimport table -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,11 +17,11 @@ cdef extern from "cudf/strings/split/partition.hpp" namespace \ cdef unique_ptr[table] partition( column_view input, string_scalar delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[table] rpartition( column_view input, string_scalar delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd index 9ed741b608a..5d14fefdb1b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,35 +20,35 @@ cdef extern from "cudf/strings/split/split.hpp" namespace \ column_view strings_column, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[table] rsplit( column_view strings_column, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] 
split_record( column_view strings, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rsplit_record( column_view strings, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] split_part( column_view strings, string_scalar delimiter, size_type index, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler @@ -59,26 +59,26 @@ cdef extern from "cudf/strings/split/split_re.hpp" namespace \ const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[table] rsplit_re( const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] split_record_re( const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rsplit_record_re( const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd index 8c72fed7219..5fa0dfb4289 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport int64_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/strings/strings_column_view.hpp" namespace "cudf" nogil: cdef cppclass strings_column_view: strings_column_view(column_view) except +libcudf_exception_handler - int64_t chars_size(cuda_stream_view) except +libcudf_exception_handler + int64_t chars_size(cudaStream_t) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd index 13e017c33f7..4d56b2de5d3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.side_type cimport side_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,5 +16,5 @@ cdef extern from "cudf/strings/strip.hpp" namespace "cudf::strings" nogil: column_view input, side_type side, string_scalar to_strip, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd index 21c2fe4a77b..d0b4f192307 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd @@ -7,7 +7,7 @@ from 
pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,7 +17,7 @@ cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil: numeric_scalar[size_type] start, numeric_scalar[size_type] end, numeric_scalar[size_type] step, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -25,6 +25,6 @@ cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil: column_view source_strings, column_view starts, column_view stops, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd index 9bdc0489a89..dcf5aa20948 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport char_utf8 -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "cudf/strings/translate.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] translate( column_view input, vector[pair[char_utf8, char_utf8]] chars_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,5 +31,5 @@ cdef extern from 
"cudf/strings/translate.hpp" namespace "cudf::strings" nogil: vector[pair[char_utf8, char_utf8]] characters_to_filter, filter_type keep_characters, string_scalar replacement, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd index 8aa5631a12e..2ddd924df48 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,5 +14,5 @@ cdef extern from "cudf/strings/wrap.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] wrap( column_view input, size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd index 7b339782295..d51a51dfb13 100644 --- a/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport column_view @@ -22,5 +22,5 @@ cdef extern from "cudf/structs/structs_column_view.hpp" namespace "cudf" nogil: column_view parent() except +libcudf_exception_handler column_view get_sliced_child( size_type index, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd index 230131d5520..dcfc046a904 100644 --- a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport mutable_table_view, table_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,12 +14,12 @@ cdef extern from "cudf/table/table.hpp" namespace "cudf" nogil: cdef cppclass table: table( const table&, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler table( table_view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler size_type num_columns() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index 9b2ace2d940..ebc9d8bfa1d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -16,14 +16,14 @@ from 
pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type from pylibcudf.libcudf.types cimport null_aware, output_nullability from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: cdef pair[unique_ptr[device_buffer], size_type] bools_to_mask ( const column_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,19 +31,19 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: const bitmask_type* bitmask, size_type begin_bit, size_type end_bit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[device_buffer], size_type] nans_to_nulls( const column_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] column_nans_to_nulls( const column_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -55,33 +55,33 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: optional[void *] user_data, null_aware is_null_aware, output_nullability null_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[column], table_view] one_hot_encode( column_view input_column, column_view categories, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef 
unique_ptr[column] compute_column( const table_view table, const expression& expr, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] compute_column_jit( const table_view table, const expression& expr, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd index 2345ab5a2d9..0ce2048ba0f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,6 +16,6 @@ cdef extern from "cudf/transpose.hpp" namespace "cudf" nogil: table_view ] transpose( table_view input_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/unary.pxd b/python/pylibcudf/pylibcudf/libcudf/unary.pxd index d3fd2f2f976..6f59ff8d5e0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/unary.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -42,32 +42,32 @@ cdef extern from "cudf/unary.hpp" 
namespace "cudf" nogil: cdef extern unique_ptr[column] unary_operation( column_view input, unary_operator op, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef extern unique_ptr[column] is_null( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef extern unique_ptr[column] is_valid( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef extern unique_ptr[column] cast( column_view input, data_type out_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef extern bool is_supported_cast(data_type from_, data_type to) noexcept cdef extern unique_ptr[column] is_nan( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef extern unique_ptr[column] is_not_nan( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd b/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd index 5954dace85e..04001f5a064 100644 --- a/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport ( null_policy, size_type, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/reduction/unique_count.hpp" namespace "cudf" nogil: @@ -17,9 +17,9 @@ cdef extern from "cudf/reduction/unique_count.hpp" namespace "cudf" nogil: column_view column, null_policy null_handling, nan_policy nan_handling, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except 
+libcudf_exception_handler cdef size_type unique_count( table_view source_table, null_equality nulls_equal, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd index a9569f11706..661db24f5aa 100644 --- a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 +from cuda.bindings.cyruntime cimport cudaStream_t from libcpp cimport bool -from rmm.librmm.cuda_stream_view cimport cuda_stream_view - cdef extern from "cudf/utilities/default_stream.hpp" namespace "cudf" nogil: cdef bool is_ptds_enabled() - cdef cuda_stream_view get_default_stream() + cdef cudaStream_t get_default_stream() diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd index be47db18a59..88b09c01531 100644 --- a/python/pylibcudf/pylibcudf/lists.pxd +++ b/python/pylibcudf/pylibcudf/lists.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -9,7 +9,6 @@ from pylibcudf.libcudf.copying cimport out_of_bounds_policy from pylibcudf.libcudf.lists.combine cimport concatenate_null_policy from pylibcudf.libcudf.lists.contains cimport duplicate_find_option from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -26,33 +25,33 @@ ctypedef fused ColumnOrSizeType: cpdef Table explode_outer( Table, size_type explode_column_idx, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column concatenate_rows( Table, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column concatenate_list_elements( Column, concatenate_null_policy null_policy, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column contains( Column, ColumnOrScalar, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column contains_nulls( Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -60,13 +59,13 @@ cpdef Column index_of( Column, ColumnOrScalar, duplicate_find_option, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column reverse( Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -74,20 +73,20 @@ cpdef Column segmented_gather( Column, Column, out_of_bounds_policy bounds_policy=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column extract_list_element( Column, ColumnOrSizeType, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column count_elements( Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -95,7 +94,7 @@ cpdef Column sequences( Column, Column, Column steps = *, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -104,7 +103,7 @@ cpdef Column sort_lists( order, null_order, bool stable = *, 
- Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -113,7 +112,7 @@ cpdef Column difference_distinct( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -122,7 +121,7 @@ cpdef Column have_overlap( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -131,7 +130,7 @@ cpdef Column intersect_distinct( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -140,14 +139,14 @@ cpdef Column union_distinct( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column apply_boolean_mask( Column, Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -155,6 +154,6 @@ cpdef Column distinct( Column, null_equality, nan_equality, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index a3bcf9f76d6..1e418b59726 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -1,16 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.copying import OutOfBoundsPolicy from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import NanEquality, NullEquality, NullOrder, Order +from pylibcudf.utils import CudaStreamLike class ConcatenateNullPolicy(IntEnum): IGNORE = ... 
@@ -23,66 +23,66 @@ class DuplicateFindOption(IntEnum): def explode_outer( input: Table, explode_column_idx: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def concatenate_rows( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def concatenate_list_elements( input: Column, null_policy: ConcatenateNullPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains( input: Column, search_key: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains_nulls( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def index_of( input: Column, search_key: Column | Scalar, find_option: DuplicateFindOption, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def reverse( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def segmented_gather( input: Column, gather_map_list: Column, bounds_policy: OutOfBoundsPolicy = OutOfBoundsPolicy.DONT_CHECK, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def extract_list_element( input: Column, index: Column | int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_elements( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def sequences( starts: Column, sizes: Column, steps: Column | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sort_lists( @@ -90,7 +90,7 @@ def sort_lists( sort_order: Order, na_position: NullOrder, stable: bool = False, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def difference_distinct( @@ -98,7 +98,7 @@ def difference_distinct( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def have_overlap( @@ -106,7 +106,7 @@ def have_overlap( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def intersect_distinct( @@ -114,7 +114,7 @@ def intersect_distinct( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def union_distinct( @@ -122,19 +122,19 @@ def union_distinct( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def apply_boolean_mask( input: Column, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def distinct( input: Column, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index 0076f7da677..fd05242e44f 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -55,6 +55,7 @@ from .column cimport Column, ListsColumnView from .scalar cimport Scalar from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ConcatenateNullPolicy", @@ -82,7 +83,7 @@ __all__ = [ cpdef Table explode_outer( Table input, size_type explode_column_idx, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Explode a column of lists into rows. @@ -105,20 +106,21 @@ cpdef Table explode_outer( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_explode.explode_outer( - input.view(), explode_column_idx, stream.view(), mr.get_mr() + input.view(), explode_column_idx, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column concatenate_rows( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Concatenate multiple lists columns into a single lists column row-wise. 
@@ -139,21 +141,22 @@ cpdef Column concatenate_rows( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_concatenate_rows( - input.view(), concatenate_null_policy.IGNORE, stream.view(), mr.get_mr() + input.view(), concatenate_null_policy.IGNORE, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column concatenate_list_elements( Column input, concatenate_null_policy null_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Concatenate multiple lists on the same row into a single list. @@ -174,21 +177,22 @@ cpdef Column concatenate_list_elements( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_concatenate_list_elements( - input.view(), null_policy, stream.view(), mr.get_mr() + input.view(), null_policy, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column contains( Column input, ColumnOrScalar search_key, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of bool values indicating whether @@ -218,7 +222,8 @@ cpdef Column contains( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if not isinstance(search_key, (Column, Scalar)): @@ -230,15 +235,15 @@ cpdef Column contains( search_key.view() if ColumnOrScalar is Column else dereference( search_key.get() ), - stream.view(), + _cs, mr.get_mr(), ) - 
return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column contains_nulls( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of bool values indicating whether @@ -262,21 +267,22 @@ cpdef Column contains_nulls( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_contains.contains_nulls( - list_view.view(), stream.view(), mr.get_mr() + list_view.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column index_of( Column input, ColumnOrScalar search_key, duplicate_find_option find_option, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of index values indicating the position of a search @@ -307,7 +313,8 @@ cpdef Column index_of( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -317,15 +324,15 @@ cpdef Column index_of( search_key.get() ), find_option, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column reverse( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Reverse the element order within each list of the input column. 
@@ -347,19 +354,20 @@ cpdef Column reverse( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_reverse.reverse(list_view.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_reverse.reverse(list_view.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column segmented_gather( Column input, Column gather_map_list, out_of_bounds_policy bounds_policy=out_of_bounds_policy.DONT_CHECK, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column with elements gathered based on the indices in gather_map_list @@ -394,7 +402,8 @@ cpdef Column segmented_gather( cdef ListsColumnView list_view1 = input.list_view() cdef ListsColumnView list_view2 = gather_map_list.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -402,16 +411,16 @@ cpdef Column segmented_gather( list_view1.view(), list_view2.view(), bounds_policy, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column extract_list_element( Column input, ColumnOrSizeType index, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of extracted list elements. 
@@ -433,22 +442,23 @@ cpdef Column extract_list_element( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_extract_list_element( list_view.view(), index.view() if ColumnOrSizeType is Column else index, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_elements( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Count the number of rows in each @@ -472,20 +482,21 @@ cpdef Column count_elements( cdef ListsColumnView list_view = input.list_view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_count_elements(list_view.view(), stream.view(), mr.get_mr()) + c_result = cpp_count_elements(list_view.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sequences( Column starts, Column sizes, Column steps = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a lists column in which each row contains a sequence of @@ -509,7 +520,8 @@ cpdef Column sequences( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if steps is not None: @@ -518,22 +530,22 @@ cpdef Column sequences( starts.view(), steps.view(), sizes.view(), - stream.view(), + _cs, mr.get_mr(), ) else: with nogil: c_result = cpp_filling.sequences( - starts.view(), sizes.view(), stream.view(), 
mr.get_mr() + starts.view(), sizes.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sort_lists( Column input, order sort_order, null_order na_position, bool stable = False, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sort the elements within a list in each row of a list column. @@ -561,7 +573,8 @@ cpdef Column sort_lists( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -570,7 +583,7 @@ cpdef Column sort_lists( list_view.view(), sort_order, na_position, - stream.view(), + _cs, mr.get_mr(), ) else: @@ -578,10 +591,10 @@ cpdef Column sort_lists( list_view.view(), sort_order, na_position, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column difference_distinct( @@ -589,7 +602,7 @@ cpdef Column difference_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of index values indicating the position of a search @@ -617,7 +630,8 @@ cpdef Column difference_distinct( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -626,10 +640,10 @@ cpdef Column difference_distinct( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, 
mr) cpdef Column have_overlap( @@ -637,7 +651,7 @@ cpdef Column have_overlap( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Check if lists at each row of the given lists columns overlap. @@ -664,7 +678,8 @@ cpdef Column have_overlap( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -673,10 +688,10 @@ cpdef Column have_overlap( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column intersect_distinct( @@ -684,7 +699,7 @@ cpdef Column intersect_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a lists column of distinct elements common to two input lists columns. 
@@ -711,7 +726,8 @@ cpdef Column intersect_distinct( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -720,10 +736,10 @@ cpdef Column intersect_distinct( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column union_distinct( @@ -731,7 +747,7 @@ cpdef Column union_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a lists column of distinct elements found in @@ -759,7 +775,8 @@ cpdef Column union_distinct( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -768,16 +785,16 @@ cpdef Column union_distinct( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column apply_boolean_mask( Column input, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters elements in each row of the input lists column using a boolean mask @@ -802,24 +819,25 @@ cpdef Column apply_boolean_mask( cdef ListsColumnView list_view = input.list_view() cdef ListsColumnView mask_view = boolean_mask.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: 
c_result = cpp_apply_boolean_mask( list_view.view(), mask_view.view(), - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column distinct( Column input, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a new list column without duplicate elements in each list. @@ -843,7 +861,8 @@ cpdef Column distinct( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -852,10 +871,10 @@ cpdef Column distinct( nulls_equal, nans_equal, duplicate_keep_option.KEEP_ANY, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) ConcatenateNullPolicy.__str__ = ConcatenateNullPolicy.__repr__ DuplicateFindOption.__str__ = DuplicateFindOption.__repr__ diff --git a/python/pylibcudf/pylibcudf/merge.pxd b/python/pylibcudf/pylibcudf/merge.pxd index aed9dda7479..07624852289 100644 --- a/python/pylibcudf/pylibcudf/merge.pxd +++ b/python/pylibcudf/pylibcudf/merge.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from .table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -12,6 +11,6 @@ cpdef Table merge ( list key_cols, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/merge.pyi b/python/pylibcudf/pylibcudf/merge.pyi index f96e1d8534e..50e87d5bffa 100644 --- a/python/pylibcudf/pylibcudf/merge.pyi +++ b/python/pylibcudf/pylibcudf/merge.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.table import Table from pylibcudf.types import NullOrder, Order +from pylibcudf.utils import CudaStreamLike def merge( tables_to_merge: list[Table], key_cols: list[int], column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx index a6cbaf81051..3c0cd93a342 100644 --- a/python/pylibcudf/pylibcudf/merge.pyx +++ b/python/pylibcudf/pylibcudf/merge.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["merge"] @@ -22,7 +23,7 @@ cpdef Table merge ( list key_cols, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Merge a set of sorted tables. @@ -58,7 +59,8 @@ cpdef Table merge ( c_tables_to_merge.push_back(( tbl).view()) cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -67,7 +69,7 @@ cpdef Table merge ( c_key_cols, c_column_order, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/null_mask.pxd index 6eb10eddb2e..e7fa70e23ae 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/null_mask.pxd @@ -5,18 +5,19 @@ from pylibcudf.libcudf.types cimport mask_state, size_type from rmm.pylibrmm.device_buffer cimport DeviceBuffer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column -cpdef DeviceBuffer copy_bitmask(Column col, Stream stream=*, DeviceMemoryResource mr=*) +cpdef DeviceBuffer copy_bitmask( + Column col, object stream = *, DeviceMemoryResource mr=* +) cpdef DeviceBuffer copy_bitmask_from_bitmask( object bitmask, size_type begin_bit, size_type end_bit, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -25,24 +26,24 @@ cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits) cpdef DeviceBuffer create_null_mask( 
size_type size, mask_state state=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) -cpdef tuple bitmask_and(list columns, Stream stream=*, DeviceMemoryResource mr=*) +cpdef tuple bitmask_and(list columns, object stream = *, DeviceMemoryResource mr=*) -cpdef tuple bitmask_or(list columns, Stream stream=*, DeviceMemoryResource mr=*) +cpdef tuple bitmask_or(list columns, object stream = *, DeviceMemoryResource mr=*) cpdef size_type null_count( object bitmask, size_type start, size_type stop, - Stream stream=* + object stream = * ) cpdef size_type index_of_first_set_bit( object bitmask, size_type start, size_type stop, - Stream stream=* + object stream = * ) diff --git a/python/pylibcudf/pylibcudf/null_mask.pyi b/python/pylibcudf/pylibcudf/null_mask.pyi index 98f6e60fb0d..45e130b704e 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyi +++ b/python/pylibcudf/pylibcudf/null_mask.pyi @@ -3,44 +3,44 @@ from rmm.pylibrmm.device_buffer import DeviceBuffer from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.span import Span from pylibcudf.types import MaskState +from pylibcudf.utils import CudaStreamLike def copy_bitmask( col: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> DeviceBuffer: ... def copy_bitmask_from_bitmask( bitmask: Span, begin_bit: int, end_bit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> DeviceBuffer: ... def bitmask_allocation_size_bytes(number_of_bits: int) -> int: ... def create_null_mask( size: int, state: MaskState = MaskState.UNINITIALIZED, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> DeviceBuffer: ... 
def bitmask_and( columns: list[Column], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[DeviceBuffer, int]: ... def bitmask_or( columns: list[Column], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[DeviceBuffer, int]: ... def null_count( - bitmask: Span, start: int, stop: int, stream: Stream | None = None + bitmask: Span, start: int, stop: int, stream: CudaStreamLike | None = None ) -> int: ... def index_of_first_set_bit( - bitmask: Span, start: int, stop: int, stream: Stream | None = None + bitmask: Span, start: int, stop: int, stream: CudaStreamLike | None = None ) -> int: ... diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx index 176e73047e2..164c51aca9f 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyx +++ b/python/pylibcudf/pylibcudf/null_mask.pyx @@ -19,6 +19,7 @@ from .span import is_span as py_is_span from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "bitmask_allocation_size_bytes", @@ -31,7 +32,7 @@ __all__ = [ ] cdef DeviceBuffer buffer_to_python( - device_buffer buf, Stream stream, DeviceMemoryResource mr + device_buffer buf, object stream, DeviceMemoryResource mr ): return DeviceBuffer.c_from_unique_ptr( make_unique[device_buffer](move(buf)), stream, mr @@ -40,7 +41,7 @@ cdef DeviceBuffer buffer_to_python( cpdef DeviceBuffer copy_bitmask( Column col, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copies ``col``'s bitmask into a ``DeviceBuffer``. 
@@ -63,20 +64,21 @@ cpdef DeviceBuffer copy_bitmask( ``DeviceBuffer`` if ``col`` is not nullable """ cdef device_buffer db - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - db = cpp_null_mask.copy_bitmask(col.view(), stream.view(), mr.get_mr()) + db = cpp_null_mask.copy_bitmask(col.view(), _cs, mr.get_mr()) - return buffer_to_python(move(db), stream, mr) + return buffer_to_python(move(db), _stream, mr) cpdef DeviceBuffer copy_bitmask_from_bitmask( object bitmask, size_type begin_bit, size_type end_bit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copies a portion of a bitmask into a ``DeviceBuffer``. @@ -108,7 +110,8 @@ cpdef DeviceBuffer copy_bitmask_from_bitmask( f"got {type(bitmask).__name__}" ) cdef device_buffer db - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef uintptr_t ptr = bitmask.ptr @@ -117,11 +120,11 @@ cpdef DeviceBuffer copy_bitmask_from_bitmask( ptr, begin_bit, end_bit, - stream.view(), + _cs, mr.get_mr() ) - return buffer_to_python(move(db), stream, mr) + return buffer_to_python(move(db), _stream, mr) cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits): @@ -148,7 +151,7 @@ cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits): cpdef DeviceBuffer create_null_mask( size_type size, mask_state state = mask_state.UNINITIALIZED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Creates a ``DeviceBuffer`` for use as a null value indicator bitmask of a @@ -176,16 +179,17 @@ cpdef DeviceBuffer create_null_mask( state """ cdef device_buffer db - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - db = 
cpp_null_mask.create_null_mask(size, state, stream.view(), mr.get_mr()) + db = cpp_null_mask.create_null_mask(size, state, _cs, mr.get_mr()) - return buffer_to_python(move(db), stream, mr) + return buffer_to_python(move(db), _stream, mr) -cpdef tuple bitmask_and(list columns, Stream stream=None, DeviceMemoryResource mr=None): +cpdef tuple bitmask_and(list columns, object stream=None, DeviceMemoryResource mr=None): """Performs bitwise AND of the bitmasks of a list of columns. For details, see :cpp:func:`bitmask_and`. @@ -206,16 +210,19 @@ cpdef tuple bitmask_and(list columns, Stream stream=None, DeviceMemoryResource m """ cdef Table c_table = Table(columns) cdef pair[device_buffer, size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_null_mask.bitmask_and(c_table.view(), stream.view(), mr.get_mr()) + c_result = cpp_null_mask.bitmask_and( + c_table.view(), _cs, mr.get_mr() + ) - return buffer_to_python(move(c_result.first), stream, mr), c_result.second + return buffer_to_python(move(c_result.first), _stream, mr), c_result.second -cpdef tuple bitmask_or(list columns, Stream stream=None, DeviceMemoryResource mr=None): +cpdef tuple bitmask_or(list columns, object stream=None, DeviceMemoryResource mr=None): """Performs bitwise OR of the bitmasks of a list of columns. For details, see :cpp:func:`bitmask_or`. 
@@ -236,20 +243,21 @@ cpdef tuple bitmask_or(list columns, Stream stream=None, DeviceMemoryResource mr """ cdef Table c_table = Table(columns) cdef pair[device_buffer, size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_null_mask.bitmask_or(c_table.view(), stream.view(), mr.get_mr()) + c_result = cpp_null_mask.bitmask_or(c_table.view(), _cs, mr.get_mr()) - return buffer_to_python(move(c_result.first), stream, mr), c_result.second + return buffer_to_python(move(c_result.first), _stream, mr), c_result.second cpdef size_type null_count( object bitmask, size_type start, size_type stop, - Stream stream=None + object stream=None ): """Given a validity bitmask, counts the number of null elements. @@ -277,20 +285,21 @@ cpdef size_type null_count( f"got {type(bitmask).__name__}" ) cdef uintptr_t ptr = bitmask.ptr - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: return cpp_null_mask.null_count( ptr, start, stop, - stream.view() + _cs ) cpdef size_type index_of_first_set_bit( object bitmask, size_type start, size_type stop, - Stream stream=None + object stream=None ): """Given a validity bitmask, returns the index of the first valid element relative to ``start``. 
@@ -319,11 +328,12 @@ cpdef size_type index_of_first_set_bit( f"got {type(bitmask).__name__}" ) cdef uintptr_t ptr = bitmask.ptr - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: return cpp_null_mask.index_of_first_set_bit( ptr, start, stop, - stream.view() + _cs ) diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd index 8cd73fe41ad..2bc3f75b174 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.byte_pair_encode cimport bpe_merge_pairs from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class BPEMergePairs: @@ -16,6 +15,6 @@ cpdef Column byte_pair_encoding( Column input, BPEMergePairs merge_pairs, Scalar separator=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi index 4abf1f52b4d..7ee48f72209 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class BPEMergePairs: def __init__( self, merge_pairs: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... @@ -19,6 +19,6 @@ def byte_pair_encoding( input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx index 001b9dfca1e..023e00a1169 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["BPEMergePairs", "byte_pair_encoding"] @@ -30,14 +31,17 @@ cdef class BPEMergePairs: def __cinit__( self, Column merge_pairs, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): cdef column_view c_pairs = merge_pairs.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - self.c_obj = move(cpp_load_merge_pairs(c_pairs, stream.view(), mr.get_mr())) + self.c_obj = move( + cpp_load_merge_pairs(c_pairs, _cs, mr.get_mr()) + ) __hash__ = None @@ -45,7 +49,7 @@ cpdef Column byte_pair_encoding( Column input, BPEMergePairs merge_pairs, Scalar separator=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -70,12 +74,13 @@ cpdef Column byte_pair_encoding( An encoded column of strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if separator is None: separator = Scalar.from_libcudf( - cpp_make_string_scalar(" ".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar(" ".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: @@ -84,9 +89,9 @@ cpdef Column byte_pair_encoding( input.view(), dereference(merge_pairs.c_obj.get()), dereference(separator.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd index ecca0a495a1..d038d4a3e27 100644 --- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd @@ -1,22 +1,21 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column build_suffix_array( Column input, size_type min_width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column resolve_duplicates( Column input, Column indices, size_type min_width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column resolve_duplicates_pair( @@ -25,6 +24,6 @@ cpdef Column resolve_duplicates_pair( Column input2, Column indices2, size_type min_width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi index 6e3d6883df4..653ee588f61 100644 --- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def build_suffix_array( input: Column, min_width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def resolve_duplicates( input: Column, indices: Column, min_width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def resolve_duplicates_pair( @@ -25,6 +25,6 @@ def resolve_duplicates_pair( input2: Column, indices2: Column, min_width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx index c71ae479674..e679841a792 100644 --- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator import dereference @@ -18,6 +18,7 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.librmm.device_buffer cimport device_buffer from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "build_suffix_array", @@ -36,14 +37,12 @@ cdef Column _column_from_suffix_array( device_buffer(), 0 ) - ), - stream, - mr + ), stream, mr ) cpdef Column build_suffix_array( - Column input, size_type min_width, Stream stream=None, DeviceMemoryResource mr=None + Column input, size_type min_width, object stream=None, DeviceMemoryResource mr=None ): """ Builds a suffix array for the input strings column. 
@@ -68,22 +67,23 @@ cpdef Column build_suffix_array( New column of suffix array """ cdef cpp_suffix_array_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_build_suffix_array( - input.view(), min_width, stream.view(), mr.get_mr() + input.view(), min_width, _cs, mr.get_mr() ) - return _column_from_suffix_array(move(c_result), stream, mr) + return _column_from_suffix_array(move(c_result), _stream, mr) cpdef Column resolve_duplicates( Column input, Column indices, size_type min_width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -111,15 +111,16 @@ cpdef Column resolve_duplicates( New column of duplicate strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_resolve_duplicates( - input.view(), indices.view(), min_width, stream.view(), mr.get_mr() + input.view(), indices.view(), min_width, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column resolve_duplicates_pair( @@ -128,7 +129,7 @@ cpdef Column resolve_duplicates_pair( Column input2, Column indices2, size_type min_width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -161,7 +162,8 @@ cpdef Column resolve_duplicates_pair( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -171,8 +173,8 @@ cpdef Column resolve_duplicates_pair( input2.view(), indices2.view(), min_width, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return 
Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd index aca87ac4882..c0297ebd887 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd @@ -1,20 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column edit_distance( Column input, Column targets, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column edit_distance_matrix( Column input, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi index 8c0e97b9951..5a6bde4cb66 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi @@ -1,19 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def edit_distance( input: Column, targets: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def edit_distance_matrix( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx index 14d3b4539dc..4b9d3f6bcc3 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx @@ -17,13 +17,14 @@ from rmm.pylibrmm.stream cimport Stream from ..column cimport Column from ..utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["edit_distance", "edit_distance_matrix"] cpdef Column edit_distance( Column input, Column targets, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -48,18 +49,19 @@ cpdef Column edit_distance( cdef column_view c_strings = input.view() cdef column_view c_targets = targets.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_edit_distance(c_strings, c_targets, stream.view(), mr.get_mr()) + c_result = cpp_edit_distance(c_strings, c_targets, _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column edit_distance_matrix( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -88,10 +90,11 @@ cpdef Column edit_distance_matrix( ) cdef column_view c_strings = input.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_edit_distance_matrix(c_strings, stream.view(), mr.get_mr()) + c_result = cpp_edit_distance_matrix(c_strings, _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git 
a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd index 1eb55f1fcf6..85477223954 100644 --- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t @@ -6,21 +6,20 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column generate_ngrams( Column input, size_type ngrams, Scalar separator, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column generate_character_ngrams( Column input, size_type ngrams=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -28,6 +27,6 @@ cpdef Column hash_character_ngrams( Column input, size_type ngrams, uint32_t seed, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi index 7a522acc5a9..317fdb9ee73 100644 --- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import Any @@ -6,28 +6,28 @@ from typing import Any import numpy as np from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def generate_ngrams( input: Column, ngrams: int, separator: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def generate_character_ngrams( input: Column, ngrams: int = 2, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def hash_character_ngrams( input: Column, ngrams: int, seed: int | np.unsignedinteger[Any], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx index ca8a21c279c..6d70751a5a0 100644 --- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t @@ -18,6 +18,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "generate_ngrams", @@ -29,7 +30,7 @@ cpdef Column generate_ngrams( Column input, size_type ngrams, Scalar separator, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -56,7 +57,8 @@ cpdef Column generate_ngrams( cdef column_view c_strings = input.view() cdef const string_scalar* c_separator = separator.c_obj.get() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -64,16 +66,16 @@ cpdef Column generate_ngrams( c_strings, ngrams, c_separator[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column generate_character_ngrams( Column input, size_type ngrams = 2, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -97,24 +99,25 @@ cpdef Column generate_character_ngrams( """ cdef column_view c_strings = input.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_generate_character_ngrams( c_strings, ngrams, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column hash_character_ngrams( Column input, size_type ngrams, uint32_t seed, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -140,7 +143,8 
@@ cpdef Column hash_character_ngrams( """ cdef column_view c_strings = input.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -148,7 +152,7 @@ cpdef Column hash_character_ngrams( c_strings, ngrams, seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd index fbf8e99ac55..1e3a26454a1 100644 --- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd @@ -1,16 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column jaccard_index( Column input1, Column input2, size_type width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi index abc86597c0e..355d2d7a92f 100644 --- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi @@ -1,15 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def jaccard_index( input1: Column, input2: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx index 4089853ca77..24a343e4508 100644 --- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -13,6 +13,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["jaccard_index"] @@ -20,7 +21,7 @@ cpdef Column jaccard_index( Column input1, Column input2, size_type width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -49,7 +50,8 @@ cpdef Column jaccard_index( cdef column_view c_input1 = input1.view() cdef column_view c_input2 = input2.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -57,8 +59,8 @@ cpdef Column jaccard_index( c_input1, c_input2, width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd 
b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd index 0647337324d..f26b1e30245 100644 --- a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -18,7 +17,7 @@ cpdef Column minhash( Column a, Column b, size_type width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -28,7 +27,7 @@ cpdef Column minhash64( Column a, Column b, size_type width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -38,7 +37,7 @@ cpdef Column minhash_ngrams( uint32_t seed, Column a, Column b, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -48,6 +47,6 @@ cpdef Column minhash64_ngrams( uint64_t seed, Column a, Column b, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi index ee924f8d7aa..5bce73dc991 100644 --- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import Any @@ -6,9 +6,9 @@ from typing import Any import numpy as np from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def minhash( input: Column, @@ -16,7 +16,7 @@ def minhash( a: Column, b: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minhash64( @@ -25,7 +25,7 @@ def minhash64( a: Column, b: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minhash_ngrams( @@ -34,7 +34,7 @@ def minhash_ngrams( seed: int | np.unsignedinteger[Any], a: Column, b: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minhash64_ngrams( @@ -43,6 +43,6 @@ def minhash64_ngrams( seed: int | np.unsignedinteger[Any], a: Column, b: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx index 1329d88060c..3029ed54c50 100644 --- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t @@ -16,6 +16,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "minhash", @@ -30,7 +31,7 @@ cpdef Column minhash( Column a, Column b, size_type width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -58,7 +59,8 @@ cpdef Column minhash( List column of minhash values for each string per seed """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -68,11 +70,11 @@ cpdef Column minhash( a.view(), b.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column minhash64( Column input, @@ -80,7 +82,7 @@ cpdef Column minhash64( Column a, Column b, size_type width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -110,7 +112,8 @@ cpdef Column minhash64( List column of minhash values for each string per seed """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -120,11 +123,11 @@ cpdef Column minhash64( a.view(), b.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column minhash_ngrams( Column input, @@ -132,7 +135,7 @@ cpdef Column minhash_ngrams( uint32_t seed, Column a, Column b, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ 
-163,7 +166,8 @@ cpdef Column minhash_ngrams( value in columns a and b. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -173,11 +177,11 @@ cpdef Column minhash_ngrams( seed, a.view(), b.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column minhash64_ngrams( Column input, @@ -185,7 +189,7 @@ cpdef Column minhash64_ngrams( uint64_t seed, Column a, Column b, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -216,7 +220,8 @@ cpdef Column minhash64_ngrams( value in columns a and b. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -226,8 +231,8 @@ cpdef Column minhash64_ngrams( seed, a.view(), b.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd index f410d778cb1..5deaa45c73f 100644 --- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column ngrams_tokenize( @@ -13,6 +12,6 @@ cpdef Column ngrams_tokenize( size_type ngrams, Scalar delimiter, Scalar separator, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi index 1347b7e7087..99c309a21ff 100644 --- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def ngrams_tokenize( input: Column, ngrams: int, delimiter: Scalar, separator: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx index f9f36244a1d..959c47d595d 100644 --- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -15,6 +15,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["ngrams_tokenize"] @@ -23,7 +24,7 @@ cpdef Column ngrams_tokenize( size_type ngrams, Scalar delimiter, Scalar separator, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -52,7 +53,8 @@ cpdef Column ngrams_tokenize( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -61,7 +63,7 @@ cpdef Column ngrams_tokenize( ngrams, dereference(delimiter.get()), dereference(separator.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd index 8c8623e07a3..30e459f75a5 100644 --- a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -6,16 +6,17 @@ from libcpp.memory cimport unique_ptr from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.normalize cimport character_normalizer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class CharacterNormalizer: cdef unique_ptr[character_normalizer] c_obj -cpdef Column normalize_spaces(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column normalize_spaces( + Column input, object stream = *, DeviceMemoryResource mr=* +) cpdef Column normalize_characters( Column input, CharacterNormalizer normalizer, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi index 958adb10ada..0fbd2e7e725 100644 --- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi @@ -1,28 +1,28 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class CharacterNormalizer: def __init__( self, do_lower_case: bool, special_tokens: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... def normalize_spaces( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def normalize_characters( input: Column, normalizer: CharacterNormalizer, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx index 5f62189f2f5..8e29aad9121 100644 --- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -12,6 +12,7 @@ from pylibcudf.libcudf.nvtext cimport normalize as cpp_normalize from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "CharacterNormalizer" @@ -28,18 +29,19 @@ cdef class CharacterNormalizer: self, bool do_lower_case, Column tokens, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): cdef column_view c_tokens = tokens.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: self.c_obj = move( cpp_normalize.create_character_normalizer( do_lower_case, c_tokens, - stream.view(), + _cs, mr.get_mr() ) ) @@ -47,7 +49,7 @@ cdef class CharacterNormalizer: __hash__ = None cpdef Column normalize_spaces( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column by normalizing the whitespace in @@ -68,21 +70,22 @@ cpdef Column normalize_spaces( New strings columns of normalized strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_normalize.normalize_spaces( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column normalize_characters( Column input, CharacterNormalizer normalizer, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -105,15 +108,16 @@ cpdef Column normalize_characters( Normalized strings column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_normalize.normalize_characters( input.view(), dereference(normalizer.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/nvtext/replace.pxd index c6a9ed5ba67..1265f75a514 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column replace_tokens( @@ -13,7 +12,7 @@ cpdef Column replace_tokens( Column targets, Column replacements, Scalar delimiter=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -22,6 +21,6 @@ cpdef Column filter_tokens( size_type min_token_length, Scalar replacement=*, Scalar delimiter=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyi b/python/pylibcudf/pylibcudf/nvtext/replace.pyi index 09187c1edf1..a5e451cdb16 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def replace_tokens( input: Column, targets: Column, replacements: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def filter_tokens( @@ -20,6 +20,6 @@ def filter_tokens( min_token_length: int, replacement: Scalar | None = None, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx index db375e6993f..4b00d76bd64 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["filter_tokens", "replace_tokens"] @@ -27,7 +28,7 @@ cpdef Column replace_tokens( Column targets, Column replacements, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -55,11 +56,12 @@ cpdef Column replace_tokens( New strings column with replaced strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = cpp_replace_tokens( @@ -67,10 +69,10 @@ cpdef Column replace_tokens( targets.view(), replacements.view(), dereference(delimiter.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column filter_tokens( @@ -78,7 +80,7 @@ cpdef Column filter_tokens( size_type min_token_length, Scalar replacement=None, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource 
mr=None, ): """ @@ -107,15 +109,16 @@ cpdef Column filter_tokens( New strings column of filtered strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) if replacement is None: replacement = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: @@ -124,8 +127,8 @@ cpdef Column filter_tokens( min_token_length, dereference(replacement.get()), dereference(delimiter.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd index 0b19c699ea8..d9f9ef1549c 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.stemmer cimport letter_type from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrSize: Column @@ -16,10 +15,10 @@ cpdef Column is_letter( Column input, bool check_vowels, ColumnOrSize indices, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column porter_stemmer_measure( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi index ae53ce887a4..5fef689a895 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi @@ -1,20 +1,20 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def is_letter( input: Column, check_vowels: bool, indices: Column | int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def porter_stemmer_measure( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx index 44dc6be5c60..e038cd03fb2 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -18,6 +18,7 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from pylibcudf.libcudf.nvtext.stemmer import letter_type as LetterType # no-cython-lint from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["is_letter", "porter_stemmer_measure", "LetterType"] @@ -25,7 +26,7 @@ cpdef Column is_letter( Column input, bool check_vowels, ColumnOrSize indices, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -55,7 +56,8 @@ cpdef Column is_letter( New boolean column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -63,14 +65,14 @@ cpdef Column is_letter( input.view(), letter_type.VOWEL if check_vowels else letter_type.CONSONANT, indices if ColumnOrSize is size_type else indices.view(), - stream.view() + _cs ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column porter_stemmer_measure( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns the Porter Stemmer measurements of a strings column. 
@@ -92,12 +94,13 @@ cpdef Column porter_stemmer_measure( New column of measure values """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_porter_stemmer_measure(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_porter_stemmer_measure(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) LetterType.__str__ = LetterType.__repr__ diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd index 2ad694d1eca..8346d420440 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -7,36 +7,35 @@ from pylibcudf.libcudf.nvtext.tokenize cimport tokenize_vocabulary from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class TokenizeVocabulary: cdef unique_ptr[tokenize_vocabulary] c_obj cpdef Column tokenize_scalar( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column tokenize_column( - Column input, Column delimiters, Stream stream=*, DeviceMemoryResource mr=* + Column input, Column delimiters, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_tokens_scalar( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_tokens_column( - Column input, Column delimiters, Stream stream=*, DeviceMemoryResource mr=* + Column input, Column delimiters, object stream = *, DeviceMemoryResource mr=* ) cpdef Column character_tokenize( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column detokenize( Column input, Column row_indices, Scalar separator=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -45,6 +44,6 @@ cpdef Column tokenize_with_vocabulary( TokenizeVocabulary vocabulary, Scalar delimiter, size_type default_id=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi index c6e2d4cfcb4..72a5209902e 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi @@ -1,54 +1,54 @@ -# SPDX-FileCopyrightText: 
Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class TokenizeVocabulary: def __init__( self, vocab: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... def tokenize_scalar( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def tokenize_column( input: Column, delimiters: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_tokens_scalar( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_tokens_column( input: Column, delimiters: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def character_tokenize( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def detokenize( input: Column, row_indices: Column, separator: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def tokenize_with_vocabulary( @@ -56,6 +56,6 @@ def tokenize_with_vocabulary( vocabulary: TokenizeVocabulary, delimiter: Scalar, default_id: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx index e296ea38a58..4e44d781cc4 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -24,6 +24,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "TokenizeVocabulary", @@ -41,19 +42,20 @@ cdef class TokenizeVocabulary: For details, see :cpp:class:`cudf::nvtext::tokenize_vocabulary`. """ - def __cinit__(self, Column vocab, Stream stream=None, DeviceMemoryResource mr=None): + def __cinit__(self, Column vocab, object stream=None, DeviceMemoryResource mr=None): cdef column_view c_vocab = vocab.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - self.c_obj = move(cpp_load_vocabulary(c_vocab, stream.view(), mr.get_mr())) + self.c_obj = move(cpp_load_vocabulary(c_vocab, _cs, mr.get_mr())) __hash__ = None cpdef Column tokenize_scalar( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -77,26 +79,27 @@ cpdef Column tokenize_scalar( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) 
+ cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = cpp_tokenize( input.view(), dereference(delimiter.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column tokenize_column( - Column input, Column delimiters, Stream stream=None, DeviceMemoryResource mr=None + Column input, Column delimiters, object stream=None, DeviceMemoryResource mr=None ): """ Returns a single column of strings by tokenizing the input @@ -119,23 +122,24 @@ cpdef Column tokenize_column( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_tokenize( input.view(), delimiters.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_tokens_scalar( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -159,26 +163,27 @@ cpdef Column count_tokens_scalar( New column of token counts """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = cpp_count_tokens( input.view(), dereference(delimiter.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_tokens_column( - Column input, 
Column delimiters, Stream stream=None, DeviceMemoryResource mr=None + Column input, Column delimiters, object stream=None, DeviceMemoryResource mr=None ): """ Returns the number of tokens in each string of a strings column @@ -201,21 +206,22 @@ cpdef Column count_tokens_column( New column of token counts """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_count_tokens( input.view(), delimiters.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column character_tokenize( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a single column of strings by converting @@ -236,18 +242,19 @@ cpdef Column character_tokenize( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_character_tokenize(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_character_tokenize(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column detokenize( Column input, Column row_indices, Scalar separator=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -273,12 +280,13 @@ cpdef Column detokenize( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if separator is None: separator = Scalar.from_libcudf( - 
cpp_make_string_scalar(" ".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar(" ".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: @@ -286,18 +294,18 @@ cpdef Column detokenize( input.view(), row_indices.view(), dereference(separator.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column tokenize_with_vocabulary( Column input, TokenizeVocabulary vocabulary, Scalar delimiter, size_type default_id=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -325,7 +333,8 @@ cpdef Column tokenize_with_vocabulary( Lists column of token ids """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -334,8 +343,8 @@ cpdef Column tokenize_with_vocabulary( dereference(vocabulary.c_obj.get()), dereference(delimiter.c_obj.get()), default_id, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd index 3f7685903e0..604a566c701 100644 --- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.wordpiece_tokenize cimport wordpiece_vocabulary from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class WordPieceVocabulary: cdef unique_ptr[wordpiece_vocabulary] c_obj @@ -15,6 +14,6 @@ cpdef Column wordpiece_tokenize( Column input, WordPieceVocabulary vocabulary, size_type max_words_per_row, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi index e91cfc8f21e..e77a8c86a69 100644 --- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi @@ -1,16 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class WordPieceVocabulary: def __init__( self, vocab: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... @@ -18,6 +18,6 @@ def wordpiece_tokenize( input: Column, vocabulary: WordPieceVocabulary, max_words_per_row: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx index b6c516cf739..dfdb563087d 100644 --- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -15,6 +15,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "WordPieceVocabulary", @@ -29,15 +30,16 @@ cdef class WordPieceVocabulary: def __cinit__( self, Column vocab, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): cdef column_view c_vocab = vocab.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: self.c_obj = move(cpp_load_wordpiece_vocabulary( - c_vocab, stream.view(), mr.get_mr() + c_vocab, _cs, mr.get_mr() )) __hash__ = None @@ -46,7 +48,7 @@ cpdef Column wordpiece_tokenize( Column input, WordPieceVocabulary vocabulary, size_type max_words_per_row, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -73,7 +75,8 @@ cpdef Column wordpiece_tokenize( Lists column of token ids """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -81,8 +84,8 @@ cpdef Column wordpiece_tokenize( input.view(), dereference(vocabulary.c_obj.get()), max_words_per_row, - stream.view(), + _cs, mr.get_mr() ) - 
return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/partitioning.pxd index 096b4eb99e8..84c9b647691 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/partitioning.pxd @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from libc.stdint cimport uint32_t @@ -20,7 +19,7 @@ cpdef tuple[Table, list] hash_partition( int num_partitions, hash_id hash_function = *, uint32_t seed = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -28,7 +27,7 @@ cpdef tuple[Table, list] partition( Table t, Column partition_map, int num_partitions, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -36,6 +35,6 @@ cpdef tuple[Table, list] round_robin_partition( Table input, int num_partitions, int start_partition=*, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/partitioning.pyi b/python/pylibcudf/pylibcudf/partitioning.pyi index 081ee53731f..971346421ea 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pyi +++ b/python/pylibcudf/pylibcudf/partitioning.pyi @@ -4,10 +4,10 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class HashId(IntEnum): HASH_IDENTITY = ... @@ -19,20 +19,20 @@ def hash_partition( num_partitions: int, hash_function: HashId = ..., seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[int]]: ... 
def partition( t: Table, partition_map: Column, num_partitions: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[int]]: ... def round_robin_partition( input: Table, num_partitions: int, start_partition: int = 0, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[int]]: ... diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx index b8da9249656..62e35ab9cca 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pyx +++ b/python/pylibcudf/pylibcudf/partitioning.pyx @@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -29,7 +30,7 @@ cpdef tuple[Table, list] hash_partition( int num_partitions, cpp_partitioning.hash_id hash_function = cpp_partitioning.hash_id.HASH_MURMUR3, uint32_t seed = cpp_partitioning.DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -63,7 +64,8 @@ cpdef tuple[Table, list] hash_partition( cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result cdef int c_num_partitions = num_partitions cdef vector[libcudf_types.size_type] columns_to_hash - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if TableOrList is Table: with nogil: @@ -73,7 +75,7 @@ cpdef tuple[Table, list] hash_partition( c_num_partitions, hash_function, seed, - stream.view(), + _cs, mr.get_mr() ) else: @@ -85,17 +87,17 @@ cpdef tuple[Table, list] hash_partition( c_num_partitions, hash_function, seed, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result.first), stream, mr), 
list(c_result.second) + return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second) cpdef tuple[Table, list] partition( Table t, Column partition_map, int num_partitions, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -126,7 +128,8 @@ cpdef tuple[Table, list] partition( cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result cdef int c_num_partitions = num_partitions - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -134,18 +137,18 @@ cpdef tuple[Table, list] partition( t.view(), partition_map.view(), c_num_partitions, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result.first), stream, mr), list(c_result.second) + return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second) cpdef tuple[Table, list] round_robin_partition( Table input, int num_partitions, int start_partition=0, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -176,7 +179,8 @@ cpdef tuple[Table, list] round_robin_partition( cdef int c_num_partitions = num_partitions cdef int c_start_partition = start_partition - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -184,8 +188,8 @@ cpdef tuple[Table, list] round_robin_partition( input.view(), c_num_partitions, c_start_partition, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result.first), stream, mr), list(c_result.second) + return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second) diff --git a/python/pylibcudf/pylibcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/quantiles.pxd index 9492ef8ce38..668e8015688 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/quantiles.pxd @@ 
-1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.vector cimport vector from pylibcudf.libcudf.types cimport interpolation, sorted from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -15,7 +14,7 @@ cpdef Column quantile( interpolation interp = *, Column ordered_indices = *, bint exact = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -26,6 +25,6 @@ cpdef Table quantiles( sorted is_input_sorted = *, list column_order = *, list null_precedence = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/quantiles.pyi b/python/pylibcudf/pylibcudf/quantiles.pyi index 2e414357651..9af646407ab 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyi +++ b/python/pylibcudf/pylibcudf/quantiles.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from collections.abc import Iterable from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import Interpolation, NullOrder, Order, Sorted +from pylibcudf.utils import CudaStreamLike def quantile( input: Column, @@ -16,7 +16,7 @@ def quantile( interp: Interpolation = Interpolation.LINEAR, ordered_indices: Column | None = None, exact: bool = True, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def quantiles( @@ -26,6 +26,6 @@ def quantiles( is_input_sorted: Sorted = Sorted.NO, column_order: list[Order] | None = None, null_precedence: list[NullOrder] | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx index de1ee3344d3..f02643754cb 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyx +++ b/python/pylibcudf/pylibcudf/quantiles.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -20,6 +20,7 @@ from .column cimport Column from .table cimport Table from .types cimport interpolation from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["quantile", "quantiles"] @@ -29,7 +30,7 @@ cpdef Column quantile( interpolation interp = interpolation.LINEAR, Column ordered_indices = None, bool exact=True, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes quantiles with interpolation. 
@@ -74,7 +75,8 @@ cpdef Column quantile( else: ordered_indices_view = ordered_indices.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -84,11 +86,11 @@ cpdef Column quantile( interp, ordered_indices_view, exact, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table quantiles( @@ -98,7 +100,7 @@ cpdef Table quantiles( sorted is_input_sorted = sorted.NO, list column_order = None, list null_precedence = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes row quantiles with interpolation. @@ -156,7 +158,8 @@ cpdef Table quantiles( if null_precedence is not None: null_precedence_vec = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -167,8 +170,8 @@ cpdef Table quantiles( is_input_sorted, column_order_vec, null_precedence_vec, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/reduce.pxd b/python/pylibcudf/pylibcudf/reduce.pxd index e9acd2aaed5..dc33d7053f4 100644 --- a/python/pylibcudf/pylibcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/reduce.pxd @@ -4,7 +4,6 @@ from libcpp cimport bool from pylibcudf.libcudf.reduce cimport scan_type from pylibcudf.libcudf.types cimport nan_policy, null_policy, size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .aggregation cimport Aggregation @@ -18,7 +17,7 @@ cpdef Scalar reduce( Aggregation agg, DataType data_type, Scalar init = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -26,11 
+25,11 @@ cpdef Column scan( Column col, Aggregation agg, scan_type inclusive, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) -cpdef tuple minmax(Column col, Stream stream = *, DeviceMemoryResource mr = *) +cpdef tuple minmax(Column col, object stream = *, DeviceMemoryResource mr = *) cpdef bool is_valid_reduce_aggregation(DataType source, Aggregation agg) @@ -38,12 +37,12 @@ cpdef size_type unique_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream = * + object stream = * ) cpdef size_type distinct_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream = * + object stream = * ) diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi index 5956b93661c..9e1c643b0cd 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyi +++ b/python/pylibcudf/pylibcudf/reduce.pyi @@ -4,12 +4,12 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.types import DataType, NanPolicy, NullPolicy +from pylibcudf.utils import CudaStreamLike class ScanType(IntEnum): INCLUSIVE = ... @@ -19,19 +19,19 @@ def reduce( col: Column, agg: Aggregation, data_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Scalar: ... def scan( col: Column, agg: Aggregation, inclusive: ScanType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minmax( col: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Scalar, Scalar]: ... 
def is_valid_reduce_aggregation( @@ -41,11 +41,11 @@ def unique_count( source: Column, null_handling: NullPolicy, nan_handling: NanPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> int: ... def distinct_count( source: Column, null_handling: NullPolicy, nan_handling: NanPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> int: ... diff --git a/python/pylibcudf/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx index 54036b73e85..95c3555d021 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyx +++ b/python/pylibcudf/pylibcudf/reduce.pyx @@ -31,6 +31,7 @@ from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource from pylibcudf.libcudf.reduce import scan_type as ScanType # no-cython-lint +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ScanType", @@ -47,7 +48,7 @@ cpdef Scalar reduce( Aggregation agg, DataType data_type, Scalar init=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a reduction on a column @@ -79,7 +80,8 @@ cpdef Scalar reduce( cdef optional[reference_wrapper[constscalar]] c_init cdef const scalar* c_init_ptr - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if init is not None: @@ -96,7 +98,7 @@ cpdef Scalar reduce( dereference(c_agg), data_type.c_obj, c_init, - stream.view(), + _cs, mr.get_mr() ) return Scalar.from_libcudf(move(result)) @@ -106,7 +108,7 @@ cpdef Column scan( Column col, Aggregation agg, scan_type inclusive, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a scan on a column @@ -134,7 +136,8 @@ cpdef Column scan( cdef unique_ptr[column] result cdef const scan_aggregation *c_agg = agg.view_underlying_as_scan() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = 
_get_memory_resource(mr) with nogil: @@ -143,13 +146,13 @@ cpdef Column scan( dereference(c_agg), inclusive, null_policy.EXCLUDE, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef tuple minmax(Column col, Stream stream=None, DeviceMemoryResource mr=None): +cpdef tuple minmax(Column col, object stream=None, DeviceMemoryResource mr=None): """Compute the minimum and maximum of a column For details, see ``cudf::minmax`` documentation. @@ -173,11 +176,12 @@ cpdef tuple minmax(Column col, Stream stream=None, DeviceMemoryResource mr=None) cdef Scalar min_scalar cdef Scalar max_scalar - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_minmax(col.view(), stream.view(), mr.get_mr()) + result = cpp_minmax(col.view(), _cs, mr.get_mr()) min_scalar = Scalar.from_libcudf(move(result.first)) max_scalar = Scalar.from_libcudf(move(result.second)) @@ -206,7 +210,7 @@ cpdef size_type unique_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream=None + object stream=None ): """Returns the number of unique consecutive elements in the input column. @@ -231,10 +235,10 @@ cpdef size_type unique_count( If the input column is sorted, then unique_count can produce the same result as distinct_count, but faster. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) return cpp_unique_count.unique_count( - source.view(), null_handling, nan_handling, stream.view() + source.view(), null_handling, nan_handling, _stream.view().value() ) @@ -242,7 +246,7 @@ cpdef size_type distinct_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream=None + object stream=None ): """Returns the number of distinct elements in the input column. 
@@ -262,10 +266,10 @@ cpdef size_type distinct_count( size_type The number of distinct elements in the input column. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) return cpp_distinct_count.distinct_count( - source.view(), null_handling, nan_handling, stream.view() + source.view(), null_handling, nan_handling, _stream.view().value() ) diff --git a/python/pylibcudf/pylibcudf/replace.pxd b/python/pylibcudf/pylibcudf/replace.pxd index 49b57753eb1..7e78e92d514 100644 --- a/python/pylibcudf/pylibcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/replace.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.replace cimport replace_policy -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -22,7 +21,7 @@ ctypedef fused ReplacementType: cpdef Column replace_nulls( Column source_column, ReplacementType replacement, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -30,7 +29,7 @@ cpdef Column find_and_replace_all( Column source_column, Column values_to_replace, Column replacement_values, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -40,13 +39,13 @@ cpdef Column clamp( Scalar hi, Scalar lo_replace=*, Scalar hi_replace=*, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column normalize_nans_and_zeros( Column source_column, bool inplace=*, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi index d7a35721769..f74e06c3909 100644 --- a/python/pylibcudf/pylibcudf/replace.pyi +++ b/python/pylibcudf/pylibcudf/replace.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 
2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class ReplacePolicy(IntEnum): PRECEDING = ... @@ -16,14 +16,14 @@ class ReplacePolicy(IntEnum): def replace_nulls( source_column: Column, replacement: Column | Scalar | ReplacePolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def find_and_replace_all( source_column: Column, values_to_replace: Column, replacement_values: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def clamp( @@ -32,12 +32,12 @@ def clamp( hi: Scalar, lo_replace: Scalar | None = None, hi_replace: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def normalize_nans_and_zeros( source_column: Column, inplace: bool = False, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx index c3730e3971f..4a5cc162551 100644 --- a/python/pylibcudf/pylibcudf/replace.pyx +++ b/python/pylibcudf/pylibcudf/replace.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 @@ -18,6 +18,7 @@ from pylibcudf.libcudf.replace import \ from .column cimport Column from .scalar cimport Scalar from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ReplacePolicy", @@ -31,7 +32,7 @@ __all__ = [ cpdef Column replace_nulls( Column source_column, ReplacementType replacement, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replace nulls in source_column. @@ -70,7 +71,8 @@ cpdef Column replace_nulls( cdef unique_ptr[column] c_result cdef replace_policy policy - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # Due to https://github.com/cython/cython/issues/5984, if this function is @@ -84,10 +86,10 @@ cpdef Column replace_nulls( c_result = cpp_replace.replace_nulls( source_column.view(), policy, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) else: raise TypeError("replacement must be a Column, Scalar, or replace_policy") @@ -96,33 +98,33 @@ cpdef Column replace_nulls( c_result = cpp_replace.replace_nulls( source_column.view(), replacement.view(), - stream.view(), + _cs, mr.get_mr() ) elif ReplacementType is Scalar: c_result = cpp_replace.replace_nulls( source_column.view(), dereference(replacement.c_obj), - stream.view(), + _cs, mr.get_mr() ) elif ReplacementType is replace_policy: c_result = cpp_replace.replace_nulls( source_column.view(), replacement, - stream.view(), + _cs, mr.get_mr() ) else: assert False, "Internal error. 
Please contact pylibcudf developers" - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column find_and_replace_all( Column source_column, Column values_to_replace, Column replacement_values, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replace all occurrences of values_to_replace with replacement_values. @@ -150,7 +152,8 @@ cpdef Column find_and_replace_all( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -158,10 +161,10 @@ cpdef Column find_and_replace_all( source_column.view(), values_to_replace.view(), replacement_values.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column clamp( @@ -170,7 +173,7 @@ cpdef Column clamp( Scalar hi, Scalar lo_replace=None, Scalar hi_replace=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Clamp the values in source_column to the range [lo, hi]. 
@@ -206,7 +209,8 @@ cpdef Column clamp( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -215,7 +219,7 @@ cpdef Column clamp( source_column.view(), dereference(lo.c_obj), dereference(hi.c_obj), - stream.view(), + _cs, mr.get_mr() ) else: @@ -225,16 +229,16 @@ cpdef Column clamp( dereference(lo_replace.c_obj), dereference(hi.c_obj), dereference(hi_replace.c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column normalize_nans_and_zeros( Column source_column, bool inplace=False, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Normalize NaNs and zeros in source_column. @@ -260,24 +264,25 @@ cpdef Column normalize_nans_and_zeros( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: if inplace: cpp_replace.normalize_nans_and_zeros( source_column.mutable_view(), - stream.view(), + _cs, mr.get_mr() ) else: c_result = cpp_replace.normalize_nans_and_zeros( source_column.view(), - stream.view(), + _cs, mr.get_mr() ) if not inplace: - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) ReplacePolicy.__str__ = ReplacePolicy.__repr__ diff --git a/python/pylibcudf/pylibcudf/reshape.pxd b/python/pylibcudf/pylibcudf/reshape.pxd index fd2eb9f31ec..09a111770b5 100644 --- a/python/pylibcudf/pylibcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/reshape.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stddef cimport size_t @@ -6,7 +6,6 @@ from libc.stdint cimport uintptr_t from pylibcudf.libcudf.types cimport size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.device_buffer cimport DeviceBuffer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -17,17 +16,17 @@ from .types cimport DataType cpdef Column interleave_columns( - Table source_table, Stream stream=*, DeviceMemoryResource mr=* + Table source_table, object stream = *, DeviceMemoryResource mr=* ) cpdef Table tile( Table source_table, size_type count, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef void table_to_array( Table input_table, uintptr_t ptr, size_t size, - Stream stream=* + object stream = * ) diff --git a/python/pylibcudf/pylibcudf/reshape.pyi b/python/pylibcudf/pylibcudf/reshape.pyi index c8ca83be981..03acda18353 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyi +++ b/python/pylibcudf/pylibcudf/reshape.pyi @@ -1,26 +1,26 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def interleave_columns( source_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def tile( source_table: Table, count: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def table_to_array( input_table: Table, ptr: int, size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> None: ... 
diff --git a/python/pylibcudf/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx index b001b289794..a81dadf62ce 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyx +++ b/python/pylibcudf/pylibcudf/reshape.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stddef cimport size_t @@ -24,11 +24,12 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["interleave_columns", "tile", "table_to_array"] cpdef Column interleave_columns( - Table source_table, Stream stream=None, DeviceMemoryResource mr=None + Table source_table, object stream=None, DeviceMemoryResource mr=None ): """Interleave columns of a table into a single column. @@ -55,21 +56,22 @@ cpdef Column interleave_columns( A new column which is the result of interleaving the input columns """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_interleave_columns( - source_table.view(), stream.view(), mr.get_mr() + source_table.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table tile( Table source_table, size_type count, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Repeats the rows from input table count times to form a new table. 
@@ -93,22 +95,23 @@ cpdef Table tile( The table containing the tiled "rows" """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_tile( - source_table.view(), count, stream.view(), mr.get_mr() + source_table.view(), count, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef void table_to_array( Table input_table, uintptr_t ptr, size_t size, - Stream stream=None + object stream=None ): """ Copy a table into a preallocated column-major device array. @@ -129,7 +132,8 @@ cpdef void table_to_array( raise ValueError( "Size exceeds the size_t limit." ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() cdef device_span[byte] span = device_span[byte]( ptr, size @@ -139,5 +143,5 @@ cpdef void table_to_array( cpp_table_to_array( input_table.view(), span, - stream.view() + _cs ) diff --git a/python/pylibcudf/pylibcudf/rolling.pxd b/python/pylibcudf/pylibcudf/rolling.pxd index 5ea7dc747f4..94a6a8a6d89 100644 --- a/python/pylibcudf/pylibcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/rolling.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -8,7 +8,6 @@ from pylibcudf.libcudf.rolling cimport ( bounded_closed, bounded_open, current_row, rolling_request, unbounded ) from pylibcudf.libcudf.types cimport null_order, order, size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .aggregation cimport Aggregation @@ -63,7 +62,7 @@ cpdef Table grouped_range_rolling_window( PrecedingRangeWindowType preceding, FollowingRangeWindowType following, list requests, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -73,7 +72,7 @@ cpdef Column rolling_window( WindowType following_window, size_type min_periods, Aggregation agg, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -86,6 +85,6 @@ cpdef tuple make_range_windows( null_order null_order, PrecedingRangeWindowType preceding, FollowingRangeWindowType following, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/rolling.pyi b/python/pylibcudf/pylibcudf/rolling.pyi index 239ce9ddbd8..883f62d0d3f 100644 --- a/python/pylibcudf/pylibcudf/rolling.pyi +++ b/python/pylibcudf/pylibcudf/rolling.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import DataType, NullOrder, Order +from pylibcudf.utils import CudaStreamLike class Unbounded: ... class CurrentRow: ... 
@@ -36,7 +36,7 @@ def grouped_range_rolling_window( preceding: RangeWindowType, following: RangeWindowType, requests: list[RollingRequest], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def rolling_window[WindowType: (Column, int)]( @@ -45,7 +45,7 @@ def rolling_window[WindowType: (Column, int)]( following_window: WindowType, min_periods: int, agg: Aggregation, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_valid_rolling_aggregation( @@ -58,6 +58,6 @@ def make_range_windows( null_order: NullOrder, preceding: RangeWindowType, following: RangeWindowType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... diff --git a/python/pylibcudf/pylibcudf/rolling.pyx b/python/pylibcudf/pylibcudf/rolling.pyx index 73c10e53d57..ae9d7665d69 100644 --- a/python/pylibcudf/pylibcudf/rolling.pyx +++ b/python/pylibcudf/pylibcudf/rolling.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -20,6 +20,7 @@ from .column cimport Column from .scalar cimport Scalar from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -125,7 +126,7 @@ cpdef Table grouped_range_rolling_window( PrecedingRangeWindowType preceding, FollowingRangeWindowType following, list requests, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -164,7 +165,8 @@ cpdef Table grouped_range_rolling_window( for req in requests: crequests.push_back(move((req).view())) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -176,10 +178,10 @@ cpdef Table grouped_range_rolling_window( dereference(preceding.c_obj.get()), dereference(following.c_obj.get()), crequests, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef Column rolling_window( @@ -188,7 +190,7 @@ cpdef Column rolling_window( WindowType following_window, size_type min_periods, Aggregation agg, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a rolling window operation on a column @@ -224,7 +226,8 @@ cpdef Column rolling_window( # reclaim the GIL internally for just the necessary scope like column.view() cdef const rolling_aggregation *c_agg = agg.view_underlying_as_rolling() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if WindowType is Column: @@ -235,7 +238,7 @@ cpdef Column rolling_window( following_window.view(), min_periods, dereference(c_agg), - stream.view(), + _cs, mr.get_mr() ) else: @@ -246,11 +249,11 @@ cpdef Column rolling_window( following_window, min_periods, 
dereference(c_agg), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef bool is_valid_rolling_aggregation(DataType source, Aggregation agg): @@ -278,7 +281,7 @@ cpdef tuple make_range_windows( null_order null_order, PrecedingRangeWindowType preceding, FollowingRangeWindowType following, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -308,7 +311,8 @@ cpdef tuple make_range_windows( """ cdef pair[unique_ptr[column], unique_ptr[column]] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -319,10 +323,10 @@ cpdef tuple make_range_windows( null_order, dereference(preceding.c_obj.get()), dereference(following.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) return ( - Column.from_libcudf(move(result.first), stream, mr), - Column.from_libcudf(move(result.second), stream, mr) + Column.from_libcudf(move(result.first), _stream, mr), + Column.from_libcudf(move(result.second), _stream, mr) ) diff --git a/python/pylibcudf/pylibcudf/round.pxd b/python/pylibcudf/pylibcudf/round.pxd index ecd72c62c0a..0ac0c22346f 100644 --- a/python/pylibcudf/pylibcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/round.pxd @@ -5,7 +5,6 @@ from pylibcudf.libcudf.round cimport rounding_method from .column cimport Column -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -13,7 +12,7 @@ cpdef Column round( Column source, int32_t decimal_places = *, rounding_method round_method = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = * ) @@ -21,6 +20,6 @@ cpdef Column round_decimal( Column source, int32_t decimal_places = *, rounding_method round_method = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = * ) diff --git 
a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi index 848e43aeda7..30d08f234d5 100644 --- a/python/pylibcudf/pylibcudf/round.pyi +++ b/python/pylibcudf/pylibcudf/round.pyi @@ -4,9 +4,9 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class RoundingMethod(IntEnum): HALF_UP = ... @@ -16,13 +16,13 @@ def round( source: Column, decimal_places: int = 0, round_method: RoundingMethod = RoundingMethod.HALF_UP, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def round_decimal( source: Column, decimal_places: int = 0, round_method: RoundingMethod = RoundingMethod.HALF_UP, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx index 84a7ba6dbdf..f5baa6bbd23 100644 --- a/python/pylibcudf/pylibcudf/round.pyx +++ b/python/pylibcudf/pylibcudf/round.pyx @@ -19,6 +19,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["RoundingMethod", "round"] @@ -26,7 +27,7 @@ cpdef Column round( Column source, int32_t decimal_places = 0, rounding_method round_method = rounding_method.HALF_UP, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Rounds all the values in a column to the specified number of decimal places. 
@@ -58,7 +59,8 @@ cpdef Column round( A Column with values rounded """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -66,18 +68,18 @@ cpdef Column round( source.view(), decimal_places, round_method, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column round_decimal( Column source, int32_t decimal_places = 0, rounding_method round_method = rounding_method.HALF_UP, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Rounds all the values in a column to the specified number of decimal places. @@ -106,7 +108,8 @@ cpdef Column round_decimal( A Column with values rounded """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -114,10 +117,10 @@ cpdef Column round_decimal( source.view(), decimal_places, round_method, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) RoundingMethod.__str__ = RoundingMethod.__repr__ diff --git a/python/pylibcudf/pylibcudf/scalar.pxd b/python/pylibcudf/pylibcudf/scalar.pxd index 5230c0316be..b628b9185a6 100644 --- a/python/pylibcudf/pylibcudf/scalar.pxd +++ b/python/pylibcudf/pylibcudf/scalar.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -6,7 +6,6 @@ from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.scalar.scalar cimport scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .types cimport DataType @@ -24,10 +23,10 @@ cdef class Scalar: cdef const scalar* get(self) noexcept nogil cpdef DataType type(self) - cpdef bool is_valid(self, Stream stream=*) + cpdef bool is_valid(self, object stream = *) @staticmethod - cdef Scalar empty_like(Column column, Stream stream, DeviceMemoryResource mr) + cdef Scalar empty_like(Column column, object stream, DeviceMemoryResource mr) @staticmethod cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*) diff --git a/python/pylibcudf/pylibcudf/scalar.pyi b/python/pylibcudf/pylibcudf/scalar.pyi index ef940d8c021..a204894afd8 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyi +++ b/python/pylibcudf/pylibcudf/scalar.pyi @@ -3,11 +3,10 @@ from typing import Any -from rmm.pylibrmm.stream import Stream - from pylibcudf._interop_helpers import ColumnMetadata from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike NpGeneric = type[Any] @@ -16,31 +15,33 @@ PaScalar = type[Any] class Scalar: def __init__(self): ... def type(self) -> DataType: ... - def is_valid(self, stream: Stream) -> bool: ... + def is_valid(self, stream: CudaStreamLike) -> bool: ... @staticmethod - def empty_like(column: Column, stream: Stream | None = None) -> Scalar: ... + def empty_like( + column: Column, stream: CudaStreamLike | None = None + ) -> Scalar: ... def to_arrow( self, metadata: ColumnMetadata | str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> PaScalar: ... 
@staticmethod def from_arrow( pa_val: Any, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Scalar: ... @classmethod def from_py( cls, py_val: Any, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Scalar: ... @classmethod def from_numpy( - cls, np_val: NpGeneric, stream: Stream | None = None + cls, np_val: NpGeneric, stream: CudaStreamLike | None = None ) -> Scalar: ... def to_py( - self, stream: Stream | None = None + self, stream: CudaStreamLike | None = None ) -> None | int | float | str | bool: ... diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index 8771b4a75fd..54e088787a5 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -57,6 +57,7 @@ from rmm.pylibrmm.memory_resource cimport ( get_current_device_resource, ) from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t from .column cimport Column from .traits cimport is_floating_point @@ -151,10 +152,11 @@ cdef class Scalar: """The type of data in the column.""" return self._data_type - cpdef bool is_valid(self, Stream stream = None): + cpdef bool is_valid(self, object stream = None): """True if the scalar is valid, false if not""" - stream = _get_stream(stream) - return self.get().is_valid(stream.view()) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + return self.get().is_valid(_cs) def to_arrow( self, @@ -176,7 +178,9 @@ cdef class Scalar: """ # Note that metadata for scalars is primarily important for preserving # information on nested types since names are otherwise irrelevant. 
- return Column.from_scalar(self, 1, stream).to_arrow(metadata=metadata)[0] + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + return Column.from_scalar(self, 1, _stream).to_arrow(metadata=metadata)[0] @staticmethod def from_arrow( @@ -205,7 +209,7 @@ cdef class Scalar: return _from_arrow(pa_val, dtype, stream) @staticmethod - cdef Scalar empty_like(Column column, Stream stream, DeviceMemoryResource mr): + cdef Scalar empty_like(Column column, object stream, DeviceMemoryResource mr): """Construct a null scalar with the same type as column. Parameters @@ -221,8 +225,10 @@ cdef class Scalar: ------- New empty (null) scalar of the given type. """ + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() return Scalar.from_libcudf( - move(make_empty_scalar_like(column.view(), stream.view(), mr.get_mr())) + move(make_empty_scalar_like(column.view(), _cs, mr.get_mr())) ) @staticmethod @@ -266,9 +272,10 @@ cdef class Scalar: Scalar New pylibcudf.Scalar """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) - return _from_py(py_val, dtype, stream, mr) + return _from_py(py_val, dtype, _stream, mr) @classmethod def from_numpy( @@ -294,9 +301,10 @@ cdef class Scalar: Scalar New pylibcudf.Scalar """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) - return _from_numpy(np_val, stream, mr) + return _from_numpy(np_val, _stream, mr) def to_py(self, stream: Stream | None = None): """ @@ -312,39 +320,40 @@ cdef class Scalar: Python scalar A Python scalar associated with the type of the Scalar. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() if not self.is_valid(stream): return None cdef type_id tid = self.type().id() cdef const scalar* slr = self.c_obj.get() if tid == type_id.BOOL8: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.STRING: - return (slr).to_string(stream.view()).decode() + return (slr).to_string(_cs).decode() elif tid == type_id.FLOAT32: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.FLOAT64: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT8: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT16: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT32: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT64: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT8: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT16: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT32: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT64: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.DECIMAL128: return decimal.Decimal( - (slr).value(stream.view()).value() + (slr).value(_cs).value() ).scaleb( (slr).type().scale() ) @@ -375,6 +384,8 @@ def _from_py( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef DataType c_dtype if dtype is None: raise ValueError("Must specify a dtype for a None value.") @@ -382,7 +393,7 @@ def _( c_dtype = dtype cdef unique_ptr[scalar] c_obj = make_default_constructed_scalar( c_dtype.c_obj, - stream.view(), + _cs, mr.get_mr() ) return _new_scalar(move(c_obj), dtype) @@ 
-402,6 +413,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef DataType c_dtype if dtype is None: @@ -414,11 +427,11 @@ def _( if tid == type_id.FLOAT32: if abs(py_val) > numeric_limits[float].max(): raise OverflowError(f"{py_val} out of range for FLOAT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.FLOAT64: - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) else: typ = c_dtype.id() raise TypeError(f"Cannot convert float to Scalar with dtype {typ.name}") @@ -430,6 +443,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef DataType c_dtype cdef duration_ns c_duration_ns @@ -440,7 +455,7 @@ def _( if dtype is None: c_dtype = dtype = DataType(type_id.INT64) elif is_floating_point(dtype): - return _from_py(float(py_val), dtype, stream, mr) + return _from_py(float(py_val), dtype, _stream, mr) else: c_dtype = dtype cdef type_id tid = c_dtype.id() @@ -450,80 +465,80 @@ def _( numeric_limits[int8_t].min() <= py_val <= numeric_limits[int8_t].max() ): raise OverflowError(f"{py_val} out of range for INT8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.INT16: if not ( 
numeric_limits[int16_t].min() <= py_val <= numeric_limits[int16_t].max() ): raise OverflowError(f"{py_val} out of range for INT16 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.INT32: if not ( numeric_limits[int32_t].min() <= py_val <= numeric_limits[int32_t].max() ): raise OverflowError(f"{py_val} out of range for INT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.INT64: if not ( numeric_limits[int64_t].min() <= py_val <= numeric_limits[int64_t].max() ): raise OverflowError(f"{py_val} out of range for INT64 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT8: if py_val < 0: raise ValueError("Cannot assign negative value to UINT8 scalar") if py_val > numeric_limits[uint8_t].max(): raise OverflowError(f"{py_val} out of range for UINT8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT16: if py_val < 0: raise ValueError("Cannot assign negative value to UINT16 scalar") if py_val > numeric_limits[uint16_t].max(): raise OverflowError(f"{py_val} out of range for UINT16 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) 
+ (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT32: if py_val < 0: raise ValueError("Cannot assign negative value to UINT32 scalar") if py_val > numeric_limits[uint32_t].max(): raise OverflowError(f"{py_val} out of range for UINT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT64: if py_val < 0: raise ValueError("Cannot assign negative value to UINT64 scalar") if py_val > numeric_limits[uint64_t].max(): raise OverflowError(f"{py_val} out of range for UINT64 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.BOOL8: if py_val not in (0, 1): raise ValueError(f"Cannot convert {py_val} to BOOL8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val != 0, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val != 0, _cs) elif tid == type_id.DURATION_NANOSECONDS: if py_val > numeric_limits[int64_t].max(): raise OverflowError( f"{py_val} nanoseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ns = duration_ns(py_val) (c_obj.get()).set_value( - c_duration_ns, stream.view() + c_duration_ns, _cs ) elif tid == type_id.DURATION_MICROSECONDS: @@ -531,10 +546,10 @@ def _( raise OverflowError( f"{py_val} microseconds out of range for INT64 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_us = duration_us(py_val) (c_obj.get()).set_value( - c_duration_us, stream.view() + c_duration_us, _cs ) elif tid == type_id.DURATION_MILLISECONDS: @@ -542,10 +557,10 @@ def _( raise OverflowError( f"{py_val} milliseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ms = duration_ms(py_val) (c_obj.get()).set_value( - c_duration_ms, stream.view() + c_duration_ms, _cs ) elif tid == type_id.DURATION_SECONDS: @@ -553,10 +568,10 @@ def _( raise OverflowError( f"{py_val} seconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_s = duration_s(py_val) (c_obj.get()).set_value( - c_duration_s, stream.view() + c_duration_s, _cs ) elif tid == type_id.DURATION_DAYS: @@ -564,10 +579,10 @@ def _( raise OverflowError( f"{py_val} days out of range for INT32 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_D = duration_D(py_val) (c_obj.get()).set_value( - c_duration_D, stream.view() + c_duration_D, _cs ) else: @@ -581,6 +596,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() if dtype is None: dtype = DataType(type_id.BOOL8) elif dtype.id() != type_id.BOOL8: @@ -591,10 +608,10 @@ def _( cdef unique_ptr[scalar] c_obj = make_numeric_scalar( (dtype).c_obj, - stream.view(), + _cs, mr.get_mr() ) - (c_obj.get()).set_value(py_val, stream.view()) + (c_obj.get()).set_value(py_val, _cs) return _new_scalar(move(c_obj), dtype) @@ -602,6 +619,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() if dtype is None: dtype = DataType(type_id.STRING) elif dtype.id() != type_id.STRING: @@ -610,7 +629,7 @@ def _( f"Cannot convert str to Scalar with dtype {tid.name}" ) cdef unique_ptr[scalar] c_obj = make_string_scalar( - py_val.encode(), stream.view(), mr.get_mr() + py_val.encode(), _cs, mr.get_mr() ) return _new_scalar(move(c_obj), dtype) @@ -619,6 +638,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef duration_us c_duration_us cdef duration_ns c_duration_ns @@ -637,10 +658,10 @@ def _( raise OverflowError( f"{total_nanoseconds} nanoseconds out of range for INT64 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ns = duration_ns(total_nanoseconds) (c_obj.get()).set_value( - c_duration_ns, stream.view() + c_duration_ns, _cs ) elif tid == type_id.DURATION_MICROSECONDS: total_microseconds = int(total_seconds * 1_000_000) @@ -648,10 +669,10 @@ def _( raise OverflowError( f"{total_microseconds} microseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_us = duration_us(total_microseconds) (c_obj.get()).set_value( - c_duration_us, stream.view() + c_duration_us, _cs ) elif tid == type_id.DURATION_MILLISECONDS: total_milliseconds = int(total_seconds * 1_000) @@ -659,10 +680,10 @@ def _( raise OverflowError( f"{total_milliseconds} milliseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ms = duration_ms(total_milliseconds) (c_obj.get()).set_value( - c_duration_ms, stream.view() + c_duration_ms, _cs ) elif tid == type_id.DURATION_SECONDS: total_seconds = int(total_seconds) @@ -670,10 +691,10 @@ def _( raise OverflowError( f"{total_seconds} seconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_s = duration_s(total_seconds) (c_obj.get()).set_value( - c_duration_s, stream.view() + c_duration_s, _cs ) elif tid == type_id.DURATION_DAYS: total_days = int(total_seconds // 86400) @@ -681,10 +702,10 @@ def _( raise OverflowError( f"{total_days} days out of range for INT32 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_D = duration_D(total_days) (c_obj.get()).set_value( - c_duration_D, stream.view() + c_duration_D, _cs ) else: typ = c_dtype.id() @@ -696,6 +717,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef duration_us c_duration_us cdef duration_ns c_duration_ns @@ -727,11 +750,11 @@ def _( raise OverflowError( f"{epoch_nanoseconds} nanoseconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ns = duration_ns(epoch_nanoseconds) c_timestamp_ns = timestamp_ns(c_duration_ns) (c_obj.get()).set_value( - c_timestamp_ns, stream.view() + c_timestamp_ns, _cs ) elif tid == type_id.TIMESTAMP_MICROSECONDS: epoch_microseconds = int(epoch_seconds * 1_000_000) @@ -739,11 +762,11 @@ def _( raise OverflowError( f"{epoch_microseconds} microseconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_us = duration_us(epoch_microseconds) c_timestamp_us = timestamp_us(c_duration_us) (c_obj.get()).set_value( - c_timestamp_us, stream.view() + c_timestamp_us, _cs ) elif tid == type_id.TIMESTAMP_MILLISECONDS: epoch_milliseconds = int(epoch_seconds * 1_000) @@ -751,11 +774,11 @@ def _( raise OverflowError( f"{epoch_milliseconds} milliseconds out of range for INT64 limit." 
) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ms = duration_ms(epoch_milliseconds) c_timestamp_ms = timestamp_ms(c_duration_ms) (c_obj.get()).set_value( - c_timestamp_ms, stream.view() + c_timestamp_ms, _cs ) elif tid == type_id.TIMESTAMP_SECONDS: epoch_seconds = int(epoch_seconds) @@ -763,11 +786,11 @@ def _( raise OverflowError( f"{epoch_seconds} seconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_s = duration_s(epoch_seconds) c_timestamp_s = timestamp_s(c_duration_s) (c_obj.get()).set_value( - c_timestamp_s, stream.view() + c_timestamp_s, _cs ) elif tid == type_id.TIMESTAMP_DAYS: epoch_days = int(epoch_seconds // 86400) @@ -775,11 +798,11 @@ def _( raise OverflowError( f"{epoch_days} days out of range for INT32 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_D = duration_D(epoch_days) c_timestamp_D = timestamp_D(c_duration_D) (c_obj.get()).set_value( - c_timestamp_D, stream.view() + c_timestamp_D, _cs ) else: typ = c_dtype.id() @@ -791,6 +814,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() scale = py_val.as_tuple().exponent as_int = int(py_val.scaleb(-scale)) @@ -804,7 +829,7 @@ def _( cdef unique_ptr[scalar] c_obj = make_fixed_point_scalar[decimal128]( val, scale_type(scale), - stream.view(), + _cs, mr.get_mr() ) return _new_scalar(move(c_obj), dtype) @@ -829,21 +854,25 @@ if np is not None: @_from_numpy.register(np.bool_) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef DataType dtype = 
DataType(type_id.BOOL8) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) cdef cbool c_val = np_val - (c_obj.get()).set_value(c_val, stream.view()) + (c_obj.get()).set_value(c_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.str_) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef DataType dtype = DataType(type_id.STRING) cdef unique_ptr[scalar] c_obj = make_string_scalar( np_val.item().encode(), - stream.view(), + _cs, mr.get_mr() ) cdef Scalar slr = _new_scalar(move(c_obj), dtype) @@ -851,101 +880,121 @@ if np is not None: @_from_numpy.register(np.int8) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT8) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int16) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT16) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int32) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT32) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() 
+ dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int64) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT64) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint8) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT8) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint16) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT16) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint32) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT32) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, 
stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint64) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT64) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.float32) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.FLOAT32) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.float64) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.FLOAT64) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr diff --git a/python/pylibcudf/pylibcudf/search.pxd b/python/pylibcudf/pylibcudf/search.pxd index 7b0725bf60b..c26a6689240 100644 --- a/python/pylibcudf/pylibcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/search.pxd @@ -1,7 +1,6 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -13,7 +12,7 @@ cpdef Column lower_bound( Table needles, list column_order, list null_precedence, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -22,10 +21,10 @@ cpdef Column upper_bound( Table needles, list column_order, list null_precedence, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column contains( - Column haystack, Column needles, Stream stream = *, DeviceMemoryResource mr = * + Column haystack, Column needles, object stream = *, DeviceMemoryResource mr = * ) diff --git a/python/pylibcudf/pylibcudf/search.pyi b/python/pylibcudf/pylibcudf/search.pyi index eaec283a32a..6cc58946f56 100644 --- a/python/pylibcudf/pylibcudf/search.pyi +++ b/python/pylibcudf/pylibcudf/search.pyi @@ -1,19 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import NullOrder, Order +from pylibcudf.utils import CudaStreamLike def lower_bound( haystack: Table, needles: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def upper_bound( @@ -21,12 +21,12 @@ def upper_bound( needles: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def contains( haystack: Column, needles: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx index 4915b1b8be9..885d25f2d49 100644 --- a/python/pylibcudf/pylibcudf/search.pyx +++ b/python/pylibcudf/pylibcudf/search.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -13,6 +13,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["contains", "lower_bound", "upper_bound"] @@ -21,7 +22,7 @@ cpdef Column lower_bound( Table needles, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Find smallest indices in haystack where needles may be inserted to retain order. 
@@ -52,7 +53,8 @@ cpdef Column lower_bound( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -61,10 +63,10 @@ cpdef Column lower_bound( needles.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column upper_bound( @@ -72,7 +74,7 @@ cpdef Column upper_bound( Table needles, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Find largest indices in haystack where needles may be inserted to retain order. @@ -103,7 +105,8 @@ cpdef Column upper_bound( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -112,14 +115,14 @@ cpdef Column upper_bound( needles.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column contains( - Column haystack, Column needles, Stream stream=None, DeviceMemoryResource mr=None + Column haystack, Column needles, object stream=None, DeviceMemoryResource mr=None ): """Check whether needles are present in haystack. 
@@ -143,14 +146,15 @@ cpdef Column contains( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_search.contains( haystack.view(), needles.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/sorting.pxd b/python/pylibcudf/pylibcudf/sorting.pxd index 701b6803c34..a081ece747a 100644 --- a/python/pylibcudf/pylibcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/sorting.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.aggregation cimport rank_method from pylibcudf.libcudf.types cimport null_order, null_policy, order, size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -15,7 +14,7 @@ cpdef Column sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -23,7 +22,7 @@ cpdef Column stable_sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -34,12 +33,12 @@ cpdef Column rank( null_policy null_handling, null_order null_precedence, bool percentage, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef bool is_sorted( - Table table, list column_order, list null_precedence, Stream stream=* + Table table, list column_order, list null_precedence, object stream = * ) cpdef Table segmented_sort_by_key( @@ -48,7 +47,7 @@ cpdef Table segmented_sort_by_key( 
Column segment_offsets, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -58,7 +57,7 @@ cpdef Table stable_segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -67,7 +66,7 @@ cpdef Table sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -76,7 +75,7 @@ cpdef Table stable_sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -84,7 +83,7 @@ cpdef Table sort( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -92,7 +91,7 @@ cpdef Table stable_sort( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -100,7 +99,7 @@ cpdef Column top_k( Column col, size_type k, order sort_order=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -108,6 +107,6 @@ cpdef Column top_k_order( Column col, size_type k, order sort_order=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi index 8f00fcade6e..a06586a8f39 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyi +++ b/python/pylibcudf/pylibcudf/sorting.pyi @@ -1,26 +1,26 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import RankMethod from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import NullOrder, NullPolicy, Order +from pylibcudf.utils import CudaStreamLike def sorted_order( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def stable_sorted_order( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rank( @@ -30,14 +30,14 @@ def rank( null_handling: NullPolicy, null_precedence: NullOrder, percentage: bool, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_sorted( tbl: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> bool: ... def segmented_sort_by_key( values: Table, @@ -45,7 +45,7 @@ def segmented_sort_by_key( segment_offsets: Column, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def stable_segmented_sort_by_key( @@ -54,7 +54,7 @@ def stable_segmented_sort_by_key( segment_offsets: Column, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def sort_by_key( @@ -62,7 +62,7 @@ def sort_by_key( keys: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def stable_sort_by_key( @@ -70,34 +70,34 @@ def stable_sort_by_key( keys: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def sort( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def stable_sort( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def top_k( col: Column, k: int, sort_order: Order = Order.DESCENDING, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def top_k_order( col: Column, k: int, sort_order: Order = Order.DESCENDING, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx index be668ff2526..fa0ed78b709 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyx +++ b/python/pylibcudf/pylibcudf/sorting.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -15,6 +15,7 @@ from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "is_sorted", @@ -33,7 +34,7 @@ cpdef Column sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the row indices required to sort the table. @@ -58,7 +59,8 @@ cpdef Column sorted_order( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -66,17 +68,17 @@ cpdef Column sorted_order( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column stable_sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the row indices required to sort the table, @@ -102,7 +104,8 @@ cpdef Column stable_sorted_order( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -110,10 +113,10 @@ cpdef Column stable_sorted_order( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column rank( @@ -123,7 +126,7 @@ cpdef Column rank( 
null_policy null_handling, null_order null_precedence, bool percentage, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the rank of each element in the column. @@ -152,7 +155,8 @@ cpdef Column rank( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -163,14 +167,14 @@ cpdef Column rank( null_handling, null_precedence, percentage, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef bool is_sorted( - Table tbl, list column_order, list null_precedence, Stream stream=None + Table tbl, list column_order, list null_precedence, object stream=None ): """Checks if the table is sorted. @@ -194,14 +198,15 @@ cpdef bool is_sorted( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: c_result = cpp_sorting.is_sorted( tbl.view(), c_orders, c_null_precedence, - stream.view() + _cs ) return c_result @@ -212,7 +217,7 @@ cpdef Table segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key, within segments. 
@@ -241,7 +246,8 @@ cpdef Table segmented_sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -251,10 +257,10 @@ cpdef Table segmented_sort_by_key( segment_offsets.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_segmented_sort_by_key( @@ -263,7 +269,7 @@ cpdef Table stable_segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key preserving order of equal elements, @@ -293,7 +299,8 @@ cpdef Table stable_segmented_sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -303,10 +310,10 @@ cpdef Table stable_segmented_sort_by_key( segment_offsets.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table sort_by_key( @@ -314,7 +321,7 @@ cpdef Table sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key. 
@@ -341,7 +348,8 @@ cpdef Table sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -350,10 +358,10 @@ cpdef Table sort_by_key( keys.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_sort_by_key( @@ -361,7 +369,7 @@ cpdef Table stable_sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key preserving order of equal elements. @@ -388,7 +396,8 @@ cpdef Table stable_sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -397,17 +406,17 @@ cpdef Table stable_sort_by_key( keys.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table sort( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table. 
@@ -432,7 +441,8 @@ cpdef Table sort( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -440,17 +450,17 @@ cpdef Table sort( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_sort( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table preserving order of equal elements. @@ -475,7 +485,8 @@ cpdef Table stable_sort( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -483,17 +494,17 @@ cpdef Table stable_sort( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column top_k( Column col, size_type k, order sort_order = order.DESCENDING, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -518,7 +529,8 @@ cpdef Column top_k( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -526,17 +538,17 @@ cpdef Column top_k( col.view(), k, sort_order, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column top_k_order( Column col, 
size_type k, order sort_order = order.DESCENDING, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -564,7 +576,8 @@ cpdef Column top_k_order( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -572,7 +585,7 @@ cpdef Column top_k_order( col.view(), k, sort_order, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd index 03b463f5f3a..6e904e11ce1 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd @@ -8,7 +8,6 @@ from pylibcudf.libcudf.types cimport ( size_type, ) from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .expressions cimport Expression @@ -19,7 +18,7 @@ cpdef Table drop_nulls( Table source_table, list keys, size_type keep_threshold, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -27,14 +26,14 @@ cpdef Table drop_nans( Table source_table, list keys, size_type keep_threshold, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Table apply_boolean_mask( Table source_table, Column boolean_mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -43,7 +42,7 @@ cpdef Table unique( list keys, duplicate_keep_option keep, null_equality nulls_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -53,7 +52,7 @@ cpdef Table distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -62,7 +61,7 @@ cpdef Column 
distinct_indices( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -72,7 +71,7 @@ cpdef Table stable_distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -80,6 +79,6 @@ cpdef Table filter( Table predicate_table, Expression predicate_expr, Table filter_table, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi index 49c44f82486..afdd692dde2 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyi +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -4,12 +4,12 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.table import Table from pylibcudf.types import NanEquality, NullEquality +from pylibcudf.utils import CudaStreamLike class DuplicateKeepOption(IntEnum): KEEP_ANY = ... @@ -21,20 +21,20 @@ def drop_nulls( source_table: Table, keys: list[int], keep_threshold: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def drop_nans( source_table: Table, keys: list[int], keep_threshold: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def apply_boolean_mask( source_table: Table, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def unique( @@ -42,7 +42,7 @@ def unique( keys: list[int], keep: DuplicateKeepOption, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def distinct( @@ -51,7 +51,7 @@ def distinct( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def distinct_indices( @@ -59,7 +59,7 @@ def distinct_indices( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def stable_distinct( @@ -68,13 +68,13 @@ def stable_distinct( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def filter( predicate_table: Table, predicate_expr: Expression, filter_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index 4e676602cf8..b4751078acb 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -24,6 +24,7 @@ from .column cimport Column from .expressions cimport Expression from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "DuplicateKeepOption", @@ -41,7 +42,7 @@ cpdef Table drop_nulls( Table source_table, list keys, size_type keep_threshold, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters out rows from the input table based on the presence of nulls. @@ -65,21 +66,22 @@ cpdef Table drop_nulls( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.drop_nulls( - source_table.view(), c_keys, keep_threshold, stream.view(), mr.get_mr() + source_table.view(), c_keys, keep_threshold, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table drop_nans( Table source_table, list keys, size_type keep_threshold, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters out rows from the input table based on the presence of NaNs. 
@@ -103,20 +105,21 @@ cpdef Table drop_nans( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.drop_nans( - source_table.view(), c_keys, keep_threshold, stream.view(), mr.get_mr() + source_table.view(), c_keys, keep_threshold, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table apply_boolean_mask( Table source_table, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters out rows from the input table based on a boolean mask. @@ -137,14 +140,15 @@ cpdef Table apply_boolean_mask( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.apply_boolean_mask( - source_table.view(), boolean_mask.view(), stream.view(), mr.get_mr() + source_table.view(), boolean_mask.view(), _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table unique( @@ -152,7 +156,7 @@ cpdef Table unique( list keys, duplicate_keep_option keep, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filter duplicate consecutive rows from the input table. 
@@ -184,14 +188,15 @@ cpdef Table unique( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.unique( - input.view(), c_keys, keep, nulls_equal, stream.view(), mr.get_mr() + input.view(), c_keys, keep, nulls_equal, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table distinct( @@ -200,7 +205,7 @@ cpdef Table distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Get the distinct rows from the input table. @@ -229,15 +234,16 @@ cpdef Table distinct( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.distinct( - input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view(), + input.view(), c_keys, keep, nulls_equal, nans_equal, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column distinct_indices( @@ -245,7 +251,7 @@ cpdef Column distinct_indices( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Get the indices of the distinct rows from the input table. 
@@ -270,14 +276,15 @@ cpdef Column distinct_indices( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.distinct_indices( - input.view(), keep, nulls_equal, nans_equal, stream.view(), mr.get_mr() + input.view(), keep, nulls_equal, nans_equal, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_distinct( @@ -286,7 +293,7 @@ cpdef Table stable_distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Get the distinct rows from the input table, preserving input order. @@ -315,22 +322,23 @@ cpdef Table stable_distinct( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.stable_distinct( - input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view(), + input.view(), c_keys, keep, nulls_equal, nans_equal, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table filter( Table predicate_table, Expression predicate_expr, Table filter_table, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters a table using a predicate expression. 
@@ -353,7 +361,8 @@ cpdef Table filter( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -361,9 +370,9 @@ cpdef Table filter( predicate_table.view(), dereference(predicate_expr.c_obj.get()), filter_table.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) DuplicateKeepOption.__str__ = DuplicateKeepOption.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/strings/attributes.pxd index 68b1ce9b5a0..64533b1ce3d 100644 --- a/python/pylibcudf/pylibcudf/strings/attributes.pxd +++ b/python/pylibcudf/pylibcudf/strings/attributes.pxd @@ -1,19 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column count_characters( - Column source_strings, Stream stream=*, DeviceMemoryResource mr=* + Column source_strings, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_bytes( - Column source_strings, Stream stream=*, DeviceMemoryResource mr=* + Column source_strings, object stream = *, DeviceMemoryResource mr=* ) cpdef Column code_points( - Column source_strings, Stream stream=*, DeviceMemoryResource mr=* + Column source_strings, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyi b/python/pylibcudf/pylibcudf/strings/attributes.pyi index 06b76e669d3..2e28fb9f186 100644 --- a/python/pylibcudf/pylibcudf/strings/attributes.pyi +++ b/python/pylibcudf/pylibcudf/strings/attributes.pyi @@ -1,23 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def count_characters( source_strings: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_bytes( source_strings: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def code_points( source_strings: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyx b/python/pylibcudf/pylibcudf/strings/attributes.pyx index 2449d51122f..334270ea834 100644 --- a/python/pylibcudf/pylibcudf/strings/attributes.pyx +++ b/python/pylibcudf/pylibcudf/strings/attributes.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,11 +9,12 @@ from pylibcudf.libcudf.strings cimport attributes as cpp_attributes from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["code_points", "count_bytes", "count_characters"] cpdef Column count_characters( - Column source_strings, Stream stream=None, DeviceMemoryResource mr=None + Column source_strings, object stream=None, DeviceMemoryResource mr=None ): """ Returns a column containing character lengths of each string @@ -32,19 +33,20 @@ cpdef Column count_characters( New column with lengths for each string """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_attributes.count_characters( - source_strings.view(), stream.view(), mr.get_mr() + source_strings.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_bytes( - Column source_strings, Stream stream=None, DeviceMemoryResource mr=None + Column source_strings, object stream=None, DeviceMemoryResource mr=None ): """ Returns a column containing byte lengths of each string @@ -63,19 +65,20 @@ cpdef Column count_bytes( New column with the number of bytes for each string """ 
cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_attributes.count_bytes( - source_strings.view(), stream.view(), mr.get_mr() + source_strings.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column code_points( - Column source_strings, Stream stream=None, DeviceMemoryResource mr=None + Column source_strings, object stream=None, DeviceMemoryResource mr=None ): """ Creates a numeric column with code point values (integers) @@ -94,12 +97,13 @@ cpdef Column code_points( New column with code point integer values for each character """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_attributes.code_points( - source_strings.view(), stream.view(), mr.get_mr() + source_strings.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/strings/capitalize.pxd index ccbe15b3794..1a68c29e05c 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pxd +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pxd @@ -1,20 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from pylibcudf.libcudf.strings.char_types cimport string_character_types from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column capitalize( - Column input, Scalar delimiters=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiters=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column title( Column input, string_character_types sequence_type=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) -cpdef Column is_title(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column is_title(Column input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyi b/python/pylibcudf/pylibcudf/strings/capitalize.pyi index 35554e6fff3..031d244bf25 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pyi +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyi @@ -1,27 +1,27 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.char_types import StringCharacterTypes +from pylibcudf.utils import CudaStreamLike def capitalize( input: Column, delimiters: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def title( input: Column, sequence_type: StringCharacterTypes = StringCharacterTypes.ALPHA, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def is_title( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyx b/python/pylibcudf/pylibcudf/strings/capitalize.pyx index 11291bd1243..be8c52a59b5 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pyx +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -17,13 +17,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["capitalize", "is_title", "title"] cpdef Column capitalize( Column input, Scalar delimiters=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, # TODO: default scalar values # https://github.com/rapidsai/cudf/issues/15505 @@ -45,12 +46,13 @@ cpdef Column capitalize( Column of strings capitalized from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiters is None: delimiters = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* cpp_delimiters = ( @@ -61,17 +63,17 @@ cpdef Column capitalize( c_result = cpp_capitalize.capitalize( input.view(), dereference(cpp_delimiters), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column title( Column input, 
string_character_types sequence_type=string_character_types.ALPHA, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Modifies first character of each word to upper-case and lower-cases @@ -92,17 +94,18 @@ cpdef Column title( Column of titled strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_capitalize.title( - input.view(), sequence_type, stream.view(), mr.get_mr() + input.view(), sequence_type, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_title(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_title(Column input, object stream=None, DeviceMemoryResource mr=None): """Checks if the strings in the input column are title formatted. For details, see :cpp:func:`is_title`. @@ -118,9 +121,10 @@ cpdef Column is_title(Column input, Stream stream=None, DeviceMemoryResource mr= Column of type BOOL8 """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_capitalize.is_title(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_capitalize.is_title(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/strings/case.pxd index 8a959fb61d5..fea9f68e95e 100644 --- a/python/pylibcudf/pylibcudf/strings/case.pxd +++ b/python/pylibcudf/pylibcudf/strings/case.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream -cpdef Column to_lower(Column input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column to_upper(Column input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column swapcase(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column to_lower(Column input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column to_upper(Column input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column swapcase(Column input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/strings/case.pyi b/python/pylibcudf/pylibcudf/strings/case.pyi index ecdb614fcd7..1337e7df5a9 100644 --- a/python/pylibcudf/pylibcudf/strings/case.pyi +++ b/python/pylibcudf/pylibcudf/strings/case.pyi @@ -1,23 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def to_lower( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def to_upper( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def swapcase( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/case.pyx b/python/pylibcudf/pylibcudf/strings/case.pyx index 5e7d20f01f8..ec6539f42e1 100644 --- a/python/pylibcudf/pylibcudf/strings/case.pyx +++ b/python/pylibcudf/pylibcudf/strings/case.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,10 +9,11 @@ from pylibcudf.libcudf.strings cimport case as cpp_case from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["swapcase", "to_lower", "to_upper"] -cpdef Column to_lower(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column to_lower(Column input, object stream=None, DeviceMemoryResource mr=None): """Returns a column of lowercased strings. For details, see :cpp:func:`to_lower`. @@ -32,14 +33,15 @@ cpdef Column to_lower(Column input, Stream stream=None, DeviceMemoryResource mr= Column of strings lowercased from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_case.to_lower(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_case.to_lower(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column to_upper(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column to_upper(Column input, object stream=None, DeviceMemoryResource mr=None): """Returns a column of uppercased strings. For details, see :cpp:func:`to_upper`. 
@@ -59,14 +61,15 @@ cpdef Column to_upper(Column input, Stream stream=None, DeviceMemoryResource mr= Column of strings uppercased from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_case.to_upper(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_case.to_upper(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column swapcase(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column swapcase(Column input, object stream=None, DeviceMemoryResource mr=None): """Returns a column of strings where the lowercase characters are converted to uppercase and the uppercase characters are converted to lowercase. @@ -88,9 +91,10 @@ cpdef Column swapcase(Column input, Stream stream=None, DeviceMemoryResource mr= Column of strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_case.swapcase(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_case.swapcase(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/strings/char_types.pxd index 009886f3e9f..59c045dba15 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pxd +++ b/python/pylibcudf/pylibcudf/strings/char_types.pxd @@ -1,18 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.strings.char_types cimport string_character_types from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column all_characters_of_type( Column source_strings, string_character_types types, string_character_types verify_types, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -21,6 +20,6 @@ cpdef Column filter_characters_of_type( string_character_types types_to_remove, Scalar replacement, string_character_types types_to_keep, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyi b/python/pylibcudf/pylibcudf/strings/char_types.pyi index 12749d79f6d..1740a67eb00 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pyi +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class StringCharacterTypes(IntEnum): DECIMAL = ... @@ -25,7 +25,7 @@ def all_characters_of_type( source_strings: Column, types: StringCharacterTypes, verify_types: StringCharacterTypes, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def filter_characters_of_type( @@ -33,6 +33,6 @@ def filter_characters_of_type( types_to_remove: StringCharacterTypes, replacement: Scalar, types_to_keep: StringCharacterTypes, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx index 5cb5025798e..2567ab8ee4b 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pyx +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.libcudf.strings.char_types import \ string_character_types as StringCharacterTypes # no-cython-lint @@ -27,7 +28,7 @@ cpdef Column all_characters_of_type( Column source_strings, string_character_types types, string_character_types verify_types, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -50,7 +51,8 @@ cpdef Column all_characters_of_type( New column of boolean results for each string """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -58,18 +60,18 @@ cpdef Column all_characters_of_type( source_strings.view(), types, verify_types, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column filter_characters_of_type( Column source_strings, 
string_character_types types_to_remove, Scalar replacement, string_character_types types_to_keep, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -99,7 +101,8 @@ cpdef Column filter_characters_of_type( replacement.c_obj.get() ) cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -108,10 +111,10 @@ cpdef Column filter_characters_of_type( types_to_remove, dereference(c_replacement), types_to_keep, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) StringCharacterTypes.__str__ = StringCharacterTypes.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/strings/combine.pxd index b889169c7c7..32a58abdc23 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pxd +++ b/python/pylibcudf/pylibcudf/strings/combine.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -9,7 +9,6 @@ from pylibcudf.libcudf.strings.combine cimport ( from pylibcudf.scalar cimport Scalar from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -21,7 +20,7 @@ cpdef Column concatenate( Scalar narep=*, Scalar col_narep=*, separator_on_nulls separate_nulls=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -29,7 +28,7 @@ cpdef Column join_strings( Column input, Scalar separator, Scalar narep, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -40,6 +39,6 @@ cpdef Column join_list_elements( Scalar string_narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi index fa568046fa8..3186709996f 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyi +++ b/python/pylibcudf/pylibcudf/strings/combine.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class SeparatorOnNulls(IntEnum): YES = ... @@ -24,14 +24,14 @@ def concatenate( narep: Scalar | None = None, col_narep: Scalar | None = None, separate_nulls: SeparatorOnNulls = SeparatorOnNulls.YES, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def join_strings( input: Column, separator: Scalar, narep: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def join_list_elements( @@ -41,6 +41,6 @@ def join_list_elements( string_narep: Scalar, separate_nulls: SeparatorOnNulls, empty_list_policy: OutputIfEmptyList, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyx b/python/pylibcudf/pylibcudf/strings/combine.pyx index e570a18c585..82903002907 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyx +++ b/python/pylibcudf/pylibcudf/strings/combine.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -16,6 +16,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.libcudf.strings.combine import \ output_if_empty_list as OutputIfEmptyList # no-cython-lint from pylibcudf.libcudf.strings.combine import \ @@ -35,7 +36,7 @@ cpdef Column concatenate( Scalar narep=None, Scalar col_narep=None, separator_on_nulls separate_nulls=separator_on_nulls.YES, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -68,12 +69,13 @@ cpdef Column concatenate( cdef unique_ptr[column] c_result cdef const string_scalar* c_col_narep cdef const string_scalar* c_separator - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if narep is None: narep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), 
stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_narep = ( narep.c_obj.get() @@ -82,7 +84,7 @@ cpdef Column concatenate( if ColumnOrScalar is Column: if col_narep is None: col_narep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) c_col_narep = ( col_narep.c_obj.get() @@ -95,7 +97,7 @@ cpdef Column concatenate( dereference(c_narep), dereference(c_col_narep), separate_nulls, - stream.view(), + _cs, mr.get_mr() ) ) @@ -112,20 +114,20 @@ cpdef Column concatenate( dereference(c_separator), dereference(c_narep), separate_nulls, - stream.view(), + _cs, mr.get_mr() ) ) else: raise ValueError("separator must be a Column or a Scalar") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column join_strings( Column input, Scalar separator, Scalar narep, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -149,7 +151,8 @@ cpdef Column join_strings( New column containing one string """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef const string_scalar* c_separator = ( separator.c_obj.get() @@ -163,12 +166,12 @@ cpdef Column join_strings( input.view(), dereference(c_separator), dereference(c_narep), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column join_list_elements( @@ -178,7 +181,7 @@ cpdef Column join_list_elements( Scalar string_narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -217,7 +220,8 @@ cpdef 
Column join_list_elements( New strings column with concatenated results """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef const string_scalar* c_separator_narep = ( separator_narep.c_obj.get() @@ -237,7 +241,7 @@ cpdef Column join_list_elements( dereference(c_string_narep), separate_nulls, empty_list_policy, - stream.view(), + _cs, mr.get_mr() ) ) @@ -251,13 +255,13 @@ cpdef Column join_list_elements( dereference(c_separator_narep), separate_nulls, empty_list_policy, - stream.view(), + _cs, mr.get_mr() ) ) else: raise ValueError("separator must be a Column or a Scalar") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) OutputIfEmptyList.__str__ = OutputIfEmptyList.__repr__ SeparatorOnNulls.__str__ = SeparatorOnNulls.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/strings/contains.pxd index b3b0f06efb5..585f2fac1ff 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pxd +++ b/python/pylibcudf/pylibcudf/strings/contains.pxd @@ -1,28 +1,27 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.strings.regex_program cimport RegexProgram from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column contains_re( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_re( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column matches_re( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column like( Column input, str pattern, str escape_character=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyi b/python/pylibcudf/pylibcudf/strings/contains.pyi index 3685cf5345a..b751ef0b24c 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pyi +++ b/python/pylibcudf/pylibcudf/strings/contains.pyi @@ -1,34 +1,34 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.utils import CudaStreamLike def contains_re( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_re( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def matches_re( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def like( input: Column, pattern: str, escape_character: str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyx b/python/pylibcudf/pylibcudf/strings/contains.pyx index 8fe74228854..495d1637d8a 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pyx +++ b/python/pylibcudf/pylibcudf/strings/contains.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -11,13 +11,14 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["contains_re", "count_re", "like", "matches_re"] cpdef Column contains_re( Column input, RegexProgram prog, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns a boolean column identifying rows which match the given @@ -39,24 +40,27 @@ cpdef Column contains_re( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + if _stream is None: + _stream = _get_stream(None) mr = _get_memory_resource(mr) with nogil: result = cpp_contains.contains_re( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column 
count_re( Column input, RegexProgram prog, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns the number of times the given regex_program's pattern @@ -78,24 +82,25 @@ cpdef Column count_re( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_contains.count_re( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column matches_re( Column input, RegexProgram prog, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns a boolean column identifying rows which @@ -118,25 +123,26 @@ cpdef Column matches_re( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_contains.matches_re( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column like( Column input, str pattern, str escape_character=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -161,7 +167,8 @@ cpdef Column like( New column of boolean results for each string """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if escape_character is None: @@ -175,9 +182,9 @@ cpdef Column like( input.view(), c_pattern, c_escape_character, - stream.view(), + _cs, mr.get_mr() ) - stream.synchronize() + _stream.synchronize() - return Column.from_libcudf(move(result), stream, mr) + return 
Column.from_libcudf(move(result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd index cc1206cf29b..0929544287f 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd @@ -1,20 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_booleans( - Column input, Scalar true_string, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar true_string, object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_booleans( Column booleans, Scalar true_string, Scalar false_string, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi index 608b47bad8c..10c7b96bfc0 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def to_booleans( input: Column, true_string: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_booleans( booleans: Column, true_string: Scalar, false_string: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx index 6f7965f8a3b..e8f963cf0f3 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -15,11 +15,12 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from cython.operator import dereference from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_booleans", "to_booleans"] cpdef Column to_booleans( - Column input, Scalar true_string, Stream stream=None, DeviceMemoryResource mr=None + Column input, Scalar true_string, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new bool column by parsing boolean values from the strings @@ -47,24 +48,25 @@ cpdef Column to_booleans( cdef const string_scalar* c_true_string = ( true_string.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_booleans.to_booleans( input.view(), dereference(c_true_string), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_booleans( Column booleans, Scalar true_string, Scalar false_string, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -99,7 +101,8 @@ cpdef Column from_booleans( cdef const string_scalar* c_false_string = ( false_string.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -107,8 +110,8 @@ cpdef Column from_booleans( booleans.view(), dereference(c_true_string), dereference(c_false_string), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd 
b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd index 407eb06ce6a..d0a5d2fc829 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd @@ -1,18 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_timestamps( Column input, DataType timestamp_type, str format, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -20,13 +19,13 @@ cpdef Column from_timestamps( Column timestamps, str format, Column input_strings_names, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_timestamp( Column input, str format, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi index 5fdc863705d..99f067ecb04 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi @@ -1,29 +1,29 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_timestamps( input: Column, timestamp_type: DataType, format: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_timestamps( timestamps: Column, format: str, input_strings_names: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_timestamp( input: Column, format: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx index 07b35de7c54..633445a7383 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from pylibcudf.types import DataType +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_timestamps", "is_timestamp", "to_timestamps"] @@ -21,7 +22,7 @@ cpdef Column to_timestamps( Column input, DataType timestamp_type, str format, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -51,24 +52,25 @@ cpdef Column to_timestamps( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_datetime.to_timestamps( input.view(), timestamp_type.c_obj, c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_timestamps( Column timestamps, str format, Column input_strings_names, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -98,23 +100,24 @@ cpdef Column from_timestamps( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_datetime.from_timestamps( timestamps.view(), c_format, input_strings_names.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column is_timestamp( Column input, str format, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -141,14 +144,15 @@ cpdef Column is_timestamp( """ cdef unique_ptr[column] 
c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_datetime.is_timestamp( input.view(), c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd index 62b372d0af4..a912d939a83 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd @@ -1,24 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_durations( Column input, DataType duration_type, str format, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_durations( Column durations, str format=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi index 95ba392ec94..ac9fd9825dc 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_durations( input: Column, duration_type: DataType, format: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_durations( durations: Column, format: str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx index 9bf8eb96009..548df7398b4 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from pylibcudf.types import DataType +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_durations", "to_durations"] @@ -21,7 +22,7 @@ cpdef Column to_durations( Column input, DataType duration_type, str format, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -51,7 +52,8 @@ cpdef Column to_durations( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -59,16 +61,16 @@ cpdef Column to_durations( input.view(), duration_type.c_obj, c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_durations( Column durations, str format=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -95,7 +97,8 @@ cpdef Column from_durations( New strings column with formatted durations. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if format is None: @@ -106,8 +109,8 @@ cpdef Column from_durations( c_result = cpp_convert_durations.from_durations( durations.view(), c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd index 046556db181..439f8884008 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd @@ -1,26 +1,25 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_fixed_point( Column input, DataType output_type, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_fixed_point( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_fixed_point( Column input, DataType decimal_type=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi index 7269f970069..a9d4a0eac98 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi @@ -1,26 +1,26 @@ -# 
SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_fixed_point( input: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_fixed_point( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_fixed_point( input: Column, decimal_type: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx index 13020a5ee73..059373790c5 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,12 +12,13 @@ from pylibcudf.types cimport DataType, type_id from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_fixed_point", "is_fixed_point", "to_fixed_point"] cpdef Column to_fixed_point( - Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new fixed-point column parsing decimal values from the @@ -42,21 +43,22 @@ cpdef Column to_fixed_point( New column of output_type. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_fixed_point.to_fixed_point( input.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_fixed_point( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting the fixed-point values @@ -78,20 +80,21 @@ cpdef Column from_fixed_point( New strings column. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_fixed_point.from_fixed_point( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column is_fixed_point( Column input, DataType decimal_type=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -118,7 +121,8 @@ cpdef Column is_fixed_point( New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if decimal_type is None: @@ -128,8 +132,8 @@ cpdef Column is_fixed_point( c_result = cpp_fixed_point.is_fixed_point( input.view(), decimal_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd index a2b98fa0b74..0d394fa1fe7 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd @@ -1,16 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_floats( - Column strings, DataType output_type, Stream stream=*, DeviceMemoryResource mr=* + Column strings, DataType output_type, object stream = *, DeviceMemoryResource mr=* ) -cpdef Column from_floats(Column floats, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column from_floats(Column floats, object stream = *, DeviceMemoryResource mr=*) -cpdef Column is_float(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column is_float(Column input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi index b5c8d7e7497..b334dfef9c7 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi @@ -1,25 +1,25 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_floats( strings: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_floats( floats: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_float( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx index 59ac17a3e1c..d4901ce7be6 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,13 +12,14 @@ from pylibcudf.types cimport DataType from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_floats", "is_float", "to_floats"] cpdef Column to_floats( Column strings, DataType output_type, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -44,22 +45,23 @@ cpdef Column to_floats( New column with floats converted from strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_floats.to_floats( strings.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_floats( - Column floats, Stream stream=None, DeviceMemoryResource mr=None + Column floats, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting the float values from the @@ -81,18 +83,19 @@ cpdef Column from_floats( New strings column with floats as strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_floats.from_floats( - floats.view(), stream.view(), mr.get_mr() + floats.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_float(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_float(Column input, object stream=None, DeviceMemoryResource mr=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to floats. @@ -113,10 +116,13 @@ cpdef Column is_float(Column input, Stream stream=None, DeviceMemoryResource mr= New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_floats.is_float(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_floats.is_float( + input.view(), _cs, mr.get_mr() + ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd index 376081e9b20..059e8c31f19 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd @@ -1,32 +1,31 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_integers( - Column input, DataType output_type, Stream stream=*, DeviceMemoryResource mr=* + Column input, DataType output_type, object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_integers( - Column integers, Stream stream=*, DeviceMemoryResource mr=* + Column integers, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_integer( - Column input, DataType int_type=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, DataType int_type=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column hex_to_integers( - Column input, DataType output_type, Stream stream=*, DeviceMemoryResource mr=* + Column input, DataType output_type, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_hex( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column integers_to_hex( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi index 4625ee5e883..88a66350466 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi @@ -1,42 +1,42 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_integers( input: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_integers( integers: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_integer( input: Column, int_type: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def hex_to_integers( input: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_hex( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def integers_to_hex( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx index c5945e5e1e5..b717ddbbcda 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,6 +12,7 @@ from pylibcudf.types cimport DataType from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "from_integers", @@ -23,7 +24,7 @@ __all__ = [ ] cpdef Column to_integers( - Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new integer numeric column parsing integer values from the @@ -48,7 +49,8 @@ cpdef Column to_integers( New column with integers converted from strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -56,16 +58,16 @@ cpdef Column to_integers( cpp_convert_integers.to_integers( input.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_integers( - Column integers, Stream stream=None, DeviceMemoryResource mr=None + Column integers, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting the integer values from the @@ -87,25 +89,26 @@ cpdef Column from_integers( New strings column with integers as strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = move( cpp_convert_integers.from_integers( integers.view(), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column is_integer( Column input, DataType int_type=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -133,7 +136,8 @@ cpdef Column is_integer( New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if int_type is None: @@ -141,7 +145,7 @@ cpdef Column is_integer( c_result = move( cpp_convert_integers.is_integer( input.view(), - stream.view(), + _cs, mr.get_mr() ) ) @@ -151,16 +155,16 @@ cpdef Column is_integer( cpp_convert_integers.is_integer( input.view(), int_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column hex_to_integers( - Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new integer numeric column parsing hexadecimal values @@ -185,7 +189,8 @@ cpdef Column hex_to_integers( New column with integers converted from strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -193,15 +198,15 @@ cpdef Column hex_to_integers( cpp_convert_integers.hex_to_integers( input.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_hex(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_hex(Column input, object stream=None, DeviceMemoryResource mr=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to integers from hex. @@ -222,23 +227,24 @@ cpdef Column is_hex(Column input, Stream stream=None, DeviceMemoryResource mr=No New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = move( cpp_convert_integers.is_hex( input.view(), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column integers_to_hex( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting integer columns to hexadecimal @@ -260,16 +266,17 @@ cpdef Column integers_to_hex( New strings column with hexadecimal characters. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = move( cpp_convert_integers.integers_to_hex( input.view(), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd index 53a3927af41..04df2862c31 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd @@ -1,19 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column ipv4_to_integers( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column integers_to_ipv4( - Column integers, Stream stream=*, DeviceMemoryResource mr=* + Column integers, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_ipv4( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi index 86a969a4021..16e4d8d990a 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi @@ -1,23 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def ipv4_to_integers( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def integers_to_ipv4( integers: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_ipv4( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx index 72021e85a9d..45b98190aa7 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,11 +9,12 @@ from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4 from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["integers_to_ipv4", "ipv4_to_integers", "is_ipv4"] cpdef Column ipv4_to_integers( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Converts IPv4 addresses into integers. @@ -34,19 +35,20 @@ cpdef Column ipv4_to_integers( New uint32 column converted from strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_ipv4.ipv4_to_integers( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column integers_to_ipv4( - Column integers, Stream stream=None, DeviceMemoryResource mr=None + Column integers, object stream=None, DeviceMemoryResource mr=None ): """ Converts integers into IPv4 addresses as strings. @@ -67,18 +69,19 @@ cpdef Column integers_to_ipv4( New strings column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_ipv4.integers_to_ipv4( - integers.view(), stream.view(), mr.get_mr() + integers.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_ipv4(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_ipv4(Column input, object stream=None, DeviceMemoryResource mr=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to integers from IPv4 format. @@ -99,10 +102,11 @@ cpdef Column is_ipv4(Column input, Stream stream=None, DeviceMemoryResource mr=N New column of boolean results for each string. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_ipv4.is_ipv4(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_ipv4.is_ipv4(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd index a2dcc15dacd..c25cf9d7146 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd @@ -1,16 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column format_list_column( Column input, Scalar na_rep=*, Column separators=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi index cf301dd9a1b..29f94a30123 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi @@ -1,16 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def format_list_column( input: Column, na_rep: Scalar | None = None, separators: Column | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx index 79648efcc3f..9c8f9d7b02e 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -20,6 +20,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["format_list_column"] @@ -27,7 +28,7 @@ cpdef Column format_list_column( Column input, Scalar na_rep=None, Column separators=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -58,12 +59,13 @@ cpdef Column format_list_column( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if na_rep is None: na_rep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_na_rep = ( @@ -78,8 +80,8 @@ cpdef Column format_list_column( 
input.view(), dereference(c_na_rep), separators.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd index dce44f5e547..56b1f803d38 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd @@ -1,15 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column url_encode( - Column Input, Stream stream=*, DeviceMemoryResource mr=* + Column Input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column url_decode( - Column Input, Stream stream=*, DeviceMemoryResource mr=* + Column Input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi index 6a248cdc974..8707da953b5 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def url_encode( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def url_decode( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx index 30ca51f27f7..efe009e6c02 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -10,10 +10,11 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["url_decode", "url_encode"] -cpdef Column url_encode(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column url_encode(Column input, object stream=None, DeviceMemoryResource mr=None): """ Encodes each string using URL encoding. @@ -33,16 +34,19 @@ cpdef Column url_encode(Column input, Stream stream=None, DeviceMemoryResource m New strings column. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_urls.url_encode(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_urls.url_encode( + input.view(), _cs, mr.get_mr() + ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column url_decode(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column url_decode(Column input, object stream=None, DeviceMemoryResource mr=None): """ Decodes each string using URL encoding. @@ -62,10 +66,13 @@ cpdef Column url_decode(Column input, Stream stream=None, DeviceMemoryResource m New strings column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_urls.url_decode(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_urls.url_decode( + input.view(), _cs, mr.get_mr() + ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/strings/extract.pxd index c8fcb900d2b..85f722970c8 100644 --- a/python/pylibcudf/pylibcudf/strings/extract.pxd +++ b/python/pylibcudf/pylibcudf/strings/extract.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -6,21 +6,20 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.table cimport Table from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Table extract( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column extract_all_record( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column extract_single( Column input, RegexProgram prog, size_type group, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyi b/python/pylibcudf/pylibcudf/strings/extract.pyi index 853420a8091..a9607266bbc 100644 --- a/python/pylibcudf/pylibcudf/strings/extract.pyi +++ b/python/pylibcudf/pylibcudf/strings/extract.pyi @@ -1,29 +1,29 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.regex_program import RegexProgram from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def extract( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def extract_all_record( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def extract_single( input: Column, prog: RegexProgram, group: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyx b/python/pylibcudf/pylibcudf/strings/extract.pyx index bac20c2cd15..c670b226e84 100644 --- a/python/pylibcudf/pylibcudf/strings/extract.pyx +++ b/python/pylibcudf/pylibcudf/strings/extract.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -13,11 +13,12 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["extract", "extract_all_record", "extract_single"] cpdef Table extract( - Column input, RegexProgram prog, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram prog, object stream=None, DeviceMemoryResource mr=None ): """ Returns a table of strings columns where each column @@ -41,22 +42,23 @@ cpdef Table extract( Columns of strings extracted from the input column. 
""" cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_extract.extract( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column extract_all_record( - Column input, RegexProgram prog, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram prog, object stream=None, DeviceMemoryResource mr=None ): """ Returns a lists column of strings where each string column @@ -80,25 +82,26 @@ cpdef Column extract_all_record( Lists column containing strings extracted from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_extract.extract_all_record( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column extract_single( Column input, RegexProgram prog, size_type group, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -124,7 +127,8 @@ cpdef Column extract_single( Column of strings extracted from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -132,8 +136,8 @@ cpdef Column extract_single( input.view(), prog.c_obj.get()[0], group, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git 
a/python/pylibcudf/pylibcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/strings/find.pxd index 3ec32563c5a..1a04cf4eca2 100644 --- a/python/pylibcudf/pylibcudf/strings/find.pxd +++ b/python/pylibcudf/pylibcudf/strings/find.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -16,7 +15,7 @@ cpdef Column find( ColumnOrScalar target, size_type start=*, size_type stop=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -25,27 +24,27 @@ cpdef Column rfind( Scalar target, size_type start=*, size_type stop=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column contains( Column input, ColumnOrScalar target, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column starts_with( Column input, ColumnOrScalar target, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column ends_with( Column input, ColumnOrScalar target, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/find.pyi b/python/pylibcudf/pylibcudf/strings/find.pyi index a566fbdd72a..a8b3ca1da7c 100644 --- a/python/pylibcudf/pylibcudf/strings/find.pyi +++ b/python/pylibcudf/pylibcudf/strings/find.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def find( input: Column, target: Column | Scalar, start: int = 0, stop: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rfind( @@ -20,24 +20,24 @@ def rfind( target: Scalar, start: int = 0, stop: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains( input: Column, target: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def starts_with( input: Column, target: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def ends_with( input: Column, target: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/find.pyx b/python/pylibcudf/pylibcudf/strings/find.pyx index 7323a924342..102a8787651 100644 --- a/python/pylibcudf/pylibcudf/strings/find.pyx +++ b/python/pylibcudf/pylibcudf/strings/find.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -14,6 +14,7 @@ from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["contains", "ends_with", "find", "rfind", "starts_with"] @@ -22,7 +23,7 @@ cpdef Column find( ColumnOrScalar target, size_type start=0, size_type stop=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns a column of character position values where the target string is @@ -58,7 +59,8 @@ cpdef Column find( New integer column with character position values """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: with nogil: @@ -66,7 +68,7 @@ cpdef Column find( input.view(), target.view(), start, - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -76,13 +78,13 @@ cpdef Column find( dereference((target.c_obj.get())), start, stop, - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column rfind( @@ -90,7 +92,7 @@ cpdef Column rfind( Scalar target, size_type start=0, size_type stop=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -119,7 +121,8 @@ cpdef Column rfind( New integer column with character position values """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_find.rfind( @@ -127,16 +130,16 @@ cpdef Column rfind( dereference((target.c_obj.get())), start, stop, - stream.view(), + 
_cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column contains( Column input, ColumnOrScalar target, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -169,14 +172,15 @@ cpdef Column contains( New boolean column with True for each string that contains the target """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: with nogil: result = cpp_find.contains( input.view(), target.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -184,19 +188,19 @@ cpdef Column contains( result = cpp_find.contains( input.view(), dereference((target.c_obj.get())), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column starts_with( Column input, ColumnOrScalar target, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -229,7 +233,8 @@ cpdef Column starts_with( New boolean column with True for each string that starts with the target """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: @@ -237,7 +242,7 @@ cpdef Column starts_with( result = cpp_find.starts_with( input.view(), target.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -245,18 +250,18 @@ cpdef Column starts_with( result = cpp_find.starts_with( input.view(), dereference((target.c_obj.get())), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + 
return Column.from_libcudf(move(result), _stream, mr) cpdef Column ends_with( Column input, ColumnOrScalar target, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -288,14 +293,15 @@ cpdef Column ends_with( New boolean column with True for each string that ends with the target """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: with nogil: result = cpp_find.ends_with( input.view(), target.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -303,10 +309,10 @@ cpdef Column ends_with( result = cpp_find.ends_with( input.view(), dereference((target.c_obj.get())), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pxd b/python/pylibcudf/pylibcudf/strings/find_multiple.pxd index f6677607c5e..e01cb33fdb8 100644 --- a/python/pylibcudf/pylibcudf/strings/find_multiple.pxd +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pxd @@ -1,21 +1,20 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column find_multiple( Column input, Column targets, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Table contains_multiple( Column input, Column targets, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi index 48de0eac0e1..76115cd7496 100644 --- a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi @@ -1,21 +1,21 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def find_multiple( input: Column, targets: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains_multiple( input: Column, targets: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx index e18b178f803..ed5f0d78506 100644 --- a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -11,13 +11,14 @@ from pylibcudf.table cimport Table from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["find_multiple", "contains_multiple"] cpdef Column find_multiple( Column input, Column targets, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -41,24 +42,25 @@ cpdef Column find_multiple( Lists column with character position values """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_find_multiple.find_multiple( input.view(), targets.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table contains_multiple( Column input, Column targets, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -82,15 +84,16 @@ cpdef Table contains_multiple( Columns of booleans """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_find_multiple.contains_multiple( input.view(), targets.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/strings/findall.pxd index 2dc75fa6d34..ec7e01f7539 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/strings/findall.pxd @@ -1,15 +1,14 @@ -# SPDX-FileCopyrightText: 
Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.strings.regex_program cimport RegexProgram from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column find_re( - Column input, RegexProgram pattern, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram pattern, object stream = *, DeviceMemoryResource mr=* ) cpdef Column findall( - Column input, RegexProgram pattern, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram pattern, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyi b/python/pylibcudf/pylibcudf/strings/findall.pyi index 5677a99d325..f72e786cf1d 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyi +++ b/python/pylibcudf/pylibcudf/strings/findall.pyi @@ -1,21 +1,21 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.utils import CudaStreamLike def find_re( input: Column, pattern: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def findall( input: Column, pattern: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 881664faced..5647a791ef1 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -10,11 +10,12 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["findall", "find_re"] cpdef Column findall( - Column input, RegexProgram pattern, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram pattern, object stream=None, DeviceMemoryResource mr=None ): """ Returns a lists column of strings for each matching occurrence using @@ -37,22 +38,23 @@ cpdef Column findall( New lists column of strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_findall.findall( input.view(), pattern.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column find_re( - Column input, RegexProgram pattern, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram pattern, object stream=None, DeviceMemoryResource mr=None ): """ Returns character positions where the pattern first matches @@ -75,15 +77,16 @@ cpdef Column find_re( New column of integers """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream 
_stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_findall.find_re( input.view(), pattern.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/strings/padding.pxd index 1dfbbd9950f..61dcaf7cba9 100644 --- a/python/pylibcudf/pylibcudf/strings/padding.pxd +++ b/python/pylibcudf/pylibcudf/strings/padding.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.strings.side_type cimport side_type from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column pad( @@ -14,14 +13,14 @@ cpdef Column pad( size_type width, side_type side, str fill_char, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column zfill( - Column input, size_type width, Stream stream=*, DeviceMemoryResource mr=* + Column input, size_type width, object stream = *, DeviceMemoryResource mr=* ) cpdef Column zfill_by_widths( - Column input, Column widths, Stream stream=*, DeviceMemoryResource mr=* + Column input, Column widths, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyi b/python/pylibcudf/pylibcudf/strings/padding.pyi index 26af5429acb..904b0022317 100644 --- a/python/pylibcudf/pylibcudf/strings/padding.pyi +++ b/python/pylibcudf/pylibcudf/strings/padding.pyi @@ -1,29 +1,29 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.side_type import SideType +from pylibcudf.utils import CudaStreamLike def pad( input: Column, width: int, side: SideType, fill_char: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def zfill( input: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def zfill_by_widths( input: Column, widths: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyx b/python/pylibcudf/pylibcudf/strings/padding.pyx index 9409970b075..d8eb4f1da4a 100644 --- a/python/pylibcudf/pylibcudf/strings/padding.pyx +++ b/python/pylibcudf/pylibcudf/strings/padding.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -10,6 +10,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["pad", "zfill", "zfill_by_widths"] @@ -18,7 +19,7 @@ cpdef Column pad( size_type width, side_type side, str fill_char, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -46,7 +47,8 @@ cpdef Column pad( """ cdef unique_ptr[column] c_result cdef string c_fill_char = fill_char.encode("utf-8") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -55,14 +57,14 @@ cpdef Column pad( width, side, c_fill_char, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column zfill( - Column input, size_type width, Stream stream=None, DeviceMemoryResource mr=None + Column input, size_type width, object stream=None, DeviceMemoryResource mr=None ): """ Add '0' as padding to the left of each string. @@ -84,21 +86,22 @@ cpdef Column zfill( New column of strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_padding.zfill( input.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column zfill_by_widths( - Column input, Column widths, Stream stream=None, DeviceMemoryResource mr=None + Column input, Column widths, object stream=None, DeviceMemoryResource mr=None ): """ Add '0' as padding to the left of each string. @@ -120,15 +123,16 @@ cpdef Column zfill_by_widths( New column of strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_padding.zfill_by_widths( input.view(), widths.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/strings/repeat.pxd index f1abe23ce59..60725aa688e 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pxd +++ b/python/pylibcudf/pylibcudf/strings/repeat.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnorSizeType: Column @@ -13,6 +12,6 @@ ctypedef fused ColumnorSizeType: cpdef Column repeat_strings( Column input, ColumnorSizeType repeat_times, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyi b/python/pylibcudf/pylibcudf/strings/repeat.pyi index 5b47213e956..fedb7dee76c 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pyi +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def repeat_strings( input: Column, repeat_times: Column | int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyx b/python/pylibcudf/pylibcudf/strings/repeat.pyx index 84a305bf866..7a9c5285d02 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pyx +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -11,13 +11,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from ..utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["repeat_strings"] cpdef Column repeat_strings( Column input, ColumnorSizeType repeat_times, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -44,7 +45,8 @@ cpdef Column repeat_strings( New column containing the repeated strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnorSizeType is Column: @@ -52,7 +54,7 @@ cpdef Column repeat_strings( c_result = cpp_repeat.repeat_strings( input.view(), repeat_times.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnorSizeType is size_type: @@ -60,10 +62,10 @@ cpdef Column repeat_strings( c_result = cpp_repeat.repeat_strings( input.view(), repeat_times, - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError("repeat_times must be size_type or integer") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/strings/replace.pxd index a486869aada..aea2296b5f9 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pxd +++ b/python/pylibcudf/pylibcudf/strings/replace.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column replace( @@ -13,7 +12,7 @@ cpdef Column replace( Scalar target, Scalar repl, size_type maxrepl=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column replace_multiple( @@ -21,7 +20,7 @@ cpdef Column replace_multiple( Column target, Column repl, size_type maxrepl=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column replace_slice( @@ -29,6 +28,6 @@ cpdef Column replace_slice( Scalar repl=*, size_type start=*, size_type stop=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyi b/python/pylibcudf/pylibcudf/strings/replace.pyi index 3e62a76d2bf..0e76eb402f7 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyi +++ b/python/pylibcudf/pylibcudf/strings/replace.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def replace( input: Column, target: Scalar, repl: Scalar, maxrepl: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def replace_multiple( @@ -20,7 +20,7 @@ def replace_multiple( target: Column, repl: Column, maxrepl: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def replace_slice( @@ -28,6 +28,6 @@ def replace_slice( repl: Scalar | None = None, start: int = 0, stop: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx index e1d88fed464..ccd6c924441 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["replace", "replace_multiple", "replace_slice"] @@ -27,7 +28,7 @@ cpdef Column replace( Scalar target, Scalar repl, size_type maxrepl=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replaces target string within each string with the specified replacement string. 
@@ -60,7 +61,8 @@ cpdef Column replace( target_str = (target.c_obj.get()) repl_str = (repl.c_obj.get()) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -69,11 +71,11 @@ cpdef Column replace( target_str[0], repl_str[0], maxrepl, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column replace_multiple( @@ -81,7 +83,7 @@ cpdef Column replace_multiple( Column target, Column repl, size_type maxrepl=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replaces target string within each string with the specified replacement string. @@ -109,7 +111,8 @@ cpdef Column replace_multiple( New string column with target replaced. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -117,11 +120,11 @@ cpdef Column replace_multiple( input.view(), target.view(), repl.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column replace_slice( @@ -131,7 +134,7 @@ cpdef Column replace_slice( Scalar repl=None, size_type start=0, size_type stop=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replaces each string in the column with the provided repl string @@ -162,12 +165,13 @@ cpdef Column replace_slice( New string column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if repl is None: repl = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + 
cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* scalar_str = (repl.c_obj.get()) @@ -178,8 +182,8 @@ cpdef Column replace_slice( scalar_str[0], start, stop, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/strings/replace_re.pxd index fc833a61045..0d360f8de6f 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pxd +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -7,7 +7,6 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.strings.regex_flags cimport regex_flags from pylibcudf.strings.regex_program cimport RegexProgram from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused Replacement: Column @@ -24,7 +23,7 @@ cpdef Column replace_re( Replacement replacement=*, size_type max_replace_count=*, regex_flags flags=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -32,6 +31,6 @@ cpdef Column replace_with_backrefs( Column input, RegexProgram prog, str replacement, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyi b/python/pylibcudf/pylibcudf/strings/replace_re.pyi index 29f8ddfe925..64970928323 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pyi +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyi @@ -1,15 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import overload from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.regex_flags import RegexFlags from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.utils import CudaStreamLike @overload def replace_re( @@ -17,7 +17,7 @@ def replace_re( pattern: RegexProgram, replacement: Scalar, max_replace_count: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @overload @@ -27,13 +27,13 @@ def replace_re( replacement: Column, max_replace_count: int = -1, flags: RegexFlags = RegexFlags.DEFAULT, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def replace_with_backrefs( input: Column, prog: RegexProgram, replacement: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx index 1819dd0ba2b..60e9c4c1666 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -19,6 +19,7 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["replace_re", "replace_with_backrefs"] @@ -28,7 +29,7 @@ cpdef Column replace_re( Replacement replacement=None, size_type max_replace_count=-1, regex_flags flags=regex_flags.DEFAULT, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -64,13 +65,14 @@ cpdef Column replace_re( """ cdef unique_ptr[column] c_result cdef vector[string] c_patterns - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if Patterns is RegexProgram and Replacement is Scalar: if replacement is None: replacement = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = move( @@ -79,12 +81,12 @@ cpdef Column replace_re( patterns.c_obj.get()[0], dereference((replacement.get())), max_replace_count, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) elif Patterns is list and Replacement is Column: c_patterns.reserve(len(patterns)) for pattern in patterns: @@ -97,12 +99,12 @@ cpdef Column replace_re( c_patterns, replacement.view(), flags, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) else: raise TypeError("Must pass either a RegexProgram and a Scalar or a list") @@ -111,7 +113,7 @@ cpdef Column replace_with_backrefs( 
Column input, RegexProgram prog, str replacement, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -137,7 +139,8 @@ cpdef Column replace_with_backrefs( New strings column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef string c_replacement = replacement.encode() @@ -146,8 +149,8 @@ cpdef Column replace_with_backrefs( input.view(), prog.c_obj.get()[0], c_replacement, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/reverse.pyi b/python/pylibcudf/pylibcudf/strings/reverse.pyi index 182f4768825..48c602e2d28 100644 --- a/python/pylibcudf/pylibcudf/strings/reverse.pyi +++ b/python/pylibcudf/pylibcudf/strings/reverse.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def reverse( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/reverse.pyx b/python/pylibcudf/pylibcudf/strings/reverse.pyx index 49792b5661b..f1d06248523 100644 --- a/python/pylibcudf/pylibcudf/strings/reverse.pyx +++ b/python/pylibcudf/pylibcudf/strings/reverse.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,10 +9,11 @@ from pylibcudf.libcudf.strings cimport reverse as cpp_reverse from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["reverse"] -cpdef Column reverse(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column reverse(Column input, object stream=None, DeviceMemoryResource mr=None): """Reverses the characters within each string. Any null string entries return corresponding null output column entries. @@ -32,9 +33,10 @@ cpdef Column reverse(Column input, Stream stream=None, DeviceMemoryResource mr=N New strings column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_reverse.reverse(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_reverse.reverse(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/slice.pxd b/python/pylibcudf/pylibcudf/strings/slice.pxd index 6bb5a8d3611..9612ead3108 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pxd +++ b/python/pylibcudf/pylibcudf/strings/slice.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -15,6 +14,6 @@ cpdef Column slice_strings( ColumnOrScalar start=*, ColumnOrScalar stop=*, Scalar step=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyi b/python/pylibcudf/pylibcudf/strings/slice.pyi index 73ee8c31b5b..ac2e4d12f1f 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyi +++ b/python/pylibcudf/pylibcudf/strings/slice.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def slice_strings( input: Column, start: Column | Scalar | None = None, stop: Column | Scalar | None = None, step: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx index 2b5bbf2f621..b3ac2cd8bfe 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyx +++ b/python/pylibcudf/pylibcudf/strings/slice.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -18,6 +18,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from ..utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["slice_strings"] @@ -26,7 +27,7 @@ cpdef Column slice_strings( ColumnOrScalar start=None, ColumnOrScalar stop=None, Scalar step=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a slice operation on a strings column. @@ -60,7 +61,8 @@ cpdef Column slice_strings( cdef numeric_scalar[size_type]* cpp_start cdef numeric_scalar[size_type]* cpp_stop cdef numeric_scalar[size_type]* cpp_step - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if input is None: @@ -80,22 +82,22 @@ cpdef Column slice_strings( input.view(), start.view(), stop.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: if start is None: start = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(0, stream.view(), mr.get_mr()) + cpp_make_fixed_width_scalar(0, _stream.view().value(), mr.get_mr()) ) if stop is None: stop = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(0, stream.view(), mr.get_mr()) + cpp_make_fixed_width_scalar(0, _stream.view().value(), mr.get_mr()) ) if step is None: step = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(1, stream.view(), mr.get_mr()) + cpp_make_fixed_width_scalar(1, _stream.view().value(), mr.get_mr()) ) cpp_start = start.c_obj.get() @@ -108,10 +110,10 @@ cpdef Column slice_strings( dereference(cpp_start), dereference(cpp_stop), dereference(cpp_step), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError("start, stop, and step must be either Column or Scalar") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff 
--git a/python/pylibcudf/pylibcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/strings/split/partition.pxd index d8001682b32..e3da533c90c 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pxd +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pxd @@ -1,17 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Table partition( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Table rpartition( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyi b/python/pylibcudf/pylibcudf/strings/split/partition.pyi index d919b68153c..cef2d16aea6 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pyi +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def partition( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def rpartition( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyx b/python/pylibcudf/pylibcudf/strings/split/partition.pyx index 728d7b9975d..ce813c10bba 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pyx +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -16,13 +16,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["partition", "rpartition"] cpdef Table partition( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -46,12 +47,13 @@ cpdef Table partition( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_delimiter = ( @@ -62,16 +64,16 @@ cpdef Table partition( c_result = cpp_partition.partition( input.view(), dereference(c_delimiter), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table rpartition( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, 
DeviceMemoryResource mr=None, ): """ @@ -95,12 +97,13 @@ cpdef Table rpartition( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_delimiter = ( @@ -111,8 +114,8 @@ cpdef Table rpartition( c_result = cpp_partition.rpartition( input.view(), dereference(c_delimiter), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/strings/split/split.pxd index 06b77154b18..2372a177944 100644 --- a/python/pylibcudf/pylibcudf/strings/split/split.pxd +++ b/python/pylibcudf/pylibcudf/strings/split/split.pxd @@ -7,50 +7,49 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Table split( - Column strings_column, Scalar delimiter, size_type maxsplit, Stream stream=*, + Column strings_column, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Table rsplit( - Column strings_column, Scalar delimiter, size_type maxsplit, Stream stream=*, + Column strings_column, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column split_record( - Column strings, Scalar delimiter, size_type maxsplit, Stream stream=*, + Column strings, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column rsplit_record( - Column strings, Scalar delimiter, 
size_type maxsplit, Stream stream=*, + Column strings, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Table split_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Table rsplit_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column split_record_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column rsplit_record_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column split_part( - Column input, Scalar delimiter, size_type index, Stream stream=*, + Column input, Scalar delimiter, size_type index, object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyi b/python/pylibcudf/pylibcudf/strings/split/split.pyi index ae64e300b63..7a775bd960c 100644 --- a/python/pylibcudf/pylibcudf/strings/split/split.pyi +++ b/python/pylibcudf/pylibcudf/strings/split/split.pyi @@ -2,73 +2,73 @@ # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.regex_program import RegexProgram from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def split( strings_column: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def rsplit( strings_column: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def split_record( strings: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rsplit_record( strings: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def split_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def rsplit_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def split_record_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rsplit_record_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def split_part( input: Column, delimiter: Scalar, index: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyx b/python/pylibcudf/pylibcudf/strings/split/split.pyx index 0635df87e13..52803b08eb0 100644 --- a/python/pylibcudf/pylibcudf/strings/split/split.pyx +++ b/python/pylibcudf/pylibcudf/strings/split/split.pyx @@ -16,6 +16,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "rsplit", @@ -32,7 +33,7 @@ cpdef Table split( Column strings_column, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -65,7 +66,8 @@ cpdef Table split( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -73,18 +75,18 @@ cpdef Table split( strings_column.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table rsplit( Column strings_column, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -117,7 +119,8 @@ cpdef Table rsplit( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -125,17 +128,17 @@ cpdef Table rsplit( strings_column.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column split_record( Column strings, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object 
stream=None, DeviceMemoryResource mr=None, ): """ @@ -164,7 +167,8 @@ cpdef Column split_record( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -172,18 +176,18 @@ cpdef Column split_record( strings.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column rsplit_record( Column strings, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -213,7 +217,8 @@ cpdef Column rsplit_record( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -221,18 +226,18 @@ cpdef Column rsplit_record( strings.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table split_re( Column input, RegexProgram prog, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -259,7 +264,8 @@ cpdef Table split_re( A table of columns of strings. 
""" cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -267,17 +273,17 @@ cpdef Table split_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table rsplit_re( Column input, RegexProgram prog, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -305,7 +311,8 @@ cpdef Table rsplit_re( A table of columns of strings. """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -313,17 +320,17 @@ cpdef Table rsplit_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column split_record_re( Column input, RegexProgram prog, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -350,7 +357,8 @@ cpdef Column split_record_re( Lists column of strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -358,14 +366,14 @@ cpdef Column split_record_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column rsplit_record_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=None, + Column input, RegexProgram prog, size_type maxsplit, object stream=None, DeviceMemoryResource mr=None, ): """ @@ -392,7 +400,8 @@ cpdef Column rsplit_record_re( Lists column of strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -400,22 +409,23 @@ cpdef Column rsplit_record_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column split_part( - Column input, Scalar delimiter, size_type index, Stream stream=None, + Column input, Scalar delimiter, size_type index, object stream=None, DeviceMemoryResource mr=None, ): cdef unique_ptr[column] c_result cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -423,8 +433,8 @@ cpdef Column split_part( input.view(), dereference(c_delimiter), index, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/strip.pxd 
b/python/pylibcudf/pylibcudf/strings/strip.pxd index d3f41ce9a5c..a37ac40c523 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pxd +++ b/python/pylibcudf/pylibcudf/strings/strip.pxd @@ -1,17 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from pylibcudf.strings.side_type cimport side_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column strip( Column input, side_type side=*, Scalar to_strip=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyi b/python/pylibcudf/pylibcudf/strings/strip.pyi index ecb80b632d7..786079769c7 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pyi +++ b/python/pylibcudf/pylibcudf/strings/strip.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.side_type import SideType +from pylibcudf.utils import CudaStreamLike def strip( input: Column, side: SideType = SideType.BOTH, to_strip: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyx b/python/pylibcudf/pylibcudf/strings/strip.pyx index 3b477fa83ad..607428b6f69 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pyx +++ b/python/pylibcudf/pylibcudf/strings/strip.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -16,6 +16,7 @@ from pylibcudf.strings.side_type cimport side_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["strip"] @@ -23,7 +24,7 @@ cpdef Column strip( Column input, side_type side=side_type.BOTH, Scalar to_strip=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Removes the specified characters from the beginning @@ -47,12 +48,13 @@ cpdef Column strip( pylibcudf.Column New strings column. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if to_strip is None: to_strip = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef unique_ptr[column] c_result @@ -64,8 +66,8 @@ cpdef Column strip( input.view(), side, dereference(cpp_to_strip), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/strings/translate.pxd index 2d74e2f4a2c..d6a80ddfd43 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pxd +++ b/python/pylibcudf/pylibcudf/strings/translate.pxd @@ -1,14 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.strings.translate cimport filter_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column translate( - Column input, dict chars_table, Stream stream=*, DeviceMemoryResource mr=* + Column input, dict chars_table, object stream = *, DeviceMemoryResource mr=* ) cpdef Column filter_characters( @@ -16,6 +15,6 @@ cpdef Column filter_characters( dict characters_to_filter, filter_type keep_characters, Scalar replacement, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyi b/python/pylibcudf/pylibcudf/strings/translate.pyi index a01b786fd6f..9e7624e0b17 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pyi +++ b/python/pylibcudf/pylibcudf/strings/translate.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from collections.abc import Mapping from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class FilterType(IntEnum): KEEP = ... @@ -16,7 +16,7 @@ class FilterType(IntEnum): def translate( input: Column, chars_table: Mapping[int | str, int | str], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def filter_characters( @@ -24,6 +24,6 @@ def filter_characters( characters_to_filter: Mapping[int | str, int | str], keep_characters: FilterType, replacement: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyx b/python/pylibcudf/pylibcudf/strings/translate.pyx index 06c772330df..2a60ff881d4 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pyx +++ b/python/pylibcudf/pylibcudf/strings/translate.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair @@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.libcudf.strings.translate import \ filter_type as FilterType # no-cython-lint @@ -43,7 +44,7 @@ cdef vector[pair[char_utf8, char_utf8]] _table_to_c_table(dict table): cpdef Column translate( - Column input, dict chars_table, Stream stream=None, DeviceMemoryResource mr=None + Column input, dict chars_table, object stream=None, DeviceMemoryResource mr=None ): """ Translates individual characters within each string. 
@@ -69,17 +70,18 @@ cpdef Column translate( cdef vector[pair[char_utf8, char_utf8]] c_chars_table = _table_to_c_table( chars_table ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_translate.translate( input.view(), c_chars_table, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column filter_characters( @@ -87,7 +89,7 @@ cpdef Column filter_characters( dict characters_to_filter, filter_type keep_characters, Scalar replacement, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -124,7 +126,8 @@ cpdef Column filter_characters( cdef const string_scalar* c_replacement = ( replacement.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -133,9 +136,9 @@ cpdef Column filter_characters( c_characters_to_filter, keep_characters, dereference(c_replacement), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) FilterType.__str__ = FilterType.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/strings/wrap.pxd index 62faaff36f0..ea74927498d 100644 --- a/python/pylibcudf/pylibcudf/strings/wrap.pxd +++ b/python/pylibcudf/pylibcudf/strings/wrap.pxd @@ -1,12 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column wrap( - Column input, size_type width, Stream stream=*, DeviceMemoryResource mr=* + Column input, size_type width, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyi b/python/pylibcudf/pylibcudf/strings/wrap.pyi index 00c939cc420..aa88b64a391 100644 --- a/python/pylibcudf/pylibcudf/strings/wrap.pyi +++ b/python/pylibcudf/pylibcudf/strings/wrap.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def wrap( input: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyx b/python/pylibcudf/pylibcudf/strings/wrap.pyx index 504c469debc..28bc310b5a4 100644 --- a/python/pylibcudf/pylibcudf/strings/wrap.pyx +++ b/python/pylibcudf/pylibcudf/strings/wrap.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -10,11 +10,12 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["wrap"] cpdef Column wrap( - Column input, size_type width, Stream stream=None, DeviceMemoryResource mr=None + Column input, size_type width, object stream=None, DeviceMemoryResource mr=None ): """ Wraps strings onto multiple lines shorter than `width` by @@ -41,15 +42,16 @@ cpdef Column wrap( Column of wrapped strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_wrap.wrap( input.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/table.pxd b/python/pylibcudf/pylibcudf/table.pxd index 4a4a963e0de..76c38dacf3f 100644 --- a/python/pylibcudf/pylibcudf/table.pxd +++ b/python/pylibcudf/pylibcudf/table.pxd @@ -4,7 +4,6 @@ from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cdef class Table: @@ -20,7 +19,7 @@ cdef class Table: @staticmethod cdef Table from_libcudf( unique_ptr[table] libcudf_tbl, - Stream stream, + object stream, DeviceMemoryResource mr ) @@ -31,8 +30,8 @@ cdef class Table: cdef Table from_table_view_of_arbitrary( const table_view& tv, object owner, - Stream stream, + object stream, ) cpdef list columns(self) - cpdef Table copy(self, Stream stream=*, DeviceMemoryResource 
mr=*) + cpdef Table copy(self, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi index 0f8de52b132..263bf813c75 100644 --- a/python/pylibcudf/pylibcudf/table.pyi +++ b/python/pylibcudf/pylibcudf/table.pyi @@ -4,11 +4,11 @@ from typing import Any from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf._interop_helpers import ArrowLike, ColumnMetadata from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class Table: def __init__(self, column: list[Column]): ... @@ -18,22 +18,22 @@ class Table: def columns(self) -> list[Column]: ... def copy( self, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def to_arrow( self, metadata: list[ColumnMetadata | str] | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> ArrowLike: ... # Private methods below are included because polars is currently using them, # but we want to remove stubs for these private methods eventually def _to_schema(self, metadata: Any = None) -> Any: ... - def _to_host_array(self, stream: Stream) -> Any: ... + def _to_host_array(self, stream: CudaStreamLike) -> Any: ... @staticmethod def from_arrow( arrow_like: ArrowLike, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx index 654cf9bb60b..6b62a5428f9 100644 --- a/python/pylibcudf/pylibcudf/table.pyx +++ b/python/pylibcudf/pylibcudf/table.pyx @@ -39,6 +39,7 @@ from pylibcudf._interop_helpers cimport ( _metadata_to_libcudf, ) from ._interop_helpers import ArrowLike, ColumnMetadata, _ObjectWithArrowMetadata +from cuda.bindings.cyruntime cimport cudaStream_t try: import pyarrow as pa @@ -105,7 +106,7 @@ cdef class Table: def from_arrow( obj: ArrowLike, dtype: DataType | None = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ) -> Table: """ @@ -154,7 +155,8 @@ cdef class Table: cdef _ArrowTableHolder result cdef unique_ptr[arrow_table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if hasattr(obj, "__arrow_c_device_array__"): @@ -170,7 +172,7 @@ cdef class Table: c_result = make_unique[arrow_table]( move(dereference(c_schema)), move(dereference(c_array)), - stream.view(), + _cs, result.mr.get_mr(), ) result.tbl.swap(c_result) @@ -193,7 +195,7 @@ cdef class Table: with nogil: c_result = make_unique[arrow_table]( move(dereference(c_stream)), - stream.view(), + _cs, result.mr.get_mr(), ) result.tbl.swap(c_result) @@ -233,7 +235,7 @@ cdef class Table: @staticmethod cdef Table from_libcudf( unique_ptr[table] libcudf_tbl, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Table from a libcudf table. @@ -275,7 +277,7 @@ cdef class Table: cdef Table from_table_view_of_arbitrary( const table_view& tv, object owner, - Stream stream, + object stream, ): """Create a Table from a libcudf table_view into an arbitrary owner. @@ -292,8 +294,9 @@ cdef class Table: # For efficiency, prohibit calling this overload with a Table owner. 
assert not isinstance(owner, Table) cdef int i + cdef Stream _stream = stream return Table([ - Column.from_column_view_of_arbitrary(tv.column(i), owner, stream) + Column.from_column_view_of_arbitrary(tv.column(i), owner, _stream) for i in range(tv.num_columns()) ]) @@ -315,7 +318,7 @@ cdef class Table: """The shape of this table""" return (self.num_rows(), self.num_columns()) - cpdef Table copy(self, Stream stream=None, DeviceMemoryResource mr=None): + cpdef Table copy(self, object stream=None, DeviceMemoryResource mr=None): """Create a deep copy of the table. Parameters @@ -330,9 +333,9 @@ cdef class Table: Table A new Table with deep copies of all columns. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) - return Table([col.copy(stream, mr) for col in self._columns]) + return Table([col.copy(_stream, mr) for col in self._columns]) def _to_schema(self, metadata=None): """Create an Arrow schema from this table.""" @@ -356,11 +359,13 @@ cdef class Table: return PyCapsule_New(raw_schema_ptr, "arrow_schema", _release_schema) - def _to_host_array(self, Stream stream): + def _to_host_array(self, object stream): cdef ArrowArray* raw_host_array_ptr + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: - raw_host_array_ptr = to_arrow_host_raw(self.view(), stream.view()) + raw_host_array_ptr = to_arrow_host_raw(self.view(), _cs) return PyCapsule_New(raw_host_array_ptr, "arrow_array", _release_array) diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd index a92ffb3f27e..8333abd6df0 100644 --- a/python/pylibcudf/pylibcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/transform.pxd @@ -3,7 +3,6 @@ from libcpp cimport bool from pylibcudf.libcudf.types cimport bitmask_type, data_type from pylibcudf.libcudf.types cimport null_aware, output_nullability -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource 
cimport DeviceMemoryResource from .column cimport Column @@ -14,30 +13,30 @@ from .types cimport DataType cpdef tuple[gpumemoryview, int] nans_to_nulls( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column column_nans_to_nulls( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column compute_column( - Table input, Expression expr, Stream stream = *, DeviceMemoryResource mr = * + Table input, Expression expr, object stream = *, DeviceMemoryResource mr = * ) cpdef Column compute_column_jit( - Table input, Expression expr, Stream stream = *, DeviceMemoryResource mr = * + Table input, Expression expr, object stream = *, DeviceMemoryResource mr = * ) cpdef tuple[gpumemoryview, int] bools_to_mask( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column mask_to_bools( Py_ssize_t bitmask, int begin_bit, int end_bit, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -48,17 +47,17 @@ cpdef Column transform( bool is_ptx, null_aware is_null_aware, output_nullability null_policy, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef tuple[Table, Column] encode( - Table input, Stream stream = *, DeviceMemoryResource mr = * + Table input, object stream = *, DeviceMemoryResource mr = * ) cpdef Table one_hot_encode( Column input_column, Column categories, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi index 2d2038f07a0..e979575f590 100644 --- a/python/pylibcudf/pylibcudf/transform.pyi +++ b/python/pylibcudf/pylibcudf/transform.pyi @@ -1,46 +1,46 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.table import Table from pylibcudf.types import DataType, NullAware, OutputNullability +from pylibcudf.utils import CudaStreamLike def nans_to_nulls( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[gpumemoryview, int]: ... def column_nans_to_nulls( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def compute_column( input: Table, expr: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def compute_column_jit( input: Table, expr: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def bools_to_mask( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[gpumemoryview, int]: ... def mask_to_bools( bitmask: int, begin_bit: int, end_bit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def transform( @@ -50,17 +50,17 @@ def transform( is_ptx: bool, null_aware: NullAware = NullAware.NO, null_policy: OutputNullability = OutputNullability.PRESERVE, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def encode( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, Column]: ... 
def one_hot_encode( input: Column, categories: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index 3baf6c5306e..0025ed7d566 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -26,6 +26,7 @@ from .expressions cimport Expression from .gpumemoryview cimport gpumemoryview from .types cimport DataType, null_aware, output_nullability from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "bools_to_mask", @@ -41,7 +42,7 @@ __all__ = [ cpdef tuple[gpumemoryview, int] nans_to_nulls( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a null mask preserving existing nulls and converting nans to null. @@ -63,21 +64,26 @@ cpdef tuple[gpumemoryview, int] nans_to_nulls( """ cdef pair[unique_ptr[device_buffer], size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_transform.nans_to_nulls(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_transform.nans_to_nulls( + input.view(), _cs, mr.get_mr() + ) return ( - gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream, mr)), + gpumemoryview( + DeviceBuffer.c_from_unique_ptr(move(c_result.first), _stream, mr) + ), c_result.second ) cpdef Column column_nans_to_nulls( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column with nans converted to nulls. 
@@ -100,19 +106,20 @@ cpdef Column column_nans_to_nulls( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.column_nans_to_nulls( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column compute_column( - Table input, Expression expr, Stream stream=None, DeviceMemoryResource mr=None + Table input, Expression expr, object stream=None, DeviceMemoryResource mr=None ): """Create a column by evaluating an expression on a table. @@ -135,19 +142,20 @@ cpdef Column compute_column( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.compute_column( - input.view(), dereference(expr.c_obj.get()), stream.view(), mr.get_mr() + input.view(), dereference(expr.c_obj.get()), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column compute_column_jit( - Table input, Expression expr, Stream stream=None, DeviceMemoryResource mr=None + Table input, Expression expr, object stream=None, DeviceMemoryResource mr=None ): """ Create a column by evaluating an expression on a table @@ -172,20 +180,21 @@ cpdef Column compute_column_jit( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.compute_column_jit( - input.view(), dereference(expr.c_obj.get()), stream.view(), mr.get_mr() + input.view(), dereference(expr.c_obj.get()), _cs, 
mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef tuple[gpumemoryview, int] bools_to_mask( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a bitmask from a column of boolean elements @@ -206,14 +215,19 @@ cpdef tuple[gpumemoryview, int] bools_to_mask( """ cdef pair[unique_ptr[device_buffer], size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_transform.bools_to_mask(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_transform.bools_to_mask( + input.view(), _cs, mr.get_mr() + ) return ( - gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream, mr)), + gpumemoryview( + DeviceBuffer.c_from_unique_ptr(move(c_result.first), _stream, mr) + ), c_result.second ) @@ -222,7 +236,7 @@ cpdef Column mask_to_bools( Py_ssize_t bitmask, int begin_bit, int end_bit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Creates a boolean column from given bitmask. 
@@ -248,7 +262,8 @@ cpdef Column mask_to_bools( cdef unique_ptr[column] c_result cdef bitmask_type * bitmask_ptr = bitmask - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -256,11 +271,11 @@ cpdef Column mask_to_bools( bitmask_ptr, begin_bit, end_bit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column transform( @@ -270,7 +285,7 @@ cpdef Column transform( bool is_ptx, null_aware is_null_aware, output_nullability null_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a new column by applying a transform function against @@ -312,7 +327,8 @@ cpdef Column transform( cdef output_nullability c_null_policy = null_policy cdef optional[void *] user_data - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) for input in inputs: @@ -327,14 +343,14 @@ cpdef Column transform( user_data, c_is_null_aware, c_null_policy, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef tuple[Table, Column] encode( - Table input, Stream stream=None, DeviceMemoryResource mr=None + Table input, object stream=None, DeviceMemoryResource mr=None ): """Encode the rows of the given table as integers. 
@@ -355,21 +371,22 @@ cpdef tuple[Table, Column] encode( """ cdef pair[unique_ptr[table], unique_ptr[column]] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_transform.encode(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_transform.encode(input.view(), _cs, mr.get_mr()) return ( - Table.from_libcudf(move(c_result.first), stream, mr), - Column.from_libcudf(move(c_result.second), stream, mr) + Table.from_libcudf(move(c_result.first), _stream, mr), + Column.from_libcudf(move(c_result.second), _stream, mr) ) cpdef Table one_hot_encode( Column input, Column categories, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Encodes `input` by generating a new column @@ -395,19 +412,20 @@ cpdef Table one_hot_encode( cdef pair[unique_ptr[column], table_view] c_result cdef Table owner_table - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.one_hot_encode( input.view(), categories.view(), - stream.view(), + _cs, mr.get_mr() ) owner_table = Table( - [Column.from_libcudf(move(c_result.first), stream, mr)] + [Column.from_libcudf(move(c_result.first), _stream, mr)] * c_result.second.num_columns() ) diff --git a/python/pylibcudf/pylibcudf/transpose.pxd b/python/pylibcudf/pylibcudf/transpose.pxd index 6c432a62b5f..a63d52da9e1 100644 --- a/python/pylibcudf/pylibcudf/transpose.pxd +++ b/python/pylibcudf/pylibcudf/transpose.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from .table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -cpdef Table transpose(Table input_table, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Table transpose(Table input_table, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/transpose.pyi b/python/pylibcudf/pylibcudf/transpose.pyi index 4487e49feaf..fbf2d3fce2d 100644 --- a/python/pylibcudf/pylibcudf/transpose.pyi +++ b/python/pylibcudf/pylibcudf/transpose.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def transpose( input_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/transpose.pyx b/python/pylibcudf/pylibcudf/transpose.pyx index e7cdbe503eb..e15aa45ce77 100644 --- a/python/pylibcudf/pylibcudf/transpose.pyx +++ b/python/pylibcudf/pylibcudf/transpose.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair @@ -13,11 +13,12 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["transpose"] cpdef Table transpose( - Table input_table, Stream stream=None, DeviceMemoryResource mr=None + Table input_table, object stream=None, DeviceMemoryResource mr=None ): """Transpose a Table. @@ -39,16 +40,17 @@ cpdef Table transpose( """ cdef pair[unique_ptr[column], table_view] c_result cdef Table owner_table - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transpose.transpose( - input_table.view(), stream.view(), mr.get_mr() + input_table.view(), _cs, mr.get_mr() ) owner_table = Table( - [Column.from_libcudf(move(c_result.first), stream, mr)] * + [Column.from_libcudf(move(c_result.first), _stream, mr)] * c_result.second.num_columns() ) diff --git a/python/pylibcudf/pylibcudf/unary.pxd b/python/pylibcudf/pylibcudf/unary.pxd index 69ec06ecea6..44a4f796085 100644 --- a/python/pylibcudf/pylibcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/unary.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.unary cimport unary_operator -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -11,19 +10,19 @@ from .types cimport DataType cpdef Column unary_operation( - Column input, unary_operator op, Stream stream = *, DeviceMemoryResource mr = * + Column input, unary_operator op, object stream = *, DeviceMemoryResource mr = * ) -cpdef Column is_null(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_null(Column input, object stream = *, DeviceMemoryResource mr = *) -cpdef Column is_valid(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_valid(Column input, object stream = *, DeviceMemoryResource mr = *) cpdef Column cast( - Column input, DataType data_type, Stream stream = *, DeviceMemoryResource mr = * + Column input, DataType data_type, object stream = *, DeviceMemoryResource mr = * ) -cpdef Column is_nan(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_nan(Column input, object stream = *, DeviceMemoryResource mr = *) -cpdef Column is_not_nan(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_not_nan(Column input, object stream = *, DeviceMemoryResource mr = *) cpdef bool is_supported_cast(DataType from_, DataType to) diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi index 6a77f7998b9..dd3d42404e7 100644 --- a/python/pylibcudf/pylibcudf/unary.pyi +++ b/python/pylibcudf/pylibcudf/unary.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class UnaryOperator(IntEnum): SIN = ... @@ -38,33 +38,33 @@ class UnaryOperator(IntEnum): def unary_operation( input: Column, op: UnaryOperator, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_null( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_valid( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def cast( input: Column, data_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_nan( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_not_nan( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_supported_cast(from_: DataType, to: DataType) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx index da5b08df685..e0614037012 100644 --- a/python/pylibcudf/pylibcudf/unary.pyx +++ b/python/pylibcudf/pylibcudf/unary.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -16,6 +16,7 @@ from pylibcudf.libcudf.unary import \ from .column cimport Column from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "UnaryOperator", @@ -29,7 +30,7 @@ __all__ = [ ] cpdef Column unary_operation( - Column input, unary_operator op, Stream stream=None, DeviceMemoryResource mr=None + Column input, unary_operator op, object stream=None, DeviceMemoryResource mr=None ): """Perform a unary operation on a column. @@ -53,16 +54,19 @@ cpdef Column unary_operation( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.unary_operation(input.view(), op, stream.view(), mr.get_mr()) + result = cpp_unary.unary_operation( + input.view(), op, _cs, mr.get_mr() + ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_null(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_null(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are null. For details, see :cpp:func:`is_null`. 
@@ -83,16 +87,17 @@ cpdef Column is_null(Column input, Stream stream=None, DeviceMemoryResource mr=N """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_null(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_null(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_valid(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_valid(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are valid. For details, see :cpp:func:`is_valid`. @@ -113,17 +118,18 @@ cpdef Column is_valid(Column input, Stream stream=None, DeviceMemoryResource mr= """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_valid(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_valid(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column cast( - Column input, DataType data_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType data_type, object stream=None, DeviceMemoryResource mr=None ): """Cast a column to a different data type. 
@@ -147,18 +153,19 @@ cpdef Column cast( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_unary.cast( - input.view(), data_type.c_obj, stream.view(), mr.get_mr() + input.view(), data_type.c_obj, _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_nan(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_nan(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are nan. For details, see :cpp:func:`is_nan`. @@ -179,16 +186,17 @@ cpdef Column is_nan(Column input, Stream stream=None, DeviceMemoryResource mr=No """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_nan(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_nan(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_not_nan(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_not_nan(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are not nan. For details, see :cpp:func:`is_not_nan`. 
@@ -209,13 +217,14 @@ cpdef Column is_not_nan(Column input, Stream stream=None, DeviceMemoryResource m """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_not_nan(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_not_nan(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef bool is_supported_cast(DataType from_, DataType to): """Check if a cast between datatypes is supported. diff --git a/python/pylibcudf/pylibcudf/utils.pxd b/python/pylibcudf/pylibcudf/utils.pxd index b3d2928f398..feb82cea18f 100644 --- a/python/pylibcudf/pylibcudf/utils.pxd +++ b/python/pylibcudf/pylibcudf/utils.pxd @@ -1,12 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.functional cimport reference_wrapper from libcpp.vector cimport vector from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource +from rmm.pylibrmm.stream cimport Stream cdef vector[reference_wrapper[const scalar]] _as_vector(list source) -cpdef Stream _get_stream(Stream stream = *) +cpdef Stream _get_stream(object stream = *) cdef DeviceMemoryResource _get_memory_resource(DeviceMemoryResource mr = *) diff --git a/python/pylibcudf/pylibcudf/utils.pyi b/python/pylibcudf/pylibcudf/utils.pyi index 21f669898ba..cc3cb93e6c0 100644 --- a/python/pylibcudf/pylibcudf/utils.pyi +++ b/python/pylibcudf/pylibcudf/utils.pyi @@ -1,6 +1,13 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from typing import Protocol + from rmm.pylibrmm.stream import Stream -def _get_stream(stream: Stream | None = None) -> Stream: ... +class HasCudaStream(Protocol): + def __cuda_stream__(self) -> tuple[int, int]: ... + +CudaStreamLike = Stream | HasCudaStream + +def _get_stream(stream: CudaStreamLike | None = None) -> Stream: ... diff --git a/python/pylibcudf/pylibcudf/utils.pyx b/python/pylibcudf/pylibcudf/utils.pyx index 70460e19481..314e62f7760 100644 --- a/python/pylibcudf/pylibcudf/utils.pyx +++ b/python/pylibcudf/pylibcudf/utils.pyx @@ -47,10 +47,12 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source): return c_scalars -cpdef Stream _get_stream(Stream stream = None): +cpdef Stream _get_stream(object stream = None): if stream is None: return CUDF_DEFAULT_STREAM - return stream + if isinstance(stream, Stream): + return stream + return Stream(stream) # Handles __cuda_stream__ protocol cdef DeviceMemoryResource _get_memory_resource(DeviceMemoryResource mr = None): diff --git a/python/pylibcudf/tests/test_experimental.py b/python/pylibcudf/tests/test_experimental.py index eaf06ff62ae..ed180e8db29 100644 --- a/python/pylibcudf/tests/test_experimental.py +++ b/python/pylibcudf/tests/test_experimental.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest @@ -21,6 +21,7 @@ def test_join_streams(streams: list[Stream], stream: Stream): plc.experimental.join_streams(streams, stream) +@pytest.mark.uses_custom_stream def test_join_streams_type_error(): """Test that join_streams raises appropriate errors for invalid inputs.""" main_stream = Stream() @@ -29,16 +30,10 @@ def test_join_streams_type_error(): with pytest.raises(TypeError): plc.experimental.join_streams(None, main_stream) - # Test with non-Stream in list - with pytest.raises( - TypeError, - match="Cannot convert NoneType to rmm.pylibrmm.stream.Stream", - ): - plc.experimental.join_streams([None], main_stream) - - # Test with non-Stream as main stream - with pytest.raises( - TypeError, - match="Cannot convert NoneType to rmm.pylibrmm.stream.Stream", - ): - plc.experimental.join_streams([Stream()], None) + # Protocol stream should be accepted + class _CudaStreamProto: + def __cuda_stream__(self): + return (0, 0) + + plc.experimental.join_streams([_CudaStreamProto()], main_stream) + plc.experimental.join_streams([Stream()], _CudaStreamProto()) diff --git a/python/pylibcudf/tests/test_stream_protocol.py b/python/pylibcudf/tests/test_stream_protocol.py new file mode 100644 index 00000000000..075c49bd0b3 --- /dev/null +++ b/python/pylibcudf/tests/test_stream_protocol.py @@ -0,0 +1,74 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 + +import pyarrow as pa +import pytest + +from rmm.pylibrmm.stream import Stream + +import pylibcudf as plc + + +class _CudaStreamProto: + """Minimal __cuda_stream__ protocol object for testing.""" + + def __cuda_stream__(self): + return (0, 0) + + +def test_get_stream_none(): + stream = plc.utils._get_stream(None) + assert isinstance(stream, Stream) + + +def test_get_stream_stream_object(): + stream = Stream() + result = plc.utils._get_stream(stream) + assert result is stream + + +def test_get_stream_protocol_object(): + proto = _CudaStreamProto() + result = plc.utils._get_stream(proto) + assert isinstance(result, Stream) + + +@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()]) +def test_reduce_accepts_stream_protocol(stream): + arr = pa.array([1, 2, 3], type=pa.int32()) + col = plc.Column.from_arrow(arr) + agg = plc.aggregation.sum() + dtype = plc.DataType.from_arrow(pa.int32()) + result = plc.reduce.reduce(col, agg, dtype, stream=stream) + assert result.to_py() == 6 + + +@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()]) +def test_binary_operation_accepts_stream_protocol(stream): + lhs = plc.Column.from_arrow(pa.array([1, 2, 3], type=pa.int32())) + rhs = plc.Column.from_arrow(pa.array([4, 5, 6], type=pa.int32())) + dtype = plc.DataType.from_arrow(pa.int32()) + result = plc.binaryop.binary_operation( + lhs, + rhs, + plc.binaryop.BinaryOperator.ADD, + dtype, + stream=stream, + ) + expect = pa.array([5, 7, 9], type=pa.int32()) + assert result.to_arrow().equals(expect) + + +@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()]) +def test_gather_accepts_stream_protocol(stream): + table = plc.Table.from_arrow(pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})) + indices = plc.Column.from_arrow(pa.array([2, 0], type=pa.int32())) + result = plc.copying.gather( + table, + indices, + plc.copying.OutOfBoundsPolicy.DONT_CHECK, + stream=stream, + ) + expected = pa.table({"a": [3, 1], 
"b": [6, 4]}) + got = result.to_arrow().rename_columns(expected.column_names) + assert got.cast(expected.schema).equals(expected) From 10993fb0434c705e884fe6b1e7b1edabe30dce5e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 May 2026 16:41:46 -0700 Subject: [PATCH 06/12] Use `language: script` for cudf-polars-ir-signatures pre-commit hook (#22384) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `cudf-polars-ir-signatures` pre-commit hook uses `language: python` but is just a local script (`./ci/check_cudf_polars_ir.py`) that only depends on stdlib modules (`ast`, `argparse`, `sys`, `typing`) and has a `#!/usr/bin/env python3` shebang. With `language: python`, pre-commit unnecessarily creates a virtualenv for this hook. `language: script` is the correct setting — it runs the entry point directly as an executable, relying on the shebang for interpreter selection, with no virtualenv overhead. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/22384 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1fb05425bd3..a51294a8f26 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -136,7 +136,7 @@ repos: name: cudf-polars-ir-signatures description: 'Validate cudf-polars IR.do_evaluate signatures.' 
entry: ./ci/check_cudf_polars_ir.py - language: python + language: script files: ^python/cudf_polars/cudf_polars/(dsl/ir|experimental/(shuffle|io|sort))\.py$ pass_filenames: true verbose: true From 572437becfffc1ce802849c56218a3c137b917d5 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 May 2026 18:14:35 -0700 Subject: [PATCH 07/12] Fix potential errors in Parquet page header decode (#22274) This PR fixes a potential infinite loop in parquet page header count/decode kernels if case of malformed input. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Paul Mattione (https://github.com/pmattione-nvidia) URL: https://github.com/rapidsai/cudf/pull/22274 --- cpp/src/io/parquet/page_hdr.cu | 132 ++++++++++--------- cpp/src/io/parquet/parquet_gpu.hpp | 2 + cpp/src/io/parquet/reader_impl_preprocess.cu | 8 +- 3 files changed, 81 insertions(+), 61 deletions(-) diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 83724dd71e2..8e7a6223447 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -110,8 +110,8 @@ inline __device__ int32_t get_i32(byte_stream_s* bs) */ __device__ void skip_struct_field(byte_stream_s* bs, int field_type) { - int struct_depth = 0; - int rep_cnt = 0; + uint32_t struct_depth = 0; + uint32_t rep_cnt = 0; do { if (rep_cnt != 0) { @@ -138,7 +138,7 @@ __device__ void skip_struct_field(byte_stream_s* bs, int field_type) case FieldType::LIST: case FieldType::SET: { // NOTE: skipping a list of lists is not handled auto const c = getb(bs); - int n = c >> 4; + uint32_t n = c >> 4; if (n == 0xf) { n = get_u32(bs); } field_type = c & 0xf; if (static_cast(field_type) == FieldType::STRUCT) { @@ -543,7 +543,6 @@ void __launch_bounds__(decode_page_headers_block_size) auto const block = cg::this_thread_block(); auto const warp = cg::tiled_partition(block); - auto const lane_id = 
warp.thread_rank(); auto const warp_id = warp.meta_group_rank(); auto const chunk_idx = static_cast((cg::this_grid().block_rank() * num_warps_per_block) + warp_id); @@ -554,20 +553,20 @@ void __launch_bounds__(decode_page_headers_block_size) auto const bs = &bs_g[warp_id]; - if (lane_id == 0) { + cg::invoke_one(warp, [&] { if (chunk_idx < num_chunks) { bs->ck = chunks[chunk_idx]; } error[warp_id] = 0; - } + }); block.sync(); if (chunk_idx < num_chunks) { - if (lane_id == 0) { + cg::invoke_one(warp, [&] { bs->base = bs->cur = bs->ck.compressed_data; bs->end = bs->base + bs->ck.compressed_size; bs->page.chunk_idx = chunk_idx; bs->page.src_col_schema = bs->ck.src_col_schema; zero_out_page_header_info(bs); - } + }); size_t const num_values = bs->ck.num_values; size_t values_found = 0; uint32_t data_page_count = 0; @@ -580,7 +579,7 @@ void __launch_bounds__(decode_page_headers_block_size) while (values_found < num_values and bs->cur < bs->end) { int index_out = -1; - if (lane_id == 0) { + cg::invoke_one(warp, [&] { // this computation is only valid for flat schemas. 
for nested schemas, // they will be recomputed in the preprocess step by examining repetition and // definition levels @@ -593,7 +592,7 @@ void __launch_bounds__(decode_page_headers_block_size) bs->page.num_nulls = 0; bs->page.lvl_bytes[level_type::DEFINITION] = 0; bs->page.lvl_bytes[level_type::REPETITION] = 0; - if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size >= 0) { + if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size > 0) { if (not is_supported_encoding(bs->page.encoding)) { error[warp_id] |= static_cast(decode_error::UNSUPPORTED_ENCODING); @@ -641,11 +640,13 @@ void __launch_bounds__(decode_page_headers_block_size) bs->cur = bs->end; } if (index_out >= 0 and index_out < max_num_pages) { page_info[index_out] = bs->page; } - } + }); values_found = shuffle(values_found); warp.sync(); } - if (lane_id == 0 and error[warp_id] != 0) { set_error(error[warp_id], error_code); } + cg::invoke_one(warp, [&] { + if (error[warp_id] != 0) { set_error(error[warp_id], error_code); } + }); } } @@ -664,7 +665,6 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size) auto const block = cg::this_thread_block(); auto const warp = cg::tiled_partition(block); - auto const lane_id = warp.thread_rank(); auto const warp_id = warp.meta_group_rank(); auto const chunk_idx = static_cast((cg::this_grid().block_rank() * num_warps_per_block) + warp_id); @@ -675,25 +675,25 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size) auto const bs = &bs_g[warp_id]; - if (lane_id == 0) { + cg::invoke_one(warp, [&] { if (chunk_idx < num_chunks) { bs->ck = chunks[chunk_idx]; } error[warp_id] = 0; - } + }); block.sync(); if (chunk_idx < num_chunks) { - if (lane_id == 0) { + cg::invoke_one(warp, [&] { bs->base = bs->cur = bs->ck.compressed_data; bs->end = bs->base + bs->ck.compressed_size; - } + }); size_t const num_values = bs->ck.num_values; size_t values_found = 0; uint32_t data_page_count = 0; uint32_t dictionary_page_count = 0; warp.sync(); 
while (values_found < num_values and bs->cur < bs->end) { - if (lane_id == 0) { - if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size >= 0) { + cg::invoke_one(warp, [&] { + if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size > 0) { if (not is_supported_encoding(bs->page.encoding)) { error[warp_id] |= static_cast(decode_error::UNSUPPORTED_ENCODING); @@ -724,15 +724,15 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size) static_cast(decode_error::INVALID_PAGE_HEADER); bs->cur = bs->end; } - } + }); values_found = shuffle(values_found); warp.sync(); } - if (lane_id == 0) { + cg::invoke_one(warp, [&] { chunks[chunk_idx].num_data_pages = data_page_count; chunks[chunk_idx].num_dict_pages = dictionary_page_count; if (error[warp_id] != 0) { set_error(error[warp_id], error_code); } - } + }); } } @@ -784,8 +784,9 @@ struct decode_page_headers_with_pgidx_fn { // bs.page.chunk_row not computed here and will be filled in later by // `fill_in_page_info()`. - if (not parse_page_header_fn{}(&bs) or bs.page.compressed_page_size < 0) { - set_error(static_cast(decode_error::UNSUPPORTED_ENCODING), + // Parsed page must be valid and not empty + if (not parse_page_header_fn{}(&bs) or bs.page.compressed_page_size <= 0) { + set_error(static_cast(decode_error::INVALID_PAGE_HEADER), error_code); return; } @@ -834,54 +835,64 @@ struct decode_page_headers_with_pgidx_fn { * @param[in] num_chunks Number of column chunks */ CUDF_KERNEL void __launch_bounds__(build_string_dict_index_block_size) - build_string_dictionary_index_kernel(ColumnChunkDesc* chunks, int32_t num_chunks) + build_string_dictionary_index_kernel(ColumnChunkDesc* chunks, + int32_t num_chunks, + kernel_error::pointer error_code) { auto constexpr num_warps_per_block = build_string_dict_index_block_size / cudf::detail::warp_size; __shared__ ColumnChunkDesc chunk_g[num_warps_per_block]; - auto const block = cg::this_thread_block(); - auto const warp = cg::tiled_partition(block); - int const 
lane_id = warp.thread_rank(); - int const chunk = (cg::this_grid().block_rank() * num_warps_per_block) + warp.meta_group_rank(); + auto const block = cg::this_thread_block(); + auto const warp = cg::tiled_partition(block); + int const chunk = (cg::this_grid().block_rank() * num_warps_per_block) + warp.meta_group_rank(); ColumnChunkDesc* const ck = &chunk_g[warp.meta_group_rank()]; - if (chunk < num_chunks and lane_id == 0) *ck = chunks[chunk]; + cg::invoke_one(warp, [&] { + if (chunk < num_chunks) { *ck = chunks[chunk]; } + }); block.sync(); if (chunk >= num_chunks) { return; } - if (!lane_id && ck->num_dict_pages > 0 && ck->str_dict_index) { - // Data type to describe a string - string_index_pair* dict_index = ck->str_dict_index; - uint8_t const* dict = ck->dict_page->page_data; - int dict_size = ck->dict_page->uncompressed_page_size; - int num_entries = ck->dict_page->num_input_values; - int pos = 0, cur = 0; - for (int i = 0; i < num_entries; i++) { - int len = 0; - if (ck->physical_type == Type::FIXED_LEN_BYTE_ARRAY) { - if (cur + ck->type_length <= dict_size) { - len = ck->type_length; - pos = cur; - cur += len; - } else { - cur = dict_size; - } - } else { - if (cur + 4 <= dict_size) { - len = - dict[cur + 0] | (dict[cur + 1] << 8) | (dict[cur + 2] << 16) | (dict[cur + 3] << 24); - if (len >= 0 && cur + 4 + len <= dict_size) { - pos = cur + 4; - cur = pos + len; + cg::invoke_one(warp, [&] { + if (ck->num_dict_pages > 0 && ck->str_dict_index) { + // Data type to describe a string + string_index_pair* dict_index = ck->str_dict_index; + uint8_t const* dict = ck->dict_page->page_data; + int const dict_size = ck->dict_page->uncompressed_page_size; + int32_t const num_entries = ck->dict_page->num_input_values; + if (num_entries < 0 or dict_size < 0) { + set_error(static_cast(decode_error::INVALID_DICT_WIDTH), + error_code); + return; + } + int pos = 0, cur = 0; + for (int i = 0; i < num_entries; i++) { + int len = 0; + if (ck->physical_type == 
Type::FIXED_LEN_BYTE_ARRAY) { + if (cur + ck->type_length <= dict_size) { + len = ck->type_length; + pos = cur; + cur += len; } else { cur = dict_size; } + } else { + if (cur + 4 <= dict_size) { + len = + dict[cur + 0] | (dict[cur + 1] << 8) | (dict[cur + 2] << 16) | (dict[cur + 3] << 24); + if (len >= 0 && cur + 4 + len <= dict_size) { + pos = cur + 4; + cur = pos + len; + } else { + cur = dict_size; + } + } } + // TODO: Could store 8 entries in shared mem, then do a single warp-wide store + dict_index[i].first = reinterpret_cast(dict + pos); + dict_index[i].second = len; } - // TODO: Could store 8 entries in shared mem, then do a single warp-wide store - dict_index[i].first = reinterpret_cast(dict + pos); - dict_index[i].second = len; } - } + }); } } // namespace @@ -942,6 +953,7 @@ void decode_page_headers_with_pgidx(cudf::device_span chu void build_string_dictionary_index(ColumnChunkDesc* chunks, int32_t num_chunks, + kernel_error::pointer error_code, rmm::cuda_stream_view stream) { static_assert( @@ -954,8 +966,8 @@ void build_string_dictionary_index(ColumnChunkDesc* chunks, dim3 dim_block(build_string_dict_index_block_size, 1); dim3 dim_grid(num_blocks, 1); - build_string_dictionary_index_kernel<<>>(chunks, - num_chunks); + build_string_dictionary_index_kernel<<>>( + chunks, num_chunks, error_code); } } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 680d11959a1..7d07f39aa38 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -735,10 +735,12 @@ void decode_page_headers_with_pgidx(cudf::device_span chu * * @param[in] chunks List of column chunks * @param[in] num_chunks Number of column chunks + * @param[out] error_code Pointer to the error code for kernel failures * @param[in] stream CUDA stream to use */ void build_string_dictionary_index(ColumnChunkDesc* chunks, int32_t num_chunks, + kernel_error::pointer error_code, rmm::cuda_stream_view 
stream); /** diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 07db8ff0c23..8ebb8879d7e 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -94,8 +94,14 @@ void reader_impl::build_string_dict_indices() set_str_dict_index_ptr{pass.str_dict_index.data(), str_dict_index_offsets, pass.chunks}); // compute the indices - build_string_dictionary_index(pass.chunks.device_ptr(), pass.chunks.size(), _stream); + kernel_error error_code(_stream); + build_string_dictionary_index( + pass.chunks.device_ptr(), pass.chunks.size(), error_code.data(), _stream); pass.chunks.device_to_host(_stream); + auto const error = error_code.value_sync(_stream); + CUDF_EXPECTS( + error == 0, + "Parquet dictionary index construction failed with code(s) " + kernel_error::to_string(error)); } void reader_impl::allocate_nesting_info() From 563021f45e9f11acec6e242b644e3b024ac1e3c3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 May 2026 18:55:29 -0700 Subject: [PATCH 08/12] Make RapidsMPF the default runtime for cudf_polars streaming executor (#22281) closes https://github.com/rapidsai/cudf/issues/21466 closes https://github.com/rapidsai/cudf/issues/21767 Waiting for https://github.com/rapidsai/cudf/pull/22212 * Makes rapidsmpf a required dependency of cudf_polars * Removes the following `StreamingExecutor` options as they were "experimental" with associated code paths * `StreamingExecutor.runtime` * `StreamingExecutor.shuffle_method` * `StreamingExecutor.unique_fraction` * `StreamingExecutor.groupby_n_ary` * `StreamingExecutor.rapidsmpf_spill` * Removes the task runtime and associated tests * Some tests we modified to only test 1 specific test configuration because of https://github.com/rapidsai/cudf/issues/22346 to pass these tests for now. 
Planning on revisiting this once rapidsmpf becomes the default Ops-Bot-Merge-Barrier: true Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - Bradley Dice (https://github.com/bdice) - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/22281 --- .devcontainer/Dockerfile | 2 + .devcontainer/README.md | 8 + .../cuda12.9-conda/devcontainer.json | 5 +- .devcontainer/cuda12.9-pip/devcontainer.json | 7 +- .../cuda13.1-conda/devcontainer.json | 5 +- .devcontainer/cuda13.1-pip/devcontainer.json | 7 +- .github/workflows/pr.yaml | 9 +- ci/test_cudf_polars_experimental.sh | 2 +- ci/test_cudf_polars_polars_tests.sh | 2 +- .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + .../all_cuda-131_arch-aarch64.yaml | 1 + .../all_cuda-131_arch-x86_64.yaml | 1 + conda/recipes/cudf-polars/recipe.yaml | 1 + dependencies.yaml | 56 ++- docs/cudf/source/cudf_polars/api.md | 1 - .../cudf/source/cudf_polars/engine_options.md | 2 +- python/cudf_polars/cudf_polars/callback.py | 5 +- python/cudf_polars/cudf_polars/dsl/expr.py | 2 - .../cudf_polars/dsl/expressions/base.py | 8 +- python/cudf_polars/cudf_polars/dsl/ir.py | 10 +- .../cudf_polars/experimental/base.py | 13 +- .../benchmarks/utils_new_frontends.py | 27 +- .../cudf_polars/experimental/dispatch.py | 39 +- .../cudf_polars/experimental/distinct.py | 77 +--- .../cudf_polars/experimental/explain.py | 5 +- .../cudf_polars/experimental/expressions.py | 21 +- .../cudf_polars/experimental/groupby.py | 63 +-- .../cudf_polars/experimental/io.py | 232 +--------- .../cudf_polars/experimental/join.py | 164 +------ .../cudf_polars/experimental/parallel.py | 125 +----- .../experimental/rapidsmpf/core.py | 4 - .../experimental/rapidsmpf/frontend/core.py | 2 +- .../experimental/rapidsmpf/frontend/dask.py | 2 - .../rapidsmpf/frontend/options.py | 18 - 
.../experimental/rapidsmpf/frontend/ray.py | 6 - .../experimental/rapidsmpf/frontend/spmd.py | 21 +- .../cudf_polars/experimental/repartition.py | 43 +- .../cudf_polars/experimental/scheduler.py | 153 ------- .../cudf_polars/experimental/shuffle.py | 279 +----------- .../cudf_polars/experimental/sort.py | 402 +----------------- .../cudf_polars/experimental/utils.py | 47 +- .../cudf_polars/testing/asserts.py | 5 +- .../cudf_polars/testing/inject_gpu_engine.py | 3 +- .../cudf_polars/cudf_polars/utils/config.py | 214 ++-------- .../cudf_polars/utils/cuda_stream.py | 5 - python/cudf_polars/pyproject.toml | 2 +- python/cudf_polars/tests/conftest.py | 30 +- .../tests/experimental/test_dask.py | 2 - .../tests/experimental/test_explain.py | 4 +- .../tests/experimental/test_groupby.py | 5 +- .../tests/experimental/test_hstack.py | 2 - .../tests/experimental/test_options.py | 5 - .../tests/experimental/test_parallel.py | 41 -- .../tests/experimental/test_ray.py | 4 +- .../tests/experimental/test_sort.py | 4 - .../tests/experimental/test_spmd.py | 3 +- .../tests/experimental/test_unique.py | 35 +- python/cudf_polars/tests/test_config.py | 175 ++------ python/cudf_polars/tests/test_scan.py | 2 +- python/cudf_polars/tests/test_sink.py | 1 + python/cudf_polars/tests/test_tracing.py | 14 +- .../tests/testing/test_engine_utils.py | 5 - 63 files changed, 259 insertions(+), 2181 deletions(-) delete mode 100644 python/cudf_polars/cudf_polars/experimental/scheduler.py diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index b4b2ecb69e0..57ccf6302c5 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -5,9 +5,11 @@ ARG PYTHON_PACKAGE_MANAGER=conda FROM ${BASE} as pip-base +# libnuma-dev is required for pip devcontainers for cucascade from rapidsmpf RUN apt update -y \ && DEBIAN_FRONTEND=noninteractive apt install -y \ librdkafka-dev \ + libnuma-dev \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; ENV DEFAULT_VIRTUAL_ENV=rapids diff 
--git a/.devcontainer/README.md b/.devcontainer/README.md index 91ee7ef85f7..cc5fac22fde 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -20,6 +20,7 @@ This container is a turnkey development environment for building and testing the By default, the following directories are bind-mounted into the devcontainer: * `${repo}:/home/coder/cudf` +* `${repo}/../rapidsmpf:/home/coder/rapidsmpf` * `${repo}/../.aws:/home/coder/.aws` * `${repo}/../.local:/home/coder/.local` * `${repo}/../.cache:/home/coder/.cache` @@ -28,6 +29,13 @@ By default, the following directories are bind-mounted into the devcontainer: This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs. +The [rapidsmpf](https://github.com/rapidsai/rapidsmpf) repository is a required dependency of `cudf_polars` (that also requires `libcudf`) and must be cloned as a sibling directory to the cudf repo before launching the devcontainer: + +``` +# from the parent directory of your cudf clone +git clone https://github.com/rapidsai/rapidsmpf.git +``` + ## Launch a Dev Container To launch a devcontainer from VSCode, open the cuDF repo and select the "Reopen in Container" button in the bottom right:
diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json index 9d672bdbde8..272007e7c95 100644 --- a/.devcontainer/cuda12.9-conda/devcontainer.json +++ b/.devcontainer/cuda12.9-conda/devcontainer.json @@ -49,7 +49,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.9-envs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.9-envs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -63,7 +63,8 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.9-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.9-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json index f99cc4ce5dc..5012dcfa979 100644 --- a/.devcontainer/cuda12.9-pip/devcontainer.json +++ b/.devcontainer/cuda12.9-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:26.06-cpp-cuda12.9" + "BASE": "rapidsai/devcontainers:26.06-cpp-cuda12.9-ucx1.19.0-openmpi5.0.10" } }, "runArgs": [ @@ -27,7 +27,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p 
${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -40,7 +40,8 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda13.1-conda/devcontainer.json b/.devcontainer/cuda13.1-conda/devcontainer.json index a73953b1989..785302c3c1d 100644 --- a/.devcontainer/cuda13.1-conda/devcontainer.json +++ b/.devcontainer/cuda13.1-conda/devcontainer.json @@ -49,7 +49,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.1-envs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.1-envs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -63,7 +63,8 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.1-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.1-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda13.1-pip/devcontainer.json b/.devcontainer/cuda13.1-pip/devcontainer.json index 8596ff6b503..730b1c1e8ca 100644 --- a/.devcontainer/cuda13.1-pip/devcontainer.json +++ b/.devcontainer/cuda13.1-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "13.1", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:26.06-cpp-cuda13.1" + "BASE": "rapidsai/devcontainers:26.06-cpp-cuda13.1-ucx1.19.0-openmpi5.0.10" } }, "runArgs": [ @@ -27,7 +27,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -40,7 +40,8 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - 
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4e6935c3822..41a4c734f21 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -550,9 +550,16 @@ jobs: SCCACHE_DIST_MAX_RETRIES=inf SCCACHE_SERVER_LOG=sccache=debug SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false + # clone-rapidsmpf to not use the rapidsmpf wheels from cudf_polars dependency + # librapidsmpf-cu13 wheels brings in a hardcoded libnuma-dev cmake target: https://github.com/NVIDIA/cuCascade/issues/118 + # -DBUILD_TESTS=OFF to match rapidsmpf https://github.com/rapidsai/rapidsmpf/blob/main/.github/workflows/pr.yaml#L351 (leads to compilation errors) + # -DCUDF_BUILD_TESTUTIL=OFF to avoid IMPORTED_GLOBAL promotion errors when cuCascade's find_package(cudf) loads cudf-config.cmake from a CPM subdirectory build_command: | sccache --zero-stats; - build-all -j0 -DBUILD_BENCHMARKS=ON --verbose 2>&1 | tee telemetry-artifacts/build.log; + clone-rapidsmpf -j$(nproc) -v -q --branch "$(cat ~/cudf/RAPIDS_BRANCH)" --clone-upstream --depth 1 --single-branch --shallow-submodules; + if [ "$PYTHON_PACKAGE_MANAGER" = "pip" ]; then rapids-make-pip-env --force; elif [ "$PYTHON_PACKAGE_MANAGER" = "conda" ]; then rapids-make-conda-env --force; fi; + rapids-generate-scripts; + build-all -j0 -DBUILD_BENCHMARKS=OFF -DBUILD_NUMA_SUPPORT=OFF -DBUILD_TESTS=OFF -DCUDF_BUILD_TESTUTIL=OFF --verbose 2>&1 | tee telemetry-artifacts/build.log; sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt; 
unit-tests-cudf-pandas: needs: [wheel-build-cudf, changed-files] diff --git a/ci/test_cudf_polars_experimental.sh b/ci/test_cudf_polars_experimental.sh index 02eab86c0dd..aa3abd66254 100755 --- a/ci/test_cudf_polars_experimental.sh +++ b/ci/test_cudf_polars_experimental.sh @@ -28,7 +28,7 @@ rapids-pip-retry install \ -v \ --prefer-binary \ --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental,rapidsmpf]" \ + "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental]" \ "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh index 802110b18ac..4e19464a895 100755 --- a/ci/test_cudf_polars_polars_tests.sh +++ b/ci/test_cudf_polars_polars_tests.sh @@ -27,7 +27,7 @@ rapids-logger "Install libcudf, pylibcudf and cudf_polars" rapids-pip-retry install \ -v \ --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,rapidsmpf]" \ + "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \ "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 35939809d53..3bf006400bc 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git 
a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index d88d82e4999..d206963fc36 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index f423c19b51d..c73cea89f29 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 3c9bffa4738..f4d88866690 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git a/conda/recipes/cudf-polars/recipe.yaml b/conda/recipes/cudf-polars/recipe.yaml index e3a21aa1afd..52ac74c7c8b 100644 --- a/conda/recipes/cudf-polars/recipe.yaml +++ b/conda/recipes/cudf-polars/recipe.yaml @@ -36,6 +36,7 @@ requirements: - nvidia-ml-py>=12 - python - pylibcudf =${{ version }} + - rapidsmpf =${{ minor_version }} - polars>=1.30,<1.39 - packaging - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} diff --git a/dependencies.yaml b/dependencies.yaml index 9728b3ea667..b1eb276befb 100644 --- 
a/dependencies.yaml +++ b/dependencies.yaml @@ -8,6 +8,51 @@ files: matrix: cuda: ["12.9", "13.1"] arch: [x86_64, aarch64] + includes: + - build_base + - build_all + - build_cpp + - build_python_common + - clang + - cuda + - cuda_version + - cudf_polars_trace + - depends_on_cupy + - depends_on_cuda_python + - depends_on_dask_cuda + - depends_on_libkvikio + - depends_on_librmm + - depends_on_libnvcomp + - depends_on_numba_cuda + - depends_on_rapids_logger + - depends_on_rapidsmpf + - depends_on_rmm + - develop + - docs + - iwyu + - notebooks + - numpy_run + - py_version + - pyarrow_run + - rapids_build_skbuild + - rapids_build_setuptools + - run_common + - run_cudf + - run_cudf_polars + - run_pylibcudf + - run_dask_cudf + - run_custreamz + - test_cpp + - test_python_common + - test_python_cudf + - test_python_cudf_common + - test_python_pylibcudf + - test_python_cudf_pandas + - test_python_cudf_polars + - test_python_s3 + devcontainers: + output: none + # Same as "all", excluding depends_on_rapidsmpf (which is built from source) includes: - build_base - build_all @@ -328,6 +373,7 @@ files: table: project includes: - run_cudf_polars + - depends_on_rapidsmpf - depends_on_pylibcudf - depends_on_cuda_python py_run_cudf_polars_experimental: @@ -338,15 +384,6 @@ files: key: experimental includes: - run_cudf_polars_experimental - py_run_cudf_polars_rapidsmpf: - output: pyproject - pyproject_dir: python/cudf_polars - extras: - table: project.optional-dependencies - key: rapidsmpf - includes: - - depends_on_rapidsmpf - - pyarrow_run py_test_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars @@ -360,7 +397,6 @@ files: - test_python_common - test_python_cudf_polars - cudf_polars_trace - - depends_on_rapidsmpf py_trace_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars diff --git a/docs/cudf/source/cudf_polars/api.md b/docs/cudf/source/cudf_polars/api.md index 741b2f6f758..823954a3b08 100644 --- a/docs/cudf/source/cudf_polars/api.md +++ 
b/docs/cudf/source/cudf_polars/api.md @@ -13,7 +13,6 @@ For the most part, the public API of `cudf-polars` is the polars API. ExecutorType, InMemoryExecutor, ParquetOptions, - ShuffleMethod, StreamingExecutor, StreamingFallbackMode, ``` diff --git a/docs/cudf/source/cudf_polars/engine_options.md b/docs/cudf/source/cudf_polars/engine_options.md index 67e601467d9..ba6085275b8 100644 --- a/docs/cudf/source/cudf_polars/engine_options.md +++ b/docs/cudf/source/cudf_polars/engine_options.md @@ -52,7 +52,7 @@ For example, the environment variable `max_rows_per_partition` to use if it isn't overridden through `executor_options`. -For boolean options, like `rapidsmpf_spill`, the values `{"1", "true", "yes", "y"}` +For boolean options, like `sink_to_directory`, the values `{"1", "true", "yes", "y"}` are considered `True` and `{"0", "false", "no", "n"}` are considered `False`. See [Configuration Reference](#cudf-polars-api) for a full list of options, and diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py index fb915784f96..acd0452ae1b 100644 --- a/python/cudf_polars/cudf_polars/callback.py +++ b/python/cudf_polars/cudf_polars/callback.py @@ -159,13 +159,12 @@ def set_memory_resource( """ previous = rmm.mr.get_current_device_resource() if mr is None: - # Use cuda async by default with the rapidsmpf runtime. + # Use cuda async by default with the streaming executor. if ( memory_resource_config is None and executor.name == "streaming" - and executor.runtime == "rapidsmpf" and (device_size := get_total_device_memory()) is not None - ): # pragma: no cover; Requires rapidsmpf runtime. 
+ ): # pragma: no cover memory_resource_config = MemoryResourceConfig( qualname="rmm.mr.CudaAsyncMemoryResource", options={ diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 5dd8898bde2..b21485ac41e 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -15,7 +15,6 @@ from cudf_polars.dsl.expressions.aggregation import Agg from cudf_polars.dsl.expressions.base import ( - AggInfo, Col, ColRef, ErrorExpr, @@ -37,7 +36,6 @@ __all__ = [ "Agg", - "AggInfo", "BinOp", "BooleanFunction", "Cast", diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/base.py b/python/cudf_polars/cudf_polars/dsl/expressions/base.py index 3336ea3fd7c..b97be71b771 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/base.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/base.py @@ -8,7 +8,7 @@ import enum from enum import IntEnum -from typing import TYPE_CHECKING, Any, ClassVar, NamedTuple +from typing import TYPE_CHECKING, Any, ClassVar import pylibcudf as plc @@ -20,11 +20,7 @@ from cudf_polars.containers import Column, DataFrame, DataType -__all__ = ["AggInfo", "Col", "ColRef", "ExecutionContext", "Expr", "NamedExpr"] - - -class AggInfo(NamedTuple): - requests: list[tuple[Expr | None, plc.aggregation.Aggregation, Expr]] +__all__ = ["Col", "ColRef", "ExecutionContext", "Expr", "NamedExpr"] class ExecutionContext(IntEnum): diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index bee30183e1c..1c48f70bb11 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -819,11 +819,13 @@ def read_csv_header( # TODO: Nested column names names = chunk.column_names(include_children=False) concatenated_columns = chunk.tbl.columns() - while reader.has_next(): + while reader.has_next(): # pragma: no cover columns = reader.read_chunk().tbl.columns() # Discard columns while concatenating to 
reduce memory footprint. # Reverse order to avoid O(n^2) list popping cost. - for i in range(len(concatenated_columns) - 1, -1, -1): + for i in range( # pragma: no cover + len(concatenated_columns) - 1, -1, -1 + ): concatenated_columns[i] = plc.concatenate.concatenate( [concatenated_columns[i], columns.pop()], stream=stream ) @@ -840,7 +842,7 @@ def read_csv_header( num_rows=num_rows, ) if include_file_paths is not None: - df = Scan.add_file_paths( + df = Scan.add_file_paths( # pragma: no cover include_file_paths, paths, chunk.num_rows_per_source, df ) else: @@ -1164,7 +1166,7 @@ def _write_parquet( | plc.io.parquet.ParquetWriterOptionsBuilder ) - if ( + if ( # pragma: no cover parquet_options.chunked and parquet_options.n_output_chunks != 1 and df.table.num_rows() != 0 diff --git a/python/cudf_polars/cudf_polars/experimental/base.py b/python/cudf_polars/cudf_polars/experimental/base.py index 73ed9b3dbe1..80ff0dfacbd 100644 --- a/python/cudf_polars/cudf_polars/experimental/base.py +++ b/python/cudf_polars/cudf_polars/experimental/base.py @@ -11,11 +11,10 @@ from cudf_polars.dsl.traversal import traversal if TYPE_CHECKING: - from collections.abc import Generator, Iterator + from collections.abc import Generator from cudf_polars.dsl.expr import NamedExpr from cudf_polars.dsl.ir import IR - from cudf_polars.dsl.nodebase import Node class PartitionInfo: @@ -40,22 +39,12 @@ def __init__( self.partitioned_on = partitioned_on self.io_plan = io_plan - def keys(self, node: Node) -> Iterator[tuple[str, int]]: - """Return the partitioned keys for a given node.""" - name = get_key_name(node) - yield from ((name, i) for i in range(self.count)) - def __rich_repr__(self) -> Generator[Any, None, None]: """Formatting for rich.pretty.pprint.""" yield "count", self.count yield "partitioned_on", self.partitioned_on -def get_key_name(node: Node) -> str: - """Generate the key name for a Node.""" - return f"{type(node).__name__.lower()}-{hash(node)}" - - class 
SerializedDataSourceInfo(TypedDict): """The serialized form of DataSourceInfo.""" diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py index d514d4c44e9..74386993737 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py @@ -636,26 +636,8 @@ def get_executor_options( executor_options: dict[str, Any] = ( run_config.streaming_options.to_executor_options() ) - executor_options["runtime"] = "rapidsmpf" executor_options["max_io_threads"] = run_config.max_io_threads - # PDSHQueries: inject unique_fraction when dynamic planning is explicitly disabled - if ( - benchmark - and benchmark.__name__ == "PDSHQueries" - and run_config.executor == "streaming" - and run_config.streaming_options.dynamic_planning is None - ): - executor_options.setdefault( - "unique_fraction", - { - "c_custkey": 0.05, - "l_orderkey": 1.0, - "l_partkey": 0.1, - "o_custkey": 0.25, - }, - ) - return executor_options @@ -1110,8 +1092,7 @@ def run_polars_spmd( from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine executor_options = get_executor_options(run_config, benchmark=benchmark) - # "runtime" and "cluster" are reserved — SPMDEngine sets them - executor_options.pop("runtime", None) + # "cluster" is reserved — SPMDEngine sets it executor_options.pop("cluster", None) engine_options = { **run_config.streaming_options.to_engine_options(), @@ -1168,8 +1149,7 @@ def run_polars_ray( from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine executor_options = get_executor_options(run_config, benchmark=benchmark) - # "runtime", "cluster" are reserved — RayEngine sets them - executor_options.pop("runtime", None) + # "cluster" is reserved — RayEngine sets it executor_options.pop("cluster", None) engine_options: dict[str, Any] = { 
**run_config.streaming_options.to_engine_options(), @@ -1218,8 +1198,7 @@ def run_polars_dask( from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine executor_options = get_executor_options(run_config, benchmark=benchmark) - # "runtime", "cluster" are reserved — DaskEngine sets them - executor_options.pop("runtime", None) + # "cluster" is reserved — DaskEngine sets it executor_options.pop("cluster", None) engine_options: dict[str, Any] = { **run_config.streaming_options.to_engine_options(), diff --git a/python/cudf_polars/cudf_polars/experimental/dispatch.py b/python/cudf_polars/cudf_polars/experimental/dispatch.py index 3ac67b6af46..9ff0cc3156b 100644 --- a/python/cudf_polars/cudf_polars/experimental/dispatch.py +++ b/python/cudf_polars/cudf_polars/experimental/dispatch.py @@ -5,7 +5,7 @@ from __future__ import annotations from functools import singledispatch -from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict +from typing import TYPE_CHECKING, TypeAlias, TypedDict from cudf_polars.typing import GenericTransformer @@ -13,7 +13,7 @@ from collections.abc import MutableMapping from cudf_polars.dsl import ir - from cudf_polars.dsl.ir import IR, IRExecutionContext + from cudf_polars.dsl.ir import IR from cudf_polars.experimental.base import ( PartitionInfo, StatsCollector, @@ -72,38 +72,3 @@ def lower_ir_node( lower_ir_graph """ raise AssertionError(f"Unhandled type {type(ir)}") # pragma: no cover - - -@singledispatch -def generate_ir_tasks( - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """ - Generate a task graph for evaluation of an IR node. - - Parameters - ---------- - ir - IR node to generate tasks for. - partition_info - Partitioning information, obtained from :func:`lower_ir_graph`. - context - Runtime context for IR node execution. - - Returns - ------- - mapping - A (partial) dask task graph for the evaluation of an ir node. 
- - Notes - ----- - Task generation should only produce the tasks for the current node, - referring to child tasks by name. - - See Also - -------- - task_graph - """ - raise AssertionError(f"Unhandled type {type(ir)}") # pragma: no cover diff --git a/python/cudf_polars/cudf_polars/experimental/distinct.py b/python/cudf_polars/cudf_polars/experimental/distinct.py index 9ae148f77d3..564fe570919 100644 --- a/python/cudf_polars/cudf_polars/experimental/distinct.py +++ b/python/cudf_polars/cudf_polars/experimental/distinct.py @@ -17,8 +17,6 @@ from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.utils import ( _dynamic_planning_on, - _fallback_inform, - _get_unique_fractions, _lower_ir_fallback, ) @@ -35,8 +33,6 @@ def lower_distinct( child: IR, partition_info: MutableMapping[IR, PartitionInfo], config_options: ConfigOptions[StreamingExecutor], - *, - unique_fraction: float | None = None, ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: """ Lower a Distinct IR into partition-wise stages. @@ -56,9 +52,6 @@ def lower_distinct( associated partitioning information. config_options GPUEngine configuration options. - unique_fraction - Fraction of unique values to total values. Used for algorithm selection. - A value of `1.0` means the column is unique. Returns ------- @@ -68,69 +61,24 @@ def lower_distinct( A mapping from unique nodes in the new graph to associated partitioning information. 
""" - subset: frozenset[str] = ir.subset or frozenset(ir.schema) - distinct_keys = tuple( - NamedExpr(name, Col(ir.schema[name], name)) - for name in ir.schema - if name in subset - ) - child_count = partition_info[child].count - shuffled = partition_info[child].partitioned_on == distinct_keys - # Check for ordering requirements (shuffle is not stable) - require_tree_reduction = ir.stable or ir.keep in ( - plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST, - plc.stream_compaction.DuplicateKeepOption.KEEP_LAST, - ) - - output_count = 1 - n_ary = 32 # Arbitrary default (for now) + n_ary = 32 if ir.zlice is not None and ir.zlice[1] is not None: - # Head/tail slice operation has been pushed into Distinct - # (caller ensures only simple slices reach here) n_ary = max(1_000_000 // ir.zlice[1], 2) - elif unique_fraction is not None: - # Use unique_fraction to determine partitioning - n_ary = min(max(int(1.0 / unique_fraction), 2), child_count) - output_count = max(int(unique_fraction * child_count), 1) - - if output_count > 1 and require_tree_reduction: - # Need to reduce down to a single partition even - # if the unique_fraction is large. 
- output_count = 1 - _fallback_inform( - "Unsupported unique options for multiple partitions.", - config_options, - ) # Partition-wise unique count = child_count new_node: IR = ir.reconstruct([child]) partition_info[new_node] = PartitionInfo(count=count) - if shuffled or output_count == 1: - # Tree reduction - while count > output_count: - new_node = Repartition(new_node.schema, new_node) - count = max(math.ceil(count / n_ary), output_count) - partition_info[new_node] = PartitionInfo(count=count) - new_node = ir.reconstruct([new_node]) - partition_info[new_node] = PartitionInfo(count=count) - else: - # Shuffle - new_node = Shuffle( - new_node.schema, - distinct_keys, - config_options.executor.shuffle_method, - new_node, - ) - partition_info[new_node] = PartitionInfo(count=output_count) + # Tree reduction + while count > 1: + new_node = Repartition(new_node.schema, new_node) + count = max(math.ceil(count / n_ary), 1) + partition_info[new_node] = PartitionInfo(count=count) new_node = ir.reconstruct([new_node]) - partition_info[new_node] = PartitionInfo( - count=output_count, - partitioned_on=distinct_keys, - ) + partition_info[new_node] = PartitionInfo(count=count) return new_node, partition_info @@ -172,7 +120,6 @@ def _( child = Shuffle( child.schema, distinct_keys, - config_options.executor.shuffle_method, child, ) partition_info[child] = PartitionInfo( @@ -202,19 +149,9 @@ def _( ) return dynamic_node, partition_info - # Non-dynamic planning: use unique_fraction heuristics - unique_fraction_dict = _get_unique_fractions( - tuple(subset), - config_options.executor.unique_fraction, - ) - unique_fraction = ( - max(unique_fraction_dict.values()) if unique_fraction_dict else None - ) - return lower_distinct( ir, child, partition_info, config_options, - unique_fraction=unique_fraction, ) diff --git a/python/cudf_polars/cudf_polars/experimental/explain.py b/python/cudf_polars/cudf_polars/experimental/explain.py index 82f023b229c..d50d9fae0ae 100644 --- 
a/python/cudf_polars/cudf_polars/experimental/explain.py +++ b/python/cudf_polars/cudf_polars/experimental/explain.py @@ -288,10 +288,7 @@ def _(ir: GroupBy) -> dict[str, Serializable]: @_serialize_properties.register def _(ir: Shuffle) -> dict[str, Serializable]: - return { - "keys": [ne.name for ne in ir.keys], - "shuffle_method": ir.shuffle_method.value, - } + return {"keys": [ne.name for ne in ir.keys]} @_serialize_properties.register diff --git a/python/cudf_polars/cudf_polars/experimental/expressions.py b/python/cudf_polars/cudf_polars/experimental/expressions.py index d2a0070d009..d6df4cae8f9 100644 --- a/python/cudf_polars/cudf_polars/experimental/expressions.py +++ b/python/cudf_polars/cudf_polars/experimental/expressions.py @@ -41,22 +41,18 @@ from cudf_polars.containers import DataType from cudf_polars.dsl.expressions.aggregation import Agg -from cudf_polars.dsl.expressions.base import Col, ExecutionContext, Expr, NamedExpr +from cudf_polars.dsl.expressions.base import Col, ExecutionContext, NamedExpr from cudf_polars.dsl.expressions.binaryop import BinOp from cudf_polars.dsl.expressions.literal import Literal from cudf_polars.dsl.expressions.ternary import Ternary from cudf_polars.dsl.expressions.unary import Cast, Len, UnaryFunction -from cudf_polars.dsl.ir import IR, Distinct, Empty, HConcat, Select +from cudf_polars.dsl.ir import Distinct, Empty, HConcat, Select from cudf_polars.dsl.traversal import ( CachingVisitor, ) from cudf_polars.experimental.base import PartitionInfo from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.utils import ( - _dynamic_planning_on, - _get_unique_fractions, - _leaf_column_names, -) +from cudf_polars.experimental.utils import _dynamic_planning_on if TYPE_CHECKING: from collections.abc import Generator, MutableMapping, Sequence @@ -197,15 +193,6 @@ def _decompose_unique( ) (column,) = columns - unique_fraction_dict = _get_unique_fractions( - _leaf_column_names(child), - 
config_options.executor.unique_fraction, - ) - - unique_fraction = ( - max(unique_fraction_dict.values()) if unique_fraction_dict else None - ) - input_ir, partition_info = lower_distinct( Distinct( {column.name: column.dtype}, @@ -218,7 +205,6 @@ def _decompose_unique( input_ir, partition_info, config_options, - unique_fraction=unique_fraction, ) return column, input_ir, partition_info @@ -344,7 +330,6 @@ def _decompose_agg_node( input_ir = Shuffle( input_ir.schema, shuffle_on, - config_options.executor.shuffle_method, input_ir, ) partition_info[input_ir] = PartitionInfo( diff --git a/python/cudf_polars/cudf_polars/experimental/groupby.py b/python/cudf_polars/cudf_polars/experimental/groupby.py index 898dfdbf03f..6a17b56bfc5 100644 --- a/python/cudf_polars/cudf_polars/experimental/groupby.py +++ b/python/cudf_polars/cudf_polars/experimental/groupby.py @@ -36,7 +36,6 @@ from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.utils import ( _dynamic_planning_on, - _get_unique_fractions, _lower_ir_fallback, ) @@ -390,7 +389,6 @@ def _( # Check if we are dealing with any high-cardinality columns post_aggregation_count = 1 # Default tree reduction - groupby_key_columns = [ne.name for ne in ir.keys] shuffled = partition_info[child].partitioned_on == ir.keys child_count = partition_info[child].count @@ -421,7 +419,6 @@ def _( child = Shuffle( child.schema, ir.keys, - config_options.executor.shuffle_method, child, ) partition_info[child] = PartitionInfo( @@ -441,14 +438,6 @@ def _( ) return dynamic_node, partition_info - if unique_fraction_dict := _get_unique_fractions( - groupby_key_columns, - config_options.executor.unique_fraction, - ): - # Use unique_fraction to determine output partitioning - unique_fraction = max(unique_fraction_dict.values()) - post_aggregation_count = max(int(unique_fraction * child_count), 1) - # Partition-wise groupby operation pwise_schema = {k.name: k.value.dtype for k in ir.keys} | { k.name: k.value.dtype for k in 
piecewise_exprs @@ -465,46 +454,28 @@ def _( partition_info[gb_pwise] = PartitionInfo(count=child_count) grouped_keys = tuple(NamedExpr(k.name, Col(k.value.dtype, k.name)) for k in ir.keys) - # Reduction - gb_inter: GroupBy | Repartition | Shuffle + # N-ary tree reduction + gb_inter: GroupBy | Repartition reduction_schema = {k.name: k.value.dtype for k in grouped_keys} | { k.name: k.value.dtype for k in reduction_exprs } - if not shuffled and post_aggregation_count > 1: - # Shuffle reduction - if ir.maintain_order: # pragma: no cover - return _lower_ir_fallback( - ir, - rec, - msg="maintain_order not supported for multiple output partitions.", + n_ary = 32 + count = child_count + gb_inter = gb_pwise + while count > post_aggregation_count: + gb_inter = Repartition(gb_inter.schema, gb_inter) + count = max(math.ceil(count / n_ary), post_aggregation_count) + partition_info[gb_inter] = PartitionInfo(count=count) + if count > post_aggregation_count: + gb_inter = GroupBy( + reduction_schema, + grouped_keys, + reduction_exprs, + ir.maintain_order, + None, + gb_inter, ) - - gb_inter = Shuffle( - gb_pwise.schema, - grouped_keys, - config_options.executor.shuffle_method, - gb_pwise, - ) - partition_info[gb_inter] = PartitionInfo(count=post_aggregation_count) - else: - # N-ary tree reduction - n_ary = config_options.executor.groupby_n_ary - count = child_count - gb_inter = gb_pwise - while count > post_aggregation_count: - gb_inter = Repartition(gb_inter.schema, gb_inter) - count = max(math.ceil(count / n_ary), post_aggregation_count) partition_info[gb_inter] = PartitionInfo(count=count) - if count > post_aggregation_count: - gb_inter = GroupBy( - reduction_schema, - grouped_keys, - reduction_exprs, - ir.maintain_order, - None, - gb_inter, - ) - partition_info[gb_inter] = PartitionInfo(count=count) # Final aggregation gb_reduce = GroupBy( diff --git a/python/cudf_polars/cudf_polars/experimental/io.py b/python/cudf_polars/cudf_polars/experimental/io.py index 
f45baa054dd..2cea0274ee6 100644 --- a/python/cudf_polars/cudf_polars/experimental/io.py +++ b/python/cudf_polars/cudf_polars/experimental/io.py @@ -4,13 +4,11 @@ from __future__ import annotations -import dataclasses import functools import itertools import math import statistics from collections import defaultdict -from functools import partial from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, overload @@ -24,16 +22,14 @@ Empty, Scan, Sink, - Union, ) from cudf_polars.experimental.base import ( IOPartitionFlavor, IOPartitionPlan, PartitionInfo, SerializedDataSourceInfo, - get_key_name, ) -from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node +from cudf_polars.experimental.dispatch import lower_ir_node from cudf_polars.utils.config import Cluster from cudf_polars.utils.cuda_stream import get_cuda_stream from cudf_polars.utils.versions import POLARS_VERSION_LT_137 @@ -62,36 +58,9 @@ def _( ir: DataFrameScan, rec: LowerIRTransformer ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: - config_options = rec.state["config_options"] + from cudf_polars.experimental.rapidsmpf.io import lower_dataframescan_rapidsmpf - # RapidsMPF runtime: Use rapidsmpf-specific lowering - if ( - config_options.executor.runtime == "rapidsmpf" - ): # pragma: no cover; Requires rapidsmpf runtime - from cudf_polars.experimental.rapidsmpf.io import lower_dataframescan_rapidsmpf - - return lower_dataframescan_rapidsmpf(ir, rec) - - rows_per_partition = config_options.executor.max_rows_per_partition - nrows = max(ir.df.shape()[0], 1) - count = math.ceil(nrows / rows_per_partition) - - if count > 1: - length = math.ceil(nrows / count) - slices = [ - DataFrameScan( - ir.schema, - ir.df.slice(offset, length), - ir.projection, - ) - for offset in range(0, nrows, length) - ] - new_node = Union(ir.schema, None, *slices) - return new_node, {slice: PartitionInfo(count=1) for slice in slices} | { - new_node: PartitionInfo(count=count) - } - - return ir, {ir: 
PartitionInfo(count=1)} + return lower_dataframescan_rapidsmpf(ir, rec) def scan_partition_plan( @@ -285,84 +254,9 @@ def _( def _( ir: Scan, rec: LowerIRTransformer ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: - partition_info: MutableMapping[IR, PartitionInfo] - config_options = rec.state["config_options"] - - # RapidsMPF runtime: Use rapidsmpf-specific lowering - if ( - config_options.executor.name == "streaming" - and config_options.executor.runtime == "rapidsmpf" - ): # pragma: no cover; Requires rapidsmpf runtime - from cudf_polars.experimental.rapidsmpf.io import lower_scan_rapidsmpf + from cudf_polars.experimental.rapidsmpf.io import lower_scan_rapidsmpf - return lower_scan_rapidsmpf(ir, rec) - - if ( - ir.typ in ("csv", "parquet", "ndjson") - and ir.n_rows == -1 - and ir.skip_rows == 0 - and ir.row_index is None - ): - plan = scan_partition_plan(ir, rec.state["stats"], config_options) - paths = list(ir.paths) - if plan.flavor == IOPartitionFlavor.SPLIT_FILES: - # Disable chunked reader when splitting files - parquet_options = dataclasses.replace( - config_options.parquet_options, - chunked=False, - ) - - slices: list[SplitScan] = [] - for path in paths: - base_scan = Scan( - ir.schema, - ir.typ, - ir.reader_options, - ir.cloud_options, - [path], - ir.with_columns, - ir.skip_rows, - ir.n_rows, - ir.row_index, - ir.include_file_paths, - ir.predicate, - parquet_options, - ) - slices.extend( - SplitScan( - ir.schema, base_scan, sindex, plan.factor, parquet_options - ) - for sindex in range(plan.factor) - ) - new_node = Union(ir.schema, None, *slices) - partition_info = {slice: PartitionInfo(count=1) for slice in slices} | { - new_node: PartitionInfo(count=len(slices)) - } - else: - groups: list[Scan] = [ - Scan( - ir.schema, - ir.typ, - ir.reader_options, - ir.cloud_options, - paths[i : i + plan.factor], - ir.with_columns, - ir.skip_rows, - ir.n_rows, - ir.row_index, - ir.include_file_paths, - ir.predicate, - config_options.parquet_options, - ) - for i 
in range(0, len(paths), plan.factor) - ] - new_node = Union(ir.schema, None, *groups) - partition_info = {group: PartitionInfo(count=1) for group in groups} | { - new_node: PartitionInfo(count=len(groups)) - } - return new_node, partition_info - - return ir, {ir: PartitionInfo(count=1)} # pragma: no cover + return lower_scan_rapidsmpf(ir, rec) class StreamingSink(IR): @@ -441,22 +335,6 @@ def _prepare_sink_directory(path: str) -> None: Path(path).mkdir(parents=True, exist_ok=True) -def _sink_to_directory( - schema: Schema, - kind: str, - path: str, - parquet_options: ParquetOptions, - options: dict[str, Any], - df: DataFrame, - ready: None, - context: IRExecutionContext, -) -> DataFrame: - """Sink a partition to a new file.""" - return Sink.do_evaluate( - schema, kind, path, parquet_options, options, df, context=context - ) - - def _sink_to_parquet_file( path: str, options: dict[str, Any], @@ -545,106 +423,6 @@ def _sink_to_file( return True -def _finalize_file_sink( - kind: str, - writer_state: Any, - df: DataFrame, -) -> DataFrame: - """Finalize the file sink by closing the writer.""" - if kind == "Parquet" and writer_state is not None: - writer_state.close([]) - return df.slice((0, 0)) - - -def _file_sink_graph( - ir: StreamingSink, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """Sink to a single file.""" - name = get_key_name(ir) - count = partition_info[ir].count - child_name = get_key_name(ir.children[0]) - sink = ir.sink - if count == 1: - return { - (name, 0): ( - partial(sink.do_evaluate, context=context), - *sink._non_child_args, - (child_name, 0), - ) - } - - sink_name = get_key_name(sink) - graph: MutableMapping[Any, Any] = { - (sink_name, i): ( - _sink_to_file, - sink.kind, - sink.path, - sink.options, - None if i == 0 else (sink_name, i - 1), # Writer state - (child_name, i), - ) - for i in range(count) - } - - # Finalize task closes the writer after all chunks are written - 
graph[(sink_name, "finalize")] = ( - _finalize_file_sink, - sink.kind, - (sink_name, count - 1), # Writer state from last task - (child_name, count - 1), # Last source df for creating empty result - ) - - # Make sure final tasks point to finalize task - graph.update({(name, i): (sink_name, "finalize") for i in range(count)}) - return graph - - -def _directory_sink_graph( - ir: StreamingSink, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """Sink to a directory of files.""" - name = get_key_name(ir) - count = partition_info[ir].count - child_name = get_key_name(ir.children[0]) - sink = ir.sink - - setup_name = f"setup-{name}" - suffix = sink.kind.lower() - width = math.ceil(math.log10(count)) - graph: MutableMapping[Any, Any] = { - (name, i): ( - _sink_to_directory, - sink.schema, - sink.kind, - f"{sink.path}/part.{str(i).zfill(width)}.{suffix}", - sink.parquet_options, - sink.options, - (child_name, i), - setup_name, - context, - ) - for i in range(count) - } - graph[setup_name] = (_prepare_sink_directory, sink.path) - return graph - - -@generate_ir_tasks.register(StreamingSink) -def _( - ir: StreamingSink, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - if ir.sink_to_directory: - return _directory_sink_graph(ir, partition_info, context=context) - else: - return _file_sink_graph(ir, partition_info, context=context) - - class ParquetMetadata: """ Parquet metadata container. 
diff --git a/python/cudf_polars/cudf_polars/experimental/join.py b/python/cudf_polars/cudf_polars/experimental/join.py index 47d0ad90d8e..cd5c514b45a 100644 --- a/python/cudf_polars/cudf_polars/experimental/join.py +++ b/python/cudf_polars/cudf_polars/experimental/join.py @@ -5,16 +5,15 @@ from __future__ import annotations import operator -from functools import partial, reduce -from typing import TYPE_CHECKING, Any +from functools import reduce +from typing import TYPE_CHECKING from cudf_polars.dsl.ir import ConditionalJoin, Join, Slice -from cudf_polars.experimental.base import PartitionInfo, get_key_name -from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node +from cudf_polars.experimental.base import PartitionInfo +from cudf_polars.experimental.dispatch import lower_ir_node from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.shuffle import Shuffle, _hash_partition_dataframe +from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.utils import ( - _concat, _dynamic_planning_on, _fallback_inform, _lower_ir_fallback, @@ -24,16 +23,14 @@ from collections.abc import MutableMapping from cudf_polars.dsl.expr import NamedExpr - from cudf_polars.dsl.ir import IR, IRExecutionContext + from cudf_polars.dsl.ir import IR from cudf_polars.experimental.parallel import LowerIRTransformer - from cudf_polars.utils.config import ShuffleMethod def _maybe_shuffle_frame( frame: IR, on: tuple[NamedExpr, ...], partition_info: MutableMapping[IR, PartitionInfo], - shuffle_method: ShuffleMethod, output_count: int, ) -> IR: # Shuffle `frame` if it isn't already shuffled. 
@@ -48,7 +45,6 @@ def _maybe_shuffle_frame( frame = Shuffle( frame.schema, on, - shuffle_method, frame, ) partition_info[frame] = PartitionInfo( @@ -64,21 +60,18 @@ def _make_hash_join( partition_info: MutableMapping[IR, PartitionInfo], left: IR, right: IR, - shuffle_method: ShuffleMethod, ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: # Shuffle left and right dataframes (if necessary) left = _maybe_shuffle_frame( left, ir.left_on, partition_info, - shuffle_method, output_count, ) right = _maybe_shuffle_frame( right, ir.right_on, partition_info, - shuffle_method, output_count, ) # Always reconstruct in case children contain Cache nodes @@ -146,45 +139,7 @@ def _make_bcast_join( partition_info: MutableMapping[IR, PartitionInfo], left: IR, right: IR, - shuffle_method: ShuffleMethod, - *, - streaming_runtime: str, ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: - if ir.options[0] != "Inner": - left_count = partition_info[left].count - right_count = partition_info[right].count - - # Shuffle the smaller table (if necessary) - Notes: - # - We need to shuffle the smaller table if - # (1) we are not doing an "inner" join, - # and (2) the small table contains multiple - # partitions. - # - We cannot simply join a large-table partition - # to each small-table partition, and then - # concatenate the partial-join results, because - # a non-"inner" join does NOT commute with - # concatenation. - # - In some cases, we can perform the partial joins - # sequentially. However, we are starting with a - # catch-all algorithm that works for all cases. 
- if streaming_runtime == "tasks": - if left_count >= right_count: - right = _maybe_shuffle_frame( - right, - ir.right_on, - partition_info, - shuffle_method, - right_count, - ) - else: - left = _maybe_shuffle_frame( - left, - ir.left_on, - partition_info, - shuffle_method, - left_count, - ) - new_node = ir.reconstruct([left, right]) partition_info[new_node] = PartitionInfo(count=output_count) return new_node, partition_info @@ -301,8 +256,6 @@ def _( partition_info, left, right, - config_options.executor.shuffle_method, - streaming_runtime=config_options.executor.runtime, ) else: # Create a hash join @@ -312,109 +265,4 @@ def _( partition_info, left, right, - config_options.executor.shuffle_method, ) - - -@generate_ir_tasks.register(Join) -def _( - ir: Join, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - left, right = ir.children - output_count = partition_info[ir].count - - left_partitioned = ( - partition_info[left].partitioned_on == ir.left_on - and partition_info[left].count == output_count - ) - right_partitioned = ( - partition_info[right].partitioned_on == ir.right_on - and partition_info[right].count == output_count - ) - - if output_count == 1 or (left_partitioned and right_partitioned): - # Partition-wise join - left_name = get_key_name(left) - right_name = get_key_name(right) - return { - key: ( - partial(ir.do_evaluate, context=context), - *ir._non_child_args, - (left_name, i), - (right_name, i), - ) - for i, key in enumerate(partition_info[ir].keys(ir)) - } - else: - # Broadcast join - left_parts = partition_info[left] - right_parts = partition_info[right] - if left_parts.count >= right_parts.count: - small_side = "Right" - small_name = get_key_name(right) - small_size = partition_info[right].count - large_name = get_key_name(left) - large_on = ir.left_on - else: - small_side = "Left" - small_name = get_key_name(left) - small_size = partition_info[left].count - large_name = 
get_key_name(right) - large_on = ir.right_on - - graph: MutableMapping[Any, Any] = {} - - out_name = get_key_name(ir) - out_size = partition_info[ir].count - split_name = f"split-{out_name}" - getit_name = f"getit-{out_name}" - inter_name = f"inter-{out_name}" - - # Split each large partition if we have - # multiple small partitions (unless this - # is an inner join) - split_large = ir.options[0] != "Inner" and small_size > 1 - - for part_out in range(out_size): - if split_large: - graph[(split_name, part_out)] = ( - _hash_partition_dataframe, - (large_name, part_out), - part_out, - small_size, - None, - large_on, - ) - - _concat_list = [] - for j in range(small_size): - left_key: tuple[str, int] | tuple[str, int, int] - if split_large: - left_key = (getit_name, part_out, j) - graph[left_key] = (operator.getitem, (split_name, part_out), j) - else: - left_key = (large_name, part_out) - join_children = [left_key, (small_name, j)] - if small_side == "Left": - join_children.reverse() - - inter_key = (inter_name, part_out, j) - graph[(inter_name, part_out, j)] = ( - partial(ir.do_evaluate, context=context), - ir.left_on, - ir.right_on, - ir.options, - *join_children, - ) - _concat_list.append(inter_key) - if len(_concat_list) == 1: - graph[(out_name, part_out)] = graph.pop(_concat_list[0]) - else: - graph[(out_name, part_out)] = ( - partial(_concat, context=context), - *_concat_list, - ) - - return graph diff --git a/python/cudf_polars/cudf_polars/experimental/parallel.py b/python/cudf_polars/cudf_polars/experimental/parallel.py index f77e923bce0..ab5d3b5bd90 100644 --- a/python/cudf_polars/cudf_polars/experimental/parallel.py +++ b/python/cudf_polars/cudf_polars/experimental/parallel.py @@ -4,10 +4,9 @@ from __future__ import annotations -import itertools import operator from functools import partial, reduce -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import polars as pl @@ -26,7 +25,6 @@ Filter, HConcat, HStack, - IRExecutionContext, 
MapFunction, Projection, Select, @@ -35,16 +33,11 @@ ) from cudf_polars.dsl.traversal import CachingVisitor, traversal from cudf_polars.dsl.utils.naming import unique_names -from cudf_polars.experimental.base import PartitionInfo, get_key_name -from cudf_polars.experimental.dispatch import ( - generate_ir_tasks, - lower_ir_node, -) +from cudf_polars.experimental.base import PartitionInfo +from cudf_polars.experimental.dispatch import lower_ir_node from cudf_polars.experimental.io import _clear_source_info_cache from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.statistics import collect_statistics from cudf_polars.experimental.utils import ( - _concat, _contains_over, _dynamic_planning_on, _lower_ir_fallback, @@ -52,7 +45,6 @@ if TYPE_CHECKING: from collections.abc import MutableMapping - from typing import Any from cudf_polars.experimental.base import StatsCollector from cudf_polars.experimental.dispatch import LowerIRTransformer, State @@ -109,63 +101,6 @@ def lower_ir_graph( return mapper(ir) -def task_graph( - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], -) -> tuple[MutableMapping[Any, Any], str | tuple[str, int]]: - """ - Construct a task graph for evaluation of an IR graph. - - Parameters - ---------- - ir - Root of the graph to rewrite. - partition_info - A mapping from all unique IR nodes to the - associated partitioning information. - - Returns - ------- - graph - A task graph for the entire IR graph with root `ir`, - in dict-of-tuples form consumed by - :func:`~cudf_polars.experimental.scheduler.synchronous_scheduler`. - - Notes - ----- - This function traverses the unique nodes of the - graph with root `ir`, and extracts the tasks for - each node with :func:`generate_ir_tasks`. 
- - See Also - -------- - generate_ir_tasks - """ - context = IRExecutionContext() - graph = reduce( - operator.or_, - ( - generate_ir_tasks(node, partition_info, context=context) - for node in traversal([ir]) - ), - ) - - key_name = get_key_name(ir) - partition_count = partition_info[ir].count - - key: str | tuple[str, int] - if partition_count > 1: - graph[key_name] = ( - partial(_concat, context=context), - *partition_info[ir].keys(ir), - ) - key = key_name - else: - key = (key_name, 0) - - return graph, key - - def evaluate_rapidsmpf( ir: IR, config_options: ConfigOptions[StreamingExecutor], @@ -211,44 +146,7 @@ def evaluate_streaming( # Clear source info cache in case data was overwritten _clear_source_info_cache() - if ( - config_options.executor.runtime == "rapidsmpf" - ): # pragma: no cover; rapidsmpf runtime not tested in CI yet - # Using the RapidsMPF streaming runtime. - return evaluate_rapidsmpf(ir, config_options) - else: - # Using the default task engine. - from cudf_polars.experimental.scheduler import synchronous_scheduler - - stats = collect_statistics(ir, config_options) - ir, partition_info = lower_ir_graph(ir, config_options, stats) - - graph, key = task_graph(ir, partition_info) - - return synchronous_scheduler(graph, key).to_polars() - - -@generate_ir_tasks.register(IR) -def _( - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - # Generate pointwise (embarrassingly-parallel) tasks by default - child_names = [get_key_name(c) for c in ir.children] - bcast_child = [partition_info[c].count == 1 for c in ir.children] - - return { - key: ( - partial(ir.do_evaluate, context=context), - *ir._non_child_args, - *[ - (child_name, 0 if bcast_child[j] else i) - for j, child_name in enumerate(child_names) - ], - ) - for i, key in enumerate(partition_info[ir].keys(ir)) - } + return evaluate_rapidsmpf(ir, config_options) @lower_ir_node.register(Union) @@ -278,21 +176,6 @@ def _( 
return new_node, partition_info -@generate_ir_tasks.register(Union) -def _( - ir: Union, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - key_name = get_key_name(ir) - partition = itertools.count() - return { - (key_name, next(partition)): child_key - for child in ir.children - for child_key in partition_info[child].keys(child) - } - - @lower_ir_node.register(MapFunction) def _( ir: MapFunction, rec: LowerIRTransformer diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py index 478c0a33beb..97168f0b02d 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py @@ -99,8 +99,6 @@ def evaluate_logical_plan( ------- The output DataFrame and metadata collector. """ - assert config_options.executor.runtime == "rapidsmpf", "Runtime must be rapidsmpf" - query_id = uuid.uuid4() with cudf_polars.dsl.tracing.bound_contextvars( @@ -202,8 +200,6 @@ def evaluate_pipeline( ------- The output DataFrame and metadata collector. 
""" - assert config_options.executor.runtime == "rapidsmpf", "Runtime must be rapidsmpf" - _original_mr: Any = None use_stream_pool = False if rmpf_context is not None: diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py index 7bc8dabddec..26ad95198f6 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py @@ -436,7 +436,7 @@ def execute_ir_on_rank( _RESERVED_EXECUTOR_KEYS: frozenset[str] = frozenset( - {"runtime", "cluster", "spmd_context", "ray_context", "dask_context"} + {"cluster", "spmd_context", "ray_context", "dask_context"} ) _RESERVED_ENGINE_KEYS: frozenset[str] = frozenset({"memory_resource", "executor"}) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py index 49810e998fd..b4300346132 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py @@ -688,7 +688,6 @@ def __init__( nranks=nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "dask", "dask_context": dask_ctx, }, @@ -736,7 +735,6 @@ def _reset( nranks=self._nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "dask", "dask_context": ctx, }, diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py index d8464aa7426..c7650bff513 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py @@ -234,11 +234,6 @@ class StreamingOptions: Env: ``CUDF_POLARS__EXECUTOR__DYNAMIC_PLANNING``. Default: enabled. Category: executor. 
- unique_fraction - Per-column uniqueness estimate (0-1). Defaults to ``1.0``. - Env: ``CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION``. - Default: ``{}``. - Category: executor. sink_to_directory Whether multi-partition sink operations should write to a directory rather than a single file. The ``spmd``/``ray``/``dask`` engines @@ -332,9 +327,6 @@ class StreamingOptions: dynamic_planning: dict[str, Any] | DynamicPlanningOptions | None | Unspecified = ( _opt("executor") ) - unique_fraction: dict[str, float] | Unspecified = _opt( - "executor", "CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION", json.loads - ) sink_to_directory: bool | Unspecified = _opt( "executor", "CUDF_POLARS__EXECUTOR__SINK_TO_DIRECTORY", parse_boolean ) @@ -515,7 +507,6 @@ def _get(attr: str) -> Any: broadcast_join_limit=_get("broadcast_join_limit"), target_partition_size=target_partition_size, dynamic_planning=dynamic_planning, - unique_fraction=_get("unique_fraction"), raise_on_fail=_get("raise_on_fail"), parquet_options=_get("parquet_options"), memory_resource_config=_get("memory_resource_config"), @@ -711,15 +702,6 @@ def _add_cli_args(parser: argparse.ArgumentParser) -> None: Enable dynamic planning. Use --no-dynamic-planning to disable. Env: CUDF_POLARS__EXECUTOR__DYNAMIC_PLANNING. Built-in default: enabled."""), ) - g.add_argument( - "--unique-fraction", - dest="unique_fraction", - default=None, - type=json.loads, - help=textwrap.dedent("""\ - Per-column uniqueness estimate as a JSON object (e.g. '{"col": 0.5}'). - Env: CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION. 
Built-in default: {}."""), - ) g.add_argument( "--stream-policy", dest="stream_policy", diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py index 1ba92de3e49..efbb1db9ad4 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py @@ -90,13 +90,9 @@ def evaluate_pipeline_ray_mode( Raises ------ - RuntimeError - If the configured executor runtime is not ``"rapidsmpf"``. RuntimeError If ``config_options.executor.ray_context`` is not set. """ - if config_options.executor.runtime != "rapidsmpf": - raise RuntimeError("Runtime must be rapidsmpf") if config_options.executor.ray_context is None: raise RuntimeError("ray_context must be set when cluster='ray'") rank_actors = config_options.executor.ray_context.rank_actors @@ -586,7 +582,6 @@ def __init__( nranks=nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "ray", "ray_context": RayContext(rank_actors), }, @@ -641,7 +636,6 @@ def _reset( nranks=len(self._rank_actors), executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "ray", "ray_context": RayContext(self._rank_actors), }, diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py index 65e3eb8b1e7..7e1bde808cd 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py @@ -23,12 +23,11 @@ from rapidsmpf.statistics import Statistics from rapidsmpf.streaming.core.context import Context -import polars as pl - import pylibcudf as plc import rmm.mr from pylibcudf.contiguous_split import pack +from cudf_polars.containers import DataFrame, DataType from cudf_polars.experimental.rapidsmpf.collectives.common import 
reserve_op_id from cudf_polars.experimental.rapidsmpf.frontend.core import ( ClusterInfo, @@ -53,6 +52,8 @@ from rapidsmpf.config import Options from rapidsmpf.streaming.cudf.channel_metadata import ChannelMetadata + import polars as pl + from cudf_polars.dsl.ir import IR from cudf_polars.experimental.parallel import ConfigOptions from cudf_polars.experimental.rapidsmpf.frontend.core import T @@ -98,8 +99,6 @@ def evaluate_pipeline_spmd_mode( The concatenated output DataFrame and, if ``collect_metadata`` is True, the list of channel metadata objects; otherwise ``None``. """ - if config_options.executor.runtime != "rapidsmpf": - raise RuntimeError("Runtime must be rapidsmpf") if config_options.executor.spmd_context is None: raise RuntimeError("spmd_context must be set for SPMD mode") comm = config_options.executor.spmd_context.comm @@ -155,8 +154,9 @@ def allgather_polars_dataframe( ctx = engine.context stream = ctx.get_stream_from_pool() col_names = local_df.columns + dtypes = [DataType(dtype) for dtype in local_df.dtypes] - plc_table = plc.Table.from_arrow(local_df.to_arrow()) + plc_table = plc.Table.from_arrow(local_df, stream=stream) packed_data = PackedData.from_cudf_packed_columns( pack(plc_table, stream), @@ -176,9 +176,12 @@ def allgather_polars_dataframe( plc_result = unpack_and_concat(results, stream, ctx.br()) # pylibcudf Table -> pl.DataFrame (restore column names) - ret = pl.from_arrow(plc_result.to_arrow(col_names)) - assert isinstance(ret, pl.DataFrame) - return ret + return DataFrame.from_table( + plc_result, + col_names, + dtypes, + stream, + ).to_polars() class SPMDEngine(StreamingEngine): @@ -389,7 +392,6 @@ def __init__( nranks=comm.nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "spmd", "spmd_context": SPMDContext( comm=comm, context=ctx, py_executor=self._py_executor @@ -494,7 +496,6 @@ def _reset( nranks=self._comm.nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "spmd", 
"spmd_context": SPMDContext( comm=self._comm, diff --git a/python/cudf_polars/cudf_polars/experimental/repartition.py b/python/cudf_polars/cudf_polars/experimental/repartition.py index 92d89a5f44c..84c39d930ca 100644 --- a/python/cudf_polars/cudf_polars/experimental/repartition.py +++ b/python/cudf_polars/cudf_polars/experimental/repartition.py @@ -4,20 +4,11 @@ from __future__ import annotations -import itertools -from functools import partial -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from cudf_polars.dsl.ir import IR -from cudf_polars.experimental.base import get_key_name -from cudf_polars.experimental.dispatch import generate_ir_tasks -from cudf_polars.experimental.utils import _concat if TYPE_CHECKING: - from collections.abc import MutableMapping - - from cudf_polars.dsl.ir import IRExecutionContext - from cudf_polars.experimental.parallel import PartitionInfo from cudf_polars.typing import Schema @@ -43,35 +34,3 @@ def __init__(self, schema: Schema, df: IR): self.schema = schema self._non_child_args = () self.children = (df,) - - -@generate_ir_tasks.register(Repartition) -def _( - ir: Repartition, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - # Repartition an IR node. - # Only supports rapartitioning to fewer (for now). - - (child,) = ir.children - count_in = partition_info[child].count - count_out = partition_info[ir].count - - if count_out > count_in: # pragma: no cover - raise NotImplementedError( - f"Repartition {count_in} -> {count_out} not supported." - ) - - key_name = get_key_name(ir) - n, remainder = divmod(count_in, count_out) - # Spread remainder evenly over the partitions. 
- offsets = [0, *itertools.accumulate(n + (i < remainder) for i in range(count_out))] - child_keys = tuple(partition_info[child].keys(child)) - return { - (key_name, i): ( - partial(_concat, context=context), - *child_keys[offsets[i] : offsets[i + 1]], - ) - for i in range(count_out) - } diff --git a/python/cudf_polars/cudf_polars/experimental/scheduler.py b/python/cudf_polars/cudf_polars/experimental/scheduler.py deleted file mode 100644 index 97eae6ab378..00000000000 --- a/python/cudf_polars/cudf_polars/experimental/scheduler.py +++ /dev/null @@ -1,153 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. -# SPDX-License-Identifier: Apache-2.0 -"""Synchronous task scheduler.""" - -from __future__ import annotations - -from collections import Counter -from collections.abc import MutableMapping -from itertools import chain -from typing import TYPE_CHECKING, Any, TypeVar, Unpack - -if TYPE_CHECKING: - from collections.abc import Mapping - from typing import TypeAlias - - -Key: TypeAlias = str | tuple[str, Unpack[tuple[int, ...]]] -Graph: TypeAlias = MutableMapping[Key, Any] -T_ = TypeVar("T_") - - -# NOTE: This is a slimmed-down version of the single-threaded -# (synchronous) scheduler in `dask.core`. -# -# Key Differences: -# * We do not allow a task to contain a list of key names. -# Keys must be distinct elements of the task. -# * We do not support nested tasks. 
- - -def istask(x: Any) -> bool: - """Check if x is a callable task.""" - return isinstance(x, tuple) and bool(x) and callable(x[0]) - - -def is_hashable(x: Any) -> bool: - """Check if x is hashable.""" - try: - hash(x) - except BaseException: - return False - else: - return True - - -def _execute_task(arg: Any, cache: Mapping) -> Any: - """Execute a compute task.""" - if istask(arg): - return arg[0](*(_execute_task(a, cache) for a in arg[1:])) - elif is_hashable(arg): - return cache.get(arg, arg) - else: - return arg - - -def required_keys(key: Key, graph: Graph) -> list[Key]: - """ - Return the dependencies to extract a key from the graph. - - Parameters - ---------- - key - Root key we want to extract. - graph - The full task graph. - - Returns - ------- - List of other keys needed to extract ``key``. - """ - maybe_task = graph[key] - return [ - k - for k in ( - maybe_task[1:] - if istask(maybe_task) - else [maybe_task] # maybe_task might be a key - ) - if is_hashable(k) and k in graph - ] - - -def toposort(graph: Graph, dependencies: Mapping[Key, list[Key]]) -> list[Key]: - """Return a list of task keys sorted in topological order.""" - # Stack-based depth-first search traversal. 
This is based on Tarjan's - # algorithm for strongly-connected components - # (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) - ordered: list[Key] = [] - completed: set[Key] = set() - - for key in graph: - if key in completed: - continue - nodes = [key] - while nodes: - # Keep current node on the stack until all descendants are visited - current = nodes[-1] - if current in completed: # pragma: no cover - # Already fully traversed descendants of current - nodes.pop() - continue - - # Add direct descendants of current to nodes stack - next_nodes = set(dependencies[current]) - completed - if next_nodes: - nodes.extend(next_nodes) - else: - # Current has no more descendants to explore - ordered.append(current) - completed.add(current) - nodes.pop() - - return ordered - - -def synchronous_scheduler( - graph: Graph, - key: Key, - *, - cache: MutableMapping | None = None, -) -> Any: - """ - Execute the task graph for a given key. - - Parameters - ---------- - graph - The task graph to execute. - key - The final output key to extract from the graph. - cache - Intermediate-data cache. - - Returns - ------- - Executed task-graph result for ``key``. 
- """ - if key not in graph: # pragma: no cover - raise KeyError(f"{key} is not a key in the graph") - if cache is None: - cache = {} - - dependencies = {k: required_keys(k, graph) for k in graph} - refcount = Counter(chain.from_iterable(dependencies.values())) - - for k in toposort(graph, dependencies): - cache[k] = _execute_task(graph[k], cache) - for dep in dependencies[k]: - refcount[dep] -= 1 - if refcount[dep] == 0 and dep != key: - del cache[dep] - - return cache[key] diff --git a/python/cudf_polars/cudf_polars/experimental/shuffle.py b/python/cudf_polars/cudf_polars/experimental/shuffle.py index 8e24dd83fe6..9381126775f 100644 --- a/python/cudf_polars/cudf_polars/experimental/shuffle.py +++ b/python/cudf_polars/cudf_polars/experimental/shuffle.py @@ -4,112 +4,22 @@ from __future__ import annotations -import operator -from functools import partial -from typing import TYPE_CHECKING, Any, Concatenate, TypeVar, TypedDict +from typing import TYPE_CHECKING -import pylibcudf as plc -from rmm.pylibrmm.stream import DEFAULT_STREAM - -from cudf_polars.containers import DataFrame -from cudf_polars.dsl.expr import Col from cudf_polars.dsl.ir import IR -from cudf_polars.dsl.tracing import log_do_evaluate, nvtx_annotate_cudf_polars -from cudf_polars.experimental.base import get_key_name -from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node -from cudf_polars.experimental.utils import _concat, _dynamic_planning_on -from cudf_polars.utils.cuda_stream import get_dask_cuda_stream +from cudf_polars.dsl.tracing import log_do_evaluate +from cudf_polars.experimental.dispatch import lower_ir_node +from cudf_polars.experimental.utils import _dynamic_planning_on if TYPE_CHECKING: - from collections.abc import Callable, MutableMapping, Sequence + from collections.abc import MutableMapping - from cudf_polars.containers import DataType + from cudf_polars.containers import DataFrame from cudf_polars.dsl.expr import NamedExpr from cudf_polars.dsl.ir import 
IRExecutionContext from cudf_polars.experimental.dispatch import LowerIRTransformer from cudf_polars.experimental.parallel import PartitionInfo from cudf_polars.typing import Schema - from cudf_polars.utils.config import ShuffleMethod - - -# Supported shuffle methods -_SHUFFLE_METHODS = ("rapidsmpf", "tasks") - - -class ShuffleOptions(TypedDict): - """RapidsMPF shuffling options.""" - - on: Sequence[str] - column_names: Sequence[str] - dtypes: Sequence[DataType] - - -# Experimental rapidsmpf shuffler integration -class RMPFIntegration: # pragma: no cover - """cuDF-Polars protocol for rapidsmpf shuffler.""" - - @staticmethod - @nvtx_annotate_cudf_polars(message="RMPFIntegration.insert_partition") - def insert_partition( - df: DataFrame, - partition_id: int, # Not currently used - partition_count: int, - shuffler: Any, - options: ShuffleOptions, - *other: Any, - ) -> None: - """Add cudf-polars DataFrame chunks to an RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import partition_and_pack - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - on = options["on"] - assert not other, f"Unexpected arguments: {other}" - columns_to_hash = tuple(df.column_names.index(val) for val in on) - packed_inputs = partition_and_pack( - df.table, - columns_to_hash=columns_to_hash, - num_partitions=partition_count, - br=context.br, - stream=DEFAULT_STREAM, - ) - - shuffler.insert_chunks(packed_inputs) - - @staticmethod - @nvtx_annotate_cudf_polars(message="RMPFIntegration.extract_partition") - def extract_partition( - partition_id: int, - shuffler: Any, - options: ShuffleOptions, - ) -> DataFrame: - """Extract a finished partition from the RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import ( - unpack_and_concat, - unspill_partitions, - ) - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - shuffler.wait() - column_names = options["column_names"] - dtypes = 
options["dtypes"] - return DataFrame.from_table( - unpack_and_concat( - unspill_partitions( - shuffler.extract(partition_id), - br=context.br, - allow_overbooking=True, - ), - br=context.br, - stream=DEFAULT_STREAM, - ), - column_names, - dtypes, - get_dask_cuda_stream(), - ) class Shuffle(IR): @@ -118,29 +28,27 @@ class Shuffle(IR): Notes ----- - Only hash-based partitioning is supported (for now). See - `ShuffleSorted` for sorting-based shuffling. + Only hash-based partitioning is supported (for now). """ - __slots__ = ("keys", "shuffle_method") - _non_child = ("schema", "keys", "shuffle_method") - _n_non_child_args = 3 + __slots__ = ("keys",) + _non_child = ( + "schema", + "keys", + ) + _n_non_child_args = 2 keys: tuple[NamedExpr, ...] """Keys to shuffle on.""" - shuffle_method: ShuffleMethod - """Shuffle method to use.""" def __init__( self, schema: Schema, keys: tuple[NamedExpr, ...], - shuffle_method: ShuffleMethod, df: IR, ): self.schema = schema self.keys = keys - self.shuffle_method = shuffle_method - self._non_child_args = (schema, keys, shuffle_method) + self._non_child_args = (schema, keys) self.children = (df,) # the type-ignore is for @@ -153,7 +61,6 @@ def do_evaluate( cls, schema: Schema, keys: tuple[NamedExpr, ...], - shuffle_method: ShuffleMethod, df: DataFrame, *, context: IRExecutionContext, @@ -163,120 +70,6 @@ def do_evaluate( return df -@nvtx_annotate_cudf_polars(message="Shuffle") -def _hash_partition_dataframe( - df: DataFrame, - partition_id: int, # Used only by sorted shuffling - partition_count: int, - options: MutableMapping[str, Any] | None, # No options required - on: tuple[NamedExpr, ...], -) -> dict[int, DataFrame]: - """ - Partition an input DataFrame for hash-based shuffling. - - Parameters - ---------- - df - DataFrame to partition. - partition_id - Partition index (unused for hash partitioning). - partition_count - Total number of output partitions. - options - Options (unused for hash partitioning). 
- on - Expressions used for the hash partitioning. - - Returns - ------- - A dictionary mapping between int partition indices and - DataFrame fragments. - """ - assert not options, f"Expected no options, got: {options}" - - if df.num_rows == 0: - # Fast path for empty DataFrame - return dict.fromkeys(range(partition_count), df) - - # Hash the specified keys to calculate the output - # partition for each row - partition_map = plc.binaryop.binary_operation( - plc.hashing.murmurhash3_x86_32( - DataFrame([expr.evaluate(df) for expr in on], stream=df.stream).table, - stream=df.stream, - ), - plc.Scalar.from_py( - partition_count, plc.DataType(plc.TypeId.UINT32), stream=df.stream - ), - plc.binaryop.BinaryOperator.PYMOD, - plc.types.DataType(plc.types.TypeId.UINT32), - stream=df.stream, - ) - - # Apply partitioning - t, offsets = plc.partitioning.partition( - df.table, - partition_map, - partition_count, - stream=df.stream, - ) - splits = offsets[1:-1] - - # Split and return the partitioned result - return { - i: DataFrame.from_table( - split, - df.column_names, - df.dtypes, - df.stream, - ) - for i, split in enumerate(plc.copying.split(t, splits, stream=df.stream)) - } - - -# When dropping Python 3.10, can use _simple_shuffle_graph[OPT_T](...) 
-OPT_T = TypeVar("OPT_T") - - -def _simple_shuffle_graph( - name_in: str, - name_out: str, - count_in: int, - count_out: int, - _partition_dataframe_func: Callable[ - Concatenate[DataFrame, int, int, OPT_T, ...], - MutableMapping[int, DataFrame], - ], - options: OPT_T, - *other: Any, - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """Make a simple all-to-all shuffle graph.""" - split_name = f"split-{name_out}" - inter_name = f"inter-{name_out}" - - graph: MutableMapping[Any, Any] = {} - for part_out in range(count_out): - _concat_list = [] - for part_in in range(count_in): - graph[(split_name, part_in)] = ( - _partition_dataframe_func, - (name_in, part_in), - part_in, - count_out, - options, - *other, - ) - _concat_list.append((inter_name, part_out, part_in)) - graph[_concat_list[-1]] = ( - operator.getitem, - (split_name, part_in), - part_out, - ) - graph[(name_out, part_out)] = (partial(_concat, context=context), *_concat_list) - return graph - - @lower_ir_node.register(Shuffle) def _( ir: Shuffle, rec: LowerIRTransformer @@ -306,47 +99,3 @@ def _( partitioned_on=ir.keys, ) return new_node, pi - - -@generate_ir_tasks.register(Shuffle) -def _( - ir: Shuffle, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - # Extract "shuffle_method" configuration - shuffle_method = ir.shuffle_method - - # Try using rapidsmpf shuffler if we have "simple" shuffle - # keys, and the "shuffle_method" config is set to "rapidsmpf-single". 
- _keys: list[Col] - if shuffle_method == "rapidsmpf-single" and len( - _keys := [ne.value for ne in ir.keys if isinstance(ne.value, Col)] - ) == len(ir.keys): # pragma: no cover - from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph - - shuffle_on = [k.name for k in _keys] - - return rapidsmpf_shuffle_graph( - get_key_name(ir.children[0]), - get_key_name(ir), - partition_info[ir.children[0]].count, - partition_info[ir].count, - RMPFIntegration, - { - "on": shuffle_on, - "column_names": list(ir.schema.keys()), - "dtypes": list(ir.schema.values()), - }, - ) - - # Simple task-based fall-back - return partial(_simple_shuffle_graph, context=context)( - get_key_name(ir.children[0]), - get_key_name(ir), - partition_info[ir.children[0]].count, - partition_info[ir].count, - _hash_partition_dataframe, - None, - ir.keys, - ) diff --git a/python/cudf_polars/cudf_polars/experimental/sort.py b/python/cudf_polars/cudf_polars/experimental/sort.py index 6800fb4ab74..fa610324c2d 100644 --- a/python/cudf_polars/cudf_polars/experimental/sort.py +++ b/python/cudf_polars/cudf_polars/experimental/sort.py @@ -4,47 +4,30 @@ from __future__ import annotations -from functools import partial -from typing import TYPE_CHECKING, Any, TypedDict +from typing import TYPE_CHECKING import polars as pl import pylibcudf as plc -from rmm.pylibrmm.stream import DEFAULT_STREAM from cudf_polars.containers import Column, DataFrame, DataType from cudf_polars.dsl.expr import Col -from cudf_polars.dsl.ir import IR, Slice, Sort +from cudf_polars.dsl.ir import Slice, Sort from cudf_polars.dsl.traversal import traversal from cudf_polars.dsl.utils.naming import unique_names -from cudf_polars.experimental.base import PartitionInfo, get_key_name -from cudf_polars.experimental.dispatch import ( - generate_ir_tasks, - lower_ir_node, -) -from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.shuffle import _simple_shuffle_graph +from cudf_polars.experimental.dispatch 
import lower_ir_node from cudf_polars.experimental.utils import ( - _concat, - _fallback_inform, _lower_ir_fallback, ) -from cudf_polars.utils.config import ShuffleMethod -from cudf_polars.utils.cuda_stream import ( - get_dask_cuda_stream, - get_joined_cuda_stream, - join_cuda_streams, -) if TYPE_CHECKING: from collections.abc import MutableMapping, Sequence from rmm.pylibrmm.stream import Stream - from cudf_polars.dsl.expr import NamedExpr - from cudf_polars.dsl.ir import IRExecutionContext + from cudf_polars.dsl.ir import IR + from cudf_polars.experimental.base import PartitionInfo from cudf_polars.experimental.dispatch import LowerIRTransformer - from cudf_polars.typing import Schema def find_sort_splits( @@ -251,248 +234,6 @@ def _get_final_sort_boundaries( ) -def _sort_boundaries_graph( - name_in: str, - by: Sequence[str], - column_order: Sequence[plc.types.Order], - null_order: Sequence[plc.types.NullOrder], - count: int, - context: IRExecutionContext, -) -> tuple[str, MutableMapping[Any, Any]]: - """Graph to get the boundaries from all partitions.""" - local_boundaries_name = f"sort-boundaries_local-{name_in}" - concat_boundaries_name = f"sort-boundaries-concat-{name_in}" - global_boundaries_name = f"sort-boundaries-{name_in}" - graph: MutableMapping[Any, Any] = {} - - _concat_list = [] - for part_id in range(count): - graph[(local_boundaries_name, part_id)] = ( - _select_local_split_candidates, - (name_in, part_id), - by, - count, - part_id, - ) - _concat_list.append((local_boundaries_name, part_id)) - - graph[concat_boundaries_name] = (partial(_concat, context=context), *_concat_list) - graph[global_boundaries_name] = ( - _get_final_sort_boundaries, - concat_boundaries_name, - column_order, - null_order, - count, - ) - return global_boundaries_name, graph - - -class SortedShuffleOptions(TypedDict): - """RapidsMPF shuffling options.""" - - by: Sequence[str] - order: Sequence[plc.types.Order] - null_order: Sequence[plc.types.NullOrder] - column_names: 
Sequence[str] - column_dtypes: Sequence[DataType] - - -# Experimental rapidsmpf shuffler integration -class RMPFIntegrationSortedShuffle: # pragma: no cover - """cuDF-Polars protocol for rapidsmpf shuffler.""" - - @staticmethod - def insert_partition( - df: DataFrame, - partition_id: int, - partition_count: int, - shuffler: Any, - options: SortedShuffleOptions, - sort_boundaries: DataFrame, - ) -> None: - """Add cudf-polars DataFrame chunks to an RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import split_and_pack - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - by = options["by"] - data_streams = [ - df.stream, - sort_boundaries.stream, - ] - stream = get_joined_cuda_stream(get_dask_cuda_stream, upstreams=data_streams) - - splits = find_sort_splits( - df.select(by).table, - sort_boundaries.table, - partition_id, - options["order"], - options["null_order"], - stream=stream, - ) - packed_inputs = split_and_pack( - df.table, - splits=splits, - br=context.br, - stream=stream, - ) - # TODO: figure out handoff with rapidsmpf - # https://github.com/rapidsai/cudf/issues/20337 - shuffler.insert_chunks(packed_inputs) - - join_cuda_streams(downstreams=data_streams, upstreams=[stream]) - - @staticmethod - def extract_partition( - partition_id: int, - shuffler: Any, - options: SortedShuffleOptions, - ) -> DataFrame: - """Extract a finished partition from the RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import ( - unpack_and_concat, - unspill_partitions, - ) - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - shuffler.wait() - column_names = options["column_names"] - column_dtypes = options["column_dtypes"] - - stream = DEFAULT_STREAM - - # TODO: When sorting, this step should finalize with a merge (unless we - # require stability, as cudf merge is not stable). 
- # TODO: figure out handoff with rapidsmpf - # https://github.com/rapidsai/cudf/issues/20337 - return DataFrame.from_table( - unpack_and_concat( - unspill_partitions( - shuffler.extract(partition_id), - br=context.br, - allow_overbooking=True, - ), - br=context.br, - stream=stream, - ), - column_names, - column_dtypes, - stream=stream, - ) - - -def _sort_partition_dataframe( - df: DataFrame, - partition_id: int, # Not currently used - partition_count: int, - options: MutableMapping[str, Any], - sort_boundaries: DataFrame, -) -> MutableMapping[int, DataFrame]: - """ - Partition a sorted DataFrame for shuffling. - - Parameters - ---------- - df - The DataFrame to partition. - partition_id - The partition id of the current partition. - partition_count - The total number of partitions. - options - The sort options ``(by, order, null_order)``. - sort_boundaries - The global sort boundary candidates used to decide where to split. - """ - if df.num_rows == 0: # pragma: no cover - # Fast path for empty DataFrame - return dict.fromkeys(range(partition_count), df) - - stream = get_joined_cuda_stream( - get_dask_cuda_stream, upstreams=(df.stream, sort_boundaries.stream) - ) - - splits = find_sort_splits( - df.select(options["by"]).table, - sort_boundaries.table, - partition_id, - options["order"], - options["null_order"], - stream=stream, - ) - - # Split and return the partitioned result - return { - i: DataFrame.from_table( - split, - df.column_names, - df.dtypes, - stream=df.stream, - ) - for i, split in enumerate(plc.copying.split(df.table, splits, stream=stream)) - } - - -class ShuffleSorted(IR): - """ - Shuffle already locally sorted multi-partition data. - - Shuffling is performed by extracting sort boundary candidates from all partitions, - sharing them all-to-all and then exchanging data accordingly. 
- The sorting information is required to be passed in identically to the already - performed local sort and as of now the final result needs to be sorted again to - merge the partitions. - """ - - __slots__ = ("by", "null_order", "order", "shuffle_method") - _non_child = ("schema", "by", "order", "null_order", "shuffle_method") - _n_non_child_args = 5 - by: tuple[NamedExpr, ...] - """Keys by which the data was sorted.""" - order: tuple[plc.types.Order, ...] - """Sort order if sorted.""" - null_order: tuple[plc.types.NullOrder, ...] - """Null precedence if sorted.""" - shuffle_method: ShuffleMethod - """Shuffle method to use.""" - - def __init__( - self, - schema: Schema, - by: tuple[NamedExpr, ...], - order: tuple[plc.types.Order, ...], - null_order: tuple[plc.types.NullOrder, ...], - shuffle_method: ShuffleMethod, - df: IR, - ): - self.schema = schema - self.by = by - self.order = order - self.null_order = null_order - self.shuffle_method = shuffle_method - self._non_child_args = (schema, by, order, null_order, shuffle_method) - self.children = (df,) - - @classmethod - def do_evaluate( - cls, - schema: Schema, - by: tuple[NamedExpr, ...], - order: tuple[plc.types.Order, ...], - null_order: tuple[plc.types.NullOrder, ...], - shuffle_method: ShuffleMethod, - df: DataFrame, - *, - context: IRExecutionContext, - ) -> DataFrame: # pragma: no cover - """Evaluate and return a dataframe.""" - # Single-partition ShuffleSorted evaluation is a no-op - return df - - def _has_simple_zlice(zlice: tuple[int, int | None] | None) -> bool: """Check if a zlice is a simple top-k/bottom-k operation.""" if zlice is None: @@ -517,26 +258,7 @@ def _( msg="sort currently only supports column names as `by` keys.", ) - config_options = rec.state["config_options"] - executor = config_options.executor - runtime = executor.runtime - - # Special handling for slicing - # (May be a top- or bottom-k operation) - simple_zlice = _has_simple_zlice(ir.zlice) - if simple_zlice and runtime == "tasks": - 
from cudf_polars.experimental.parallel import _lower_ir_pwise - - new_node, partition_info = _lower_ir_pwise(ir, rec) - if partition_info[new_node].count > 1: - # Collapse down to single partition - inter = Repartition(new_node.schema, new_node) - partition_info[inter] = PartitionInfo(count=1) - # Sort reduced partition - new_node = ir.reconstruct([inter]) - partition_info[new_node] = PartitionInfo(count=1) - return new_node, partition_info - elif ir.zlice is not None and not simple_zlice: + if ir.zlice is not None and not _has_simple_zlice(ir.zlice): # Pull "complex" slices out of the Sort node altogether. return rec( Slice( @@ -557,112 +279,6 @@ def _( # Extract child partitioning child, partition_info = rec(ir.children[0]) - # The "rapidsmpf" runtime uses the sort_actor to handle everything else - if runtime == "rapidsmpf": - sort_node = ir.reconstruct([child]) - partition_info[sort_node] = partition_info[child] - return sort_node, partition_info - - # TODO: Remove everything below here when "tasks" is removed. - - # Avoid rapidsmpf shuffle with maintain_order=True (for now) - shuffle_method = ( - ShuffleMethod("tasks") if ir.stable else config_options.executor.shuffle_method - ) - if ( - shuffle_method != config_options.executor.shuffle_method - ): # pragma: no cover; Requires rapidsmpf - _fallback_inform( - f"shuffle_method={config_options.executor.shuffle_method} does not support maintain_order=True. " - f"Falling back to shuffle_method={shuffle_method}.", - config_options, - ) - - if partition_info[child].count == 1: - single_part_node = ir.reconstruct([child]) - partition_info[single_part_node] = partition_info[child] - return single_part_node, partition_info - - local_sort_node = ir.reconstruct([child]) - partition_info[local_sort_node] = partition_info[child] - - shuffle = ShuffleSorted( - ir.schema, - ir.by, - ir.order, - ir.null_order, - shuffle_method, - local_sort_node, - ) - partition_info[shuffle] = partition_info[child] - - # We sort again locally. 
- assert ir.zlice is None # zlice handling would be incorrect without adjustment - final_sort_node = ir.reconstruct([shuffle]) - partition_info[final_sort_node] = partition_info[shuffle] - - return final_sort_node, partition_info - - -@generate_ir_tasks.register(ShuffleSorted) -def _( - ir: ShuffleSorted, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - by = [ne.value.name for ne in ir.by if isinstance(ne.value, Col)] - if len(by) != len(ir.by): # pragma: no cover - # We should not reach here as this is checked in the lower_ir_node - raise NotImplementedError("Sorting columns must be column names.") - - (child,) = ir.children - - sort_boundaries_name, graph = _sort_boundaries_graph( - get_key_name(child), - by, - ir.order, - ir.null_order, - partition_info[child].count, - context, - ) - - options = { - "by": by, - "order": ir.order, - "null_order": ir.null_order, - "column_names": list(ir.schema.keys()), - "column_dtypes": list(ir.schema.values()), - } - - # Try using rapidsmpf shuffler if we have "simple" shuffle - # keys, and the "shuffle_method" config is set to "rapidsmpf-single". 
- shuffle_method = ir.shuffle_method - if shuffle_method == "rapidsmpf-single": # pragma: no cover - from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph - - graph.update( - rapidsmpf_shuffle_graph( - get_key_name(child), - get_key_name(ir), - partition_info[child].count, - partition_info[ir].count, - RMPFIntegrationSortedShuffle, - options, - sort_boundaries_name, - ) - ) - return graph - - # Simple task-based fall-back - graph.update( - partial(_simple_shuffle_graph, context=context)( - get_key_name(child), - get_key_name(ir), - partition_info[child].count, - partition_info[ir].count, - _sort_partition_dataframe, - options, - sort_boundaries_name, - ) - ) - return graph + sort_node = ir.reconstruct([child]) + partition_info[sort_node] = partition_info[child] + return sort_node, partition_info diff --git a/python/cudf_polars/cudf_polars/experimental/utils.py b/python/cudf_polars/cudf_polars/experimental/utils.py index 24ce606d41b..848a4d44759 100644 --- a/python/cudf_polars/cudf_polars/experimental/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/utils.py @@ -10,7 +10,7 @@ from itertools import chain from typing import TYPE_CHECKING -from cudf_polars.dsl.expr import Col, Expr, GroupedWindow, UnaryFunction +from cudf_polars.dsl.expr import Col, GroupedWindow, UnaryFunction from cudf_polars.dsl.ir import Union from cudf_polars.dsl.traversal import traversal from cudf_polars.experimental.base import PartitionInfo @@ -49,11 +49,8 @@ def _fallback_inform( def _dynamic_planning_on(config_options: ConfigOptions[StreamingExecutor]) -> bool: - """Check if dynamic planning is enabled for rapidsmpf runtime.""" - return ( - config_options.executor.runtime == "rapidsmpf" - and config_options.executor.dynamic_planning is not None - ) + """Check if dynamic planning is enabled.""" + return config_options.executor.dynamic_planning is not None def _lower_ir_fallback( @@ -68,9 +65,6 @@ def _lower_ir_fallback( from cudf_polars.experimental.repartition import 
Repartition from cudf_polars.experimental.select import _inline_hstack_false - config_options = rec.state["config_options"] - rapidsmpf_engine = config_options.executor.runtime == "rapidsmpf" - # Make sure we avoid mixed-length columns in intermediate TableChunks. ir = _inline_hstack_false(ir) @@ -82,13 +76,10 @@ def _lower_ir_fallback( children = [] inform = False for c in lowered_children: - child = c - if multi_partitioned := partition_info[c].count > 1: + if partition_info[c].count > 1: inform = True - if multi_partitioned or rapidsmpf_engine: - # Fall-back logic - child = Repartition(child.schema, child) - partition_info[child] = PartitionInfo(count=1) + child = Repartition(c.schema, c) + partition_info[child] = PartitionInfo(count=1) children.append(child) if inform and msg: @@ -114,32 +105,6 @@ def _leaf_column_names(expr: Expr) -> tuple[str, ...]: return () -def _get_unique_fractions( - column_names: Sequence[str], - user_unique_fractions: dict[str, float], -) -> dict[str, float]: - """ - Return unique-fraction statistics subset. - - Parameters - ---------- - column_names - The column names to get unique-fractions for. - user_unique_fractions - The user-provided unique-fraction dictionary. - - Returns - ------- - unique_fractions - The final unique-fraction dictionary filtered to column_names. 
- """ - return { - c: max(min(f, 1.0), 0.00001) - for c, f in user_unique_fractions.items() - if c in column_names - } - - def _contains_over(exprs: Sequence[Expr]) -> bool: """Return True if any expression contains a window expression.""" return any(isinstance(e, GroupedWindow) for e in traversal(exprs)) diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 9f0953cd4df..5611f8c3e70 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -30,7 +30,6 @@ # Will be overriden by `conftest.py` with the value from the `--executor` # and `--cluster` command-line arguments DEFAULT_EXECUTOR = "in-memory" -DEFAULT_RUNTIME = "tasks" DEFAULT_CLUSTER = "single" @@ -200,7 +199,6 @@ def get_default_engine( executor = executor or DEFAULT_EXECUTOR if executor == "streaming": executor_options["cluster"] = DEFAULT_CLUSTER - executor_options["runtime"] = DEFAULT_RUNTIME return GPUEngine( raise_on_fail=True, @@ -290,7 +288,8 @@ def assert_collect_raises( if polars_except != (): raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}") - engine = GPUEngine(raise_on_fail=True) + # TODO: https://github.com/rapidsai/cudf/issues/22346 + engine = GPUEngine(executor="in-memory", raise_on_fail=True) try: lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload] except cudf_except: diff --git a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py index 6fe2de4d154..7cfb62c414e 100644 --- a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py +++ b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py @@ -30,6 +30,7 @@ def pytest_addoption(parser: pytest.Parser) -> None: choices=("in-memory", "spmd"), help="Which GPU engine variant to inject globally.", ) + # TODO: We never run with --inject-gpu-engine-blocksize in 
ci/run_cudf_polars_polars_tests.sh. Remove? group.addoption( "--inject-gpu-engine-blocksize", action="store", @@ -134,6 +135,7 @@ def pytest_report_header(config: pytest.Config) -> str: return f"injected GPU engine: {cls.__module__}.{cls.__name__}" +# TODO: This is just Mapping[str, str]? EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = { "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU", "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning", @@ -305,7 +307,6 @@ def pytest_report_header(config: pytest.Config) -> str: # Generally skip for: # 1) Tests that are too slow with --inject-gpu-engine-blocksize=small due to many small partitions for large data -# 2) Tests that fail during cudf_polars execution and segfaults later due to https://github.com/rapidsai/cudf/issues/22138 STREAMING_ENGINE_TESTS_TO_SKIP: Mapping[str, str] = { "tests/unit/operations/aggregation/test_aggregations.py::test_boolean_aggs": "float difference in std/var in the unit of least precision", "tests/benchmark/test_group_by.py::test_groupby_h2oai_q1": "Too slow with --inject-gpu-engine-blocksize=small", diff --git a/python/cudf_polars/cudf_polars/utils/config.py b/python/cudf_polars/cudf_polars/utils/config.py index a6bbd73929b..7b5fb5c940c 100644 --- a/python/cudf_polars/cudf_polars/utils/config.py +++ b/python/cudf_polars/cudf_polars/utils/config.py @@ -55,9 +55,7 @@ "InMemoryExecutor", "ParquetOptions", "RayContext", - "Runtime", "SPMDContext", - "ShuffleMethod", "StreamingExecutor", "StreamingFallbackMode", ] @@ -112,15 +110,6 @@ def get_total_device_memory() -> int | None: return None -@functools.cache -def rapidsmpf_single_available() -> bool: # pragma: no cover - """Query whether rapidsmpf is available as a single-process shuffle method.""" - try: - return importlib.util.find_spec("rapidsmpf.integrations.single") is not None - except (ImportError, ValueError): - return False - - 
class StreamingFallbackMode(enum.StrEnum): """ How the streaming executor handles operations that don't support multiple partitions. @@ -138,20 +127,6 @@ class StreamingFallbackMode(enum.StrEnum): SILENT = "silent" -class Runtime(enum.StrEnum): - """ - The runtime to use for the streaming executor. - - * ``Runtime.TASKS`` : Use the task-based runtime. - This is the default runtime. - * ``Runtime.RAPIDSMPF`` : Use the coroutine-based streaming runtime (rapidsmpf). - This runtime is experimental. - """ - - TASKS = "tasks" - RAPIDSMPF = "rapidsmpf" - - class Cluster(enum.StrEnum): """ The cluster configuration for the streaming executor. @@ -172,27 +147,6 @@ class Cluster(enum.StrEnum): DASK = "dask" -class ShuffleMethod(enum.StrEnum): - """ - The method to use for shuffling data between workers with the streaming executor. - - * ``ShuffleMethod.TASKS`` : Use the task-based shuffler. - * ``ShuffleMethod.RAPIDSMPF`` : Use the rapidsmpf shuffler. - * ``ShuffleMethod._RAPIDSMPF_SINGLE`` : Use the single-process rapidsmpf shuffler. - - With :class:`cudf_polars.utils.config.StreamingExecutor`, the default of ``None`` - resolves to ``ShuffleMethod.TASKS``. - - The user should **not** specify ``ShuffleMethod._RAPIDSMPF_SINGLE`` directly. - A setting of ``ShuffleMethod.RAPIDSMPF`` will be converted to the single-process - shuffler automatically when using single-GPU execution. - """ - - TASKS = "tasks" - RAPIDSMPF = "rapidsmpf" - _RAPIDSMPF_SINGLE = "rapidsmpf-single" - - T = TypeVar("T") @@ -254,7 +208,7 @@ class ParquetOptions: will also be skipped if ``max_footer_samples`` is 0. use_rapidsmpf_native Whether to use the native rapidsmpf node for parquet reading. - This option is only used when the rapidsmpf runtime is enabled. + This option is only used by the streaming executor. Default is False. 
""" @@ -315,49 +269,32 @@ def __post_init__(self) -> None: # noqa: D105 raise TypeError("use_rapidsmpf_native must be a bool") -def default_target_partition_size(cluster: str, runtime: str) -> int: +@functools.cache +def default_target_partition_size() -> int: """Return the default blocksize.""" if (device_size := get_total_device_memory()) is None: # pragma: no cover # System doesn't have proper "GPU memory". # Fall back to a conservative 1GB default. return 1_000_000_000 - if ( - cluster == "single" - and runtime == "tasks" - and _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1 - ): - # We can use a larger blocksize when UVM is enabled - blocksize = int(device_size * 0.0625) - else: - # Otherwise, use a conservative default - blocksize = int(device_size * 0.025) + blocksize = int(device_size * 0.025) # Use lower and upper bounds of 1GB and 10GB return min(max(blocksize, 1_000_000_000), 10_000_000_000) -def default_broadcast_join_limit(cluster: str, runtime: str) -> int: +@functools.cache +def default_broadcast_join_limit() -> int: """Return the default broadcast join limit.""" if (device_size := get_total_device_memory()) is None: # pragma: no cover # System doesn't have proper "GPU memory". # We probably want to broadcast in most cases. return 32 - if runtime == "rapidsmpf": - # Target about 12.5% of the device memory when - # default_target_partition_size is used to set the - # target partition size (i.e. 5x the 2.5% default). - return min(5, int(max(1, (device_size * 0.125) // 1e9))) - elif _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1: - # The "tasks" runtime always runs single-GPU; we can lean on UVM - # to support most broadcast joins. - return 32 - else: - # Extra-conservative default for the "tasks" runtime without UVM. - # We cannot spill outside a rapidsmpf shuffle within this runtime, - # so shuffling is usually preferred. 
- return 2 + # Target about 12.5% of the device memory when + # default_target_partition_size is used to set the + # target partition size (i.e. 5x the 2.5% default). + return min(5, int(max(1, (device_size * 0.125) // 1e9))) @dataclasses.dataclass(frozen=True) @@ -599,17 +536,14 @@ class StreamingExecutor: Parameters ---------- - runtime - The runtime to use for the streaming executor. - ``Runtime.TASKS`` by default. cluster The cluster configuration for the streaming executor. ``Cluster.SINGLE`` by default. * ``Cluster.SINGLE``: Single-GPU execution - * ``Cluster.SPMD``: Multi-GPU SPMD execution (rapidsmpf runtime) - * ``Cluster.RAY``: Multi-GPU Ray execution (rapidsmpf runtime) - * ``Cluster.DASK``: Multi-GPU Dask execution (rapidsmpf runtime) + * ``Cluster.SPMD``: Multi-GPU SPMD execution + * ``Cluster.RAY``: Multi-GPU Ray execution + * ``Cluster.DASK``: Multi-GPU Dask execution fallback_mode How to handle errors when the GPU engine fails to execute a query. @@ -621,13 +555,6 @@ class StreamingExecutor: The maximum number of rows to process per partition. 1_000_000 by default. When the number of rows exceeds this value, the query will be split into multiple partitions and executed in parallel. - unique_fraction - A dictionary mapping column names to floats between 0 and 1 (inclusive - on the right). - - Each factor estimates the fractional number of unique values in the - column. By default, ``1.0`` is used for any column not included in - ``unique_fraction``. target_partition_size Target partition size, in bytes, for IO tasks. This configuration currently controls how large parquet files are split into multiple partitions. 
@@ -639,11 +566,8 @@ class StreamingExecutor: - keyword argument to ``polars.GPUEngine`` - the ``CUDF_POLARS__EXECUTOR__TARGET_PARTITION_SIZE`` environment variable - By default, cudf-polars uses a target partition size that's a fraction - of the device memory, where the fraction depends on the cluster and runtime: - - - rapidsmpf runtime: 1/40th of the device memory - - single cluster and tasks runtime: 1/16th of the device memory + By default, cudf-polars uses a target partition size of 1/40th of the + device memory. The pynvml library is used to query the total device memory on the first visible GPU. If the device size is not available, the default target @@ -651,26 +575,14 @@ class StreamingExecutor: NOTE: If this configuration is changed manually, it is recommended to set `broadcast_join_limit` manually as well. - groupby_n_ary - The factor by which the number of partitions is decreased when performing - a groupby on a partitioned column. For example, if a column has 64 partitions, - it will first be reduced to ``ceil(64 / 32) = 2`` partitions. - - This is useful when the absolute number of partitions is large. broadcast_join_limit The maximum number of partitions to allow for the smaller table in a broadcast join. For example, if the target partition size is 1GB and the broadcast join limit is 5, then the smaller table will be broadcasted - if it is smaller than 5GB (within the "rapidsmpf" runtime) or contains - fewer than 5 partitions (within the "tasks" runtime). The default depends - on the cluster and runtime. - shuffle_method - The method to use for shuffling data between workers. Defaults to - 'tasks' for the single-GPU cluster. + if it is smaller than 5GB. client_device_threshold - Threshold for spilling data from device memory in rapidsmpf. + Threshold for spilling data from device memory. Default is 50% of device memory on the client process. - This argument is only used by the "rapidsmpf" runtime. 
sink_to_directory Whether multi-partition sink operations write to a directory rather than a single file. For the spmd, ray, and dask clusters this is @@ -680,7 +592,7 @@ class StreamingExecutor: Options controlling dynamic shuffle planning. See :class:`~cudf_polars.utils.config.DynamicPlanningOptions` for more. max_io_threads - Maximum number of IO threads for the rapidsmpf runtime. Default is 4. + Maximum number of IO threads. Default is 4. This controls the parallelism of IO operations when reading data. spill_to_pinned_memory Whether RapidsMPF should spill to pinned host memory when available, @@ -688,8 +600,8 @@ class StreamingExecutor: bandwidth and lower latency for device to host transfers compared to regular pageable host memory. num_py_executors - Maximum number of workers for the Python ThreadPoolExecutor used by - the rapidsmpf runtime. Default is 8. + Maximum number of workers for the Python ThreadPoolExecutor. + Default is 8. Notes ----- @@ -700,13 +612,6 @@ class StreamingExecutor: _env_prefix = "CUDF_POLARS__EXECUTOR" name: Literal["streaming"] = dataclasses.field(default="streaming", init=False) - runtime: Runtime = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__RUNTIME", - Runtime.__call__, - default=Runtime.TASKS, - ) - ) cluster: Cluster | None = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__CLUSTER", @@ -726,33 +631,16 @@ class StreamingExecutor: f"{_env_prefix}__MAX_ROWS_PER_PARTITION", int, default=1_000_000 ) ) - unique_fraction: dict[str, float] = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__UNIQUE_FRACTION", json.loads, default={} - ) - ) target_partition_size: int = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__TARGET_PARTITION_SIZE", int, default=0 ) ) - groupby_n_ary: int = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__GROUPBY_N_ARY", int, default=32 - ) - ) broadcast_join_limit: 
int = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__BROADCAST_JOIN_LIMIT", int, default=0 ) ) - shuffle_method: ShuffleMethod = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__SHUFFLE_METHOD", - ShuffleMethod.__call__, - default=ShuffleMethod.TASKS, - ) - ) client_device_threshold: float = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__CLIENT_DEVICE_THRESHOLD", float, default=0.5 @@ -786,33 +674,10 @@ class StreamingExecutor: dask_context: DaskContext | None = None def __post_init__(self) -> None: # noqa: D105 - # Check for rapidsmpf runtime - if self.runtime == "rapidsmpf": # pragma: no cover; requires rapidsmpf runtime - if not rapidsmpf_single_available(): - raise ValueError("The rapidsmpf streaming engine requires rapidsmpf.") - object.__setattr__(self, "shuffle_method", "rapidsmpf") - if self.cluster is None: object.__setattr__(self, "cluster", Cluster.SINGLE) assert self.cluster is not None, "Expected cluster to be set." - # Handle shuffle_method defaults for streaming executor - if self.shuffle_method is None: - # Use task-based shuffle by default. - # TODO: Evaluate single-process shuffle by default. - object.__setattr__(self, "shuffle_method", "tasks") - elif self.shuffle_method == "rapidsmpf-single": - # The user should NOT specify "rapidsmpf-single" directly. - raise ValueError("rapidsmpf-single is not a supported shuffle method.") - elif self.shuffle_method == "rapidsmpf": - if self.cluster == "single" and not rapidsmpf_single_available(): - raise ValueError( - "rapidsmpf shuffle method requested, but rapidsmpf is not installed." 
- ) - # Select "rapidsmpf-single" for single-GPU - if self.cluster == "single": - object.__setattr__(self, "shuffle_method", "rapidsmpf-single") - # frozen dataclass, so use object.__setattr__ object.__setattr__( self, "fallback_mode", StreamingFallbackMode(self.fallback_mode) @@ -821,16 +686,15 @@ def __post_init__(self) -> None: # noqa: D105 object.__setattr__( self, "target_partition_size", - default_target_partition_size(self.cluster, self.runtime), + default_target_partition_size(), ) if self.broadcast_join_limit == 0: object.__setattr__( self, "broadcast_join_limit", - default_broadcast_join_limit(self.cluster, self.runtime), + default_broadcast_join_limit(), ) object.__setattr__(self, "cluster", Cluster(self.cluster)) - object.__setattr__(self, "shuffle_method", ShuffleMethod(self.shuffle_method)) # Handle dynamic_planning. # Can be None, dict, or DynamicPlanningOptions @@ -853,12 +717,8 @@ def __post_init__(self) -> None: # noqa: D105 # Type / value check everything else if not isinstance(self.max_rows_per_partition, int): raise TypeError("max_rows_per_partition must be an int") - if not isinstance(self.unique_fraction, dict): - raise TypeError("unique_fraction must be a dict of column name to float") if not isinstance(self.target_partition_size, int): raise TypeError("target_partition_size must be an int") - if not isinstance(self.groupby_n_ary, int): - raise TypeError("groupby_n_ary must be an int") if not isinstance(self.broadcast_join_limit, int): raise TypeError("broadcast_join_limit must be an int") if not isinstance(self.sink_to_directory, bool): @@ -873,10 +733,9 @@ def __post_init__(self) -> None: # noqa: D105 raise TypeError("num_py_executors must be an int") def __hash__(self) -> int: # noqa: D105 - # cardinality factory, a dict, isn't natively hashable. We'll dump it + # dynamic_planning factory, a dataclass, isn't natively hashable. We'll dump it # to json and hash that. 
d = dataclasses.asdict(self) - d["unique_fraction"] = json.dumps(d["unique_fraction"]) d["dynamic_planning"] = json.dumps(d["dynamic_planning"]) return hash(tuple(sorted(d.items()))) @@ -1059,19 +918,6 @@ def from_polars_engine( executor = InMemoryExecutor(**user_executor_options) case "streaming": user_executor_options = user_executor_options.copy() - # Handle the interaction between the default shuffle method, the - # cluster, and whether rapidsmpf is available. - env_shuffle_method = os.environ.get( - "CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", None - ) - if env_shuffle_method is not None: - shuffle_method_default = ShuffleMethod(env_shuffle_method) - else: - shuffle_method_default = None - - user_executor_options.setdefault( - "shuffle_method", shuffle_method_default - ) # Handle dynamic_planning: check user config, then env var user_dynamic_planning = user_executor_options.get( @@ -1097,7 +943,7 @@ def from_polars_engine( } # Handle "cuda-stream-policy". - # The default will depend on the runtime and executor. + # The default will depend on the executor. user_cuda_stream_policy = engine.config.get( "cuda_stream_policy", None ) or os.environ.get("CUDF_POLARS__CUDA_STREAM_POLICY", None) @@ -1105,24 +951,18 @@ def from_polars_engine( cuda_stream_policy: CUDAStreamPoolConfig | None if user_cuda_stream_policy is None: - if ( - executor.name == "streaming" and executor.runtime == Runtime.RAPIDSMPF - ): # pragma: no cover; requires rapidsmpf runtime - # the rapidsmpf runtime defaults to using a stream pool + if executor.name == "streaming": cuda_stream_policy = CUDAStreamPoolConfig() else: - # everything else defaults to the default stream cuda_stream_policy = None else: cuda_stream_policy = _convert_cuda_stream_policy(user_cuda_stream_policy) - # Pool policy is only supported by the rapidsmpf runtime. 
if isinstance(cuda_stream_policy, CUDAStreamPoolConfig) and ( - (executor.name != "streaming") - or (executor.name == "streaming" and executor.runtime != Runtime.RAPIDSMPF) + executor.name != "streaming" ): raise ValueError( - "A stream pool is only supported by the rapidsmpf runtime." + "A stream pool is only supported by the streaming executor." ) kwargs["cuda_stream_policy"] = cuda_stream_policy diff --git a/python/cudf_polars/cudf_polars/utils/cuda_stream.py b/python/cudf_polars/cudf_polars/utils/cuda_stream.py index c0708d3bea8..22022ee3401 100644 --- a/python/cudf_polars/cudf_polars/utils/cuda_stream.py +++ b/python/cudf_polars/cudf_polars/utils/cuda_stream.py @@ -17,11 +17,6 @@ from rmm.pylibrmm.stream import Stream -def get_dask_cuda_stream() -> Stream: - """Get the default CUDA stream for Dask.""" - return DEFAULT_STREAM - - def get_cuda_stream() -> Stream: """Get the default CUDA stream for the current thread.""" return DEFAULT_STREAM diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index d48793f0541..47633e42364 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "packaging", "polars>=1.30,<1.39", "pylibcudf==26.6.*,>=0.0.0a0", + "rapidsmpf==26.6.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ @@ -47,7 +48,6 @@ test = [ "pytest-cov", "pytest-httpserver", "pytest-xdist", - "rapidsmpf==26.6.*,>=0.0.0a0", "rich", "structlog", "zstandard", diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py index 7f00684638f..b3d83b36d36 100644 --- a/python/cudf_polars/tests/conftest.py +++ b/python/cudf_polars/tests/conftest.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import importlib.util from typing import TYPE_CHECKING import pytest @@ -54,13 +53,6 @@ def clear_memory_resource_cache(): @pytest.fixture(autouse=True) def _skip_unless_spmd(request: pytest.FixtureRequest) -> None: """Skip tests in SPMD multi-rank mode unless marked with ``pytest.mark.spmd``.""" - # Do not use `pytest.importorskip` here: this fixture is autouse, so an - # import-based skip would skip every test in the suite on environments - # without rapidsmpf (e.g. the coverage CI job), masking real coverage. - # We only want to gate the nranks>1 check on rapidsmpf being available. - if importlib.util.find_spec("rapidsmpf") is None: - return - from rapidsmpf.bootstrap import get_nranks, is_running_with_rrun if ( @@ -79,7 +71,6 @@ def streaming_engines() -> Generator[StreamingEngines, None, None]: name to a single shared engine instance, which is reused across the entire test session. """ - pytest.importorskip("rapidsmpf") from rapidsmpf import bootstrap from rapidsmpf.communicator.single import new_communicator as single_communicator from rapidsmpf.config import Options, get_environment_variables @@ -228,7 +219,8 @@ def engine_raise_on_fail() -> pl.GPUEngine: from ``.collect()``. Uses the in-memory executor so errors are not wrapped by a streaming task group. 
""" - return pl.GPUEngine(raise_on_fail=True) + # TODO: We should be testing will all supported engine variants + return pl.GPUEngine(executor="in-memory", raise_on_fail=True) def pytest_addoption(parser): @@ -240,14 +232,6 @@ def pytest_addoption(parser): help="Executor to use for GPUEngine.", ) - parser.addoption( - "--runtime", - action="store", - default="tasks", - choices=("tasks", "rapidsmpf"), - help="Runtime to use for the 'streaming' executor.", - ) - parser.addoption( "--cluster", action="store", @@ -278,17 +262,7 @@ def pytest_configure(config): # apply globally rather than per-module. config.addinivalue_line("filterwarnings", "ignore::ResourceWarning") - if config.getoption("--runtime") == "rapidsmpf": - if config.getoption("--executor") == "in-memory": - raise pytest.UsageError("Rapidsmpf runtime requires --executor='streaming'") - - if importlib.util.find_spec("rapidsmpf") is None: - raise pytest.UsageError( - "Rapidsmpf runtime requires the 'rapidsmpf' package" - ) - cudf_polars.testing.asserts.DEFAULT_EXECUTOR = config.getoption("--executor") - cudf_polars.testing.asserts.DEFAULT_RUNTIME = config.getoption("--runtime") cudf_polars.testing.asserts.DEFAULT_CLUSTER = config.getoption("--cluster") diff --git a/python/cudf_polars/tests/experimental/test_dask.py b/python/cudf_polars/tests/experimental/test_dask.py index 5ccdde864ef..93ef4318490 100644 --- a/python/cudf_polars/tests/experimental/test_dask.py +++ b/python/cudf_polars/tests/experimental/test_dask.py @@ -64,7 +64,6 @@ def test_yields_engine(engine: DaskEngine) -> None: def test_executor_options_forwarded(engine: DaskEngine) -> None: """Reserved executor_options keys are injected into the engine config.""" opts = engine.config["executor_options"] - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "dask" assert isinstance(opts["dask_context"], DaskContext) @@ -196,7 +195,6 @@ def test_reset_updates_executor_options(reset_engine: DaskEngine) -> None: opts = 
reset_engine.config["executor_options"] assert opts["max_rows_per_partition"] == 42 # Reserved keys are still injected by ``_reset``. - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "dask" assert isinstance(opts["dask_context"], DaskContext) diff --git a/python/cudf_polars/tests/experimental/test_explain.py b/python/cudf_polars/tests/experimental/test_explain.py index fecd4ba4d03..7f19e318778 100644 --- a/python/cudf_polars/tests/experimental/test_explain.py +++ b/python/cudf_polars/tests/experimental/test_explain.py @@ -540,8 +540,7 @@ def test_scan_properties(tmp_path: Path, predicate: pl.Expr | None): engine = pl.GPUEngine(executor="streaming", raise_on_fail=True) dag = serialize_query(q, engine) - # walk Union -> Scan - node = dag.nodes[dag.nodes[dag.roots[0]].children[0]] + node = dag.nodes[dag.roots[0]] assert node.type == "Scan" assert node.properties == expected_properties @@ -673,7 +672,6 @@ def test_dynamic_planning_adds_repartition(df, op): executor="streaming", raise_on_fail=True, executor_options={ - "runtime": "rapidsmpf", "dynamic_planning": {}, "max_rows_per_partition": 1_000_000, }, diff --git a/python/cudf_polars/tests/experimental/test_groupby.py b/python/cudf_polars/tests/experimental/test_groupby.py index 8d6ac5927e9..03d87fe23e9 100644 --- a/python/cudf_polars/tests/experimental/test_groupby.py +++ b/python/cudf_polars/tests/experimental/test_groupby.py @@ -270,10 +270,7 @@ def test_groupby_literal_key(df, streaming_engine): @pytest.mark.parametrize("keys", [("y",), ("y", "z")]) def test_groupby_agg_config_options(df, op, keys, streaming_engine_factory): streaming_engine = streaming_engine_factory( - StreamingOptions( - max_rows_per_partition=4, - unique_fraction={"z": 0.5}, - ), + StreamingOptions(max_rows_per_partition=4), ) agg = getattr(pl.col("x"), op)() if op in ("sum", "mean"): diff --git a/python/cudf_polars/tests/experimental/test_hstack.py b/python/cudf_polars/tests/experimental/test_hstack.py index 
9bbb4b7aa33..0c21678f7e2 100644 --- a/python/cudf_polars/tests/experimental/test_hstack.py +++ b/python/cudf_polars/tests/experimental/test_hstack.py @@ -20,7 +20,6 @@ from cudf_polars.experimental.statistics import collect_statistics from cudf_polars.testing.asserts import ( DEFAULT_CLUSTER, - DEFAULT_RUNTIME, assert_gpu_result_equal, ) from cudf_polars.utils.config import ConfigOptions @@ -34,7 +33,6 @@ def engine(): executor_options={ "max_rows_per_partition": 3, "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, }, ) diff --git a/python/cudf_polars/tests/experimental/test_options.py b/python/cudf_polars/tests/experimental/test_options.py index 291cbda7589..eb64cb97ed8 100644 --- a/python/cudf_polars/tests/experimental/test_options.py +++ b/python/cudf_polars/tests/experimental/test_options.py @@ -66,11 +66,6 @@ def test_executor_options_includes_set_fields() -> None: assert "log" not in result -def test_executor_options_unique_fraction() -> None: - result = StreamingOptions(unique_fraction={"col_a": 0.5}).to_executor_options() - assert result["unique_fraction"] == {"col_a": 0.5} - - def test_executor_options_num_py_executors() -> None: result = StreamingOptions(num_py_executors=4).to_executor_options() assert result["num_py_executors"] == 4 diff --git a/python/cudf_polars/tests/experimental/test_parallel.py b/python/cudf_polars/tests/experimental/test_parallel.py index 42365a113e2..67fc372e2e4 100644 --- a/python/cudf_polars/tests/experimental/test_parallel.py +++ b/python/cudf_polars/tests/experimental/test_parallel.py @@ -12,13 +12,9 @@ from polars.testing import assert_frame_equal from cudf_polars import Translator -from cudf_polars.dsl.expressions.base import Col, NamedExpr from cudf_polars.dsl.traversal import traversal -from cudf_polars.experimental.parallel import lower_ir_graph from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions -from cudf_polars.experimental.statistics import collect_statistics from 
cudf_polars.testing.asserts import assert_gpu_result_equal -from cudf_polars.utils.config import ConfigOptions @pytest.mark.parametrize("column", ["a", "b"]) @@ -134,40 +130,3 @@ def test_pickle_conditional_join_args(): ir = Translator(q._ldf.visit(), GPUEngine()).translate_ir() for node in traversal([ir]): pickle.loads(pickle.dumps(node._non_child_args)) - - -def test_preserve_partitioning(streaming_engine_factory): - streaming_engine = streaming_engine_factory( - StreamingOptions( - max_rows_per_partition=2, - broadcast_join_limit=2, - unique_fraction={"a": 1.0}, - ), - ) - left = pl.LazyFrame({"a": [1, 2, 3, 4] * 5, "b": range(20)}) - right = pl.LazyFrame({"a": [3, 4, 5, 6, 7] * 4, "c": range(20)}) - q = ( - left.join(right, on="a") - .filter(pl.col("a") == 2) - .group_by(pl.col("a")) - .mean() - .select(pl.col("a"), pl.col("c")) - ) - _engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 2, - "broadcast_join_limit": 2, - "unique_fraction": {"a": 1.0}, - }, - ) - config_options = ConfigOptions.from_polars_engine(_engine) - ir = Translator(q._ldf.visit(), _engine).translate_ir() - ir, partition_info = lower_ir_graph( - ir, config_options, collect_statistics(ir, config_options) - ) - expect_dtype = ir.schema["a"] - expect_expr = (NamedExpr("a", Col(expect_dtype, "a")),) - assert partition_info[ir].partitioned_on == expect_expr - assert_gpu_result_equal(q, engine=streaming_engine) diff --git a/python/cudf_polars/tests/experimental/test_ray.py b/python/cudf_polars/tests/experimental/test_ray.py index ded4903c594..f62c3e3b831 100644 --- a/python/cudf_polars/tests/experimental/test_ray.py +++ b/python/cudf_polars/tests/experimental/test_ray.py @@ -53,7 +53,7 @@ def engine() -> Iterator[RayEngine]: def test_reserved_executor_keys() -> None: """executor_options rejects reserved keys.""" - for key in ("runtime", "cluster", "spmd_context", "ray_context"): + for key in ("cluster", "spmd_context", 
"ray_context"): with pytest.raises(TypeError, match="reserved"): RayEngine(executor_options={key: "anything"}) @@ -109,7 +109,6 @@ def test_executor_options_forwarded( ) -> None: """Reserved executor_options keys are injected into the engine config.""" opts = engine.config["executor_options"] - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "ray" assert isinstance(opts["ray_context"], RayContext) assert engine.rank_actors == opts["ray_context"].rank_actors @@ -258,7 +257,6 @@ def test_reset_updates_executor_options(reset_engine: RayEngine) -> None: opts = reset_engine.config["executor_options"] assert opts["max_rows_per_partition"] == 42 # Reserved keys are still injected by ``_reset``. - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "ray" assert isinstance(opts["ray_context"], RayContext) assert opts["ray_context"].rank_actors == reset_engine.rank_actors diff --git a/python/cudf_polars/tests/experimental/test_sort.py b/python/cudf_polars/tests/experimental/test_sort.py index 194686acf6b..f0abf5caade 100644 --- a/python/cudf_polars/tests/experimental/test_sort.py +++ b/python/cudf_polars/tests/experimental/test_sort.py @@ -9,7 +9,6 @@ from cudf_polars.testing.asserts import ( DEFAULT_CLUSTER, - DEFAULT_RUNTIME, assert_gpu_result_equal, ) @@ -22,7 +21,6 @@ def engine(): executor_options={ "max_rows_per_partition": 3, "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, "fallback_mode": "raise", }, ) @@ -36,7 +34,6 @@ def engine_large(): executor_options={ "max_rows_per_partition": 2_100, "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, "fallback_mode": "raise", }, ) @@ -139,7 +136,6 @@ def test_sort_after_sparse_join(): executor="streaming", executor_options={ "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, "max_rows_per_partition": 4, }, ) diff --git a/python/cudf_polars/tests/experimental/test_spmd.py b/python/cudf_polars/tests/experimental/test_spmd.py index 9fef0e00350..96ec5eab932 100644 --- 
a/python/cudf_polars/tests/experimental/test_spmd.py +++ b/python/cudf_polars/tests/experimental/test_spmd.py @@ -66,7 +66,7 @@ def test_single_communicator_outside_rrun() -> None: def test_reserved_keys() -> None: """executor_options rejects reserved keys.""" - for key in ("runtime", "cluster", "spmd_context"): + for key in ("cluster", "spmd_context"): with ( pytest.raises(TypeError, match="reserved"), SPMDEngine(executor_options={key: "anything"}), @@ -320,7 +320,6 @@ def test_reset_updates_executor_options(comm: Communicator) -> None: opts = engine.config["executor_options"] assert opts["max_rows_per_partition"] == 42 # Reserved keys are still injected by ``_reset``. - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "spmd" assert isinstance(opts["spmd_context"], SPMDContext) diff --git a/python/cudf_polars/tests/experimental/test_unique.py b/python/cudf_polars/tests/experimental/test_unique.py index 49d2b580300..6bb30624cb6 100644 --- a/python/cudf_polars/tests/experimental/test_unique.py +++ b/python/cudf_polars/tests/experimental/test_unique.py @@ -34,12 +34,9 @@ def df(): @pytest.mark.parametrize("subset", [None, ("y",), ("y", "z")]) @pytest.mark.parametrize("keep", ["first", "last", "any", "none"]) @pytest.mark.parametrize("maintain_order", [True, False]) -@pytest.mark.parametrize("cardinality", [{}, {"y": 0.7}]) -def test_unique( - df, streaming_engine_factory, keep, subset, maintain_order, cardinality -): +def test_unique(df, streaming_engine_factory, keep, subset, maintain_order): engine = streaming_engine_factory( - StreamingOptions(unique_fraction=cardinality, fallback_mode="warn"), + StreamingOptions(fallback_mode="warn"), ) q = df.unique(subset=subset, keep=keep, maintain_order=maintain_order) check_row_order = maintain_order @@ -50,40 +47,16 @@ def test_unique( assert_gpu_result_equal(q, engine=engine, check_row_order=check_row_order) -def test_unique_fallback(df, streaming_engine_factory): - engine = streaming_engine_factory( - 
StreamingOptions( - unique_fraction={"y": 1.0}, - fallback_mode="raise", - dynamic_planning=None, - ), - ) - q = df.unique(keep="first", maintain_order=True) - with pytest.raises( - NotImplementedError, - match="Unsupported unique options", - ): - assert_gpu_result_equal(q, engine=engine) - - @pytest.mark.parametrize("maintain_order", [True, False]) -@pytest.mark.parametrize("cardinality", [{}, {"y": 0.5}]) -def test_unique_select(df, streaming_engine_factory, maintain_order, cardinality): +def test_unique_select(df, streaming_engine_factory, maintain_order): engine = streaming_engine_factory( StreamingOptions( max_rows_per_partition=4, - unique_fraction=cardinality, fallback_mode="warn", ), ) q = df.select(pl.col("y").unique(maintain_order=maintain_order)) - if cardinality == {"y": 0.5} and maintain_order: - with pytest.warns( - UserWarning, match="Unsupported unique options for multiple partitions." - ): - assert_gpu_result_equal(q, engine=engine, check_row_order=False) - else: - assert_gpu_result_equal(q, engine=engine, check_row_order=False) + assert_gpu_result_equal(q, engine=engine, check_row_order=False) @pytest.mark.parametrize("keep", ["first", "last", "any"]) diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 3cd66bc527d..6004c5eef40 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -4,7 +4,7 @@ from __future__ import annotations import sys -from typing import Any, cast +from typing import cast import pytest @@ -35,20 +35,7 @@ StreamingExecutor, _default_cuda_stream_policy, ) -from cudf_polars.utils.cuda_stream import ( - get_cuda_stream, - get_dask_cuda_stream, -) - - -@pytest.fixture(params=[False, True], ids=["norapidsmpf.single", "rapidsmpf.single"]) -def rapidsmpf_single_available(request, monkeypatch): - monkeypatch.setattr( - cudf_polars.utils.config, - "rapidsmpf_single_available", - lambda: request.param, - ) - return request.param +from 
cudf_polars.utils.cuda_stream import get_cuda_stream def test_polars_verbose_warns(monkeypatch): @@ -232,47 +219,6 @@ def test_parquet_options_from_none() -> None: assert config.parquet_options.chunked is True -def test_validate_streaming_executor_shuffle_method( - *, rapidsmpf_single_available: bool -) -> None: - config = ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - executor_options={"shuffle_method": "tasks"}, - ) - ) - assert config.executor.name == "streaming" - assert config.executor.shuffle_method == "tasks" - - # rapidsmpf with single cluster - engine = pl.GPUEngine( - executor="streaming", - executor_options={"shuffle_method": "rapidsmpf", "cluster": "single"}, - ) - - if rapidsmpf_single_available: - config = ConfigOptions.from_polars_engine(engine) - assert config.executor.name == "streaming" - assert config.executor.shuffle_method == "rapidsmpf-single" - else: - with pytest.raises(ValueError, match="rapidsmpf is not installed"): - ConfigOptions.from_polars_engine(engine) - - -def test_join_rapidsmpf_single_private_config() -> None: - # The user may not specify "rapidsmpf-single" directly - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "shuffle_method": "rapidsmpf-single", - "runtime": "tasks", - }, - ) - with pytest.raises(ValueError, match="not a supported shuffle method"): - ConfigOptions.from_polars_engine(engine) - - @pytest.mark.parametrize("executor", ["in-memory", "streaming"]) def test_hashable(executor: str) -> None: config = ConfigOptions.from_polars_engine( @@ -319,31 +265,11 @@ def test_validate_cluster() -> None: ) -def test_validate_shuffle_method_defaults() -> None: - config = ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - ) - ) - assert config.executor.name == "streaming" - assert config.executor.shuffle_method == "tasks" # Default for single cluster - - with pytest.raises(ValueError, match="'foo' is not a valid ShuffleMethod"): - 
ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - executor_options={"shuffle_method": "foo"}, - ) - ) - - @pytest.mark.parametrize( "option", [ "max_rows_per_partition", - "unique_fraction", "target_partition_size", - "groupby_n_ary", "broadcast_join_limit", "sink_to_directory", "client_device_threshold", @@ -409,11 +335,8 @@ def test_config_option_from_env(monkeypatch: pytest.MonkeyPatch) -> None: m.setenv("CUDF_POLARS__EXECUTOR__CLUSTER", "single") m.setenv("CUDF_POLARS__EXECUTOR__FALLBACK_MODE", "silent") m.setenv("CUDF_POLARS__EXECUTOR__MAX_ROWS_PER_PARTITION", "42") - m.setenv("CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION", '{"a": 0.5}') m.setenv("CUDF_POLARS__EXECUTOR__TARGET_PARTITION_SIZE", "100") - m.setenv("CUDF_POLARS__EXECUTOR__GROUPBY_N_ARY", "43") m.setenv("CUDF_POLARS__EXECUTOR__BROADCAST_JOIN_LIMIT", "44") - m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "tasks") m.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default") engine = pl.GPUEngine() @@ -422,11 +345,8 @@ def test_config_option_from_env(monkeypatch: pytest.MonkeyPatch) -> None: assert config.executor.cluster == "single" assert config.executor.fallback_mode == "silent" assert config.executor.max_rows_per_partition == 42 - assert config.executor.unique_fraction == {"a": 0.5} assert config.executor.target_partition_size == 100 - assert config.executor.groupby_n_ary == 43 assert config.executor.broadcast_join_limit == 44 - assert config.executor.shuffle_method == "tasks" assert config.cuda_stream_policy is None @@ -498,12 +418,6 @@ def test_default_executor() -> None: assert config.executor.name == "streaming" -def test_default_runtime() -> None: - config = ConfigOptions.from_polars_engine(pl.GPUEngine()) - assert config.executor.name == "streaming" - assert config.executor.runtime == "tasks" - - @pytest.mark.parametrize( "memory_resource, memory_resource_config", [ @@ -537,10 +451,7 @@ def test_memory_resource(memory_resource, memory_resource_config) -> None: if 
memory_resource is None and memory_resource_config is None: # The default case: We make a new RMM MR, whose type depends on the GPU's features. - if _is_concurrent_managed_access_supported(): - assert isinstance(result, rmm.mr.PrefetchResourceAdaptor) - else: - assert isinstance(result, rmm.mr.CudaAsyncMemoryResource) + assert isinstance(result, rmm.mr.CudaAsyncMemoryResource) elif memory_resource is None: # Configured through memory_resource_config @@ -608,21 +519,17 @@ def test_cuda_stream_pool(): def test_cuda_stream_policy_default(monkeypatch: pytest.MonkeyPatch) -> None: # Default from engine config = ConfigOptions.from_polars_engine(pl.GPUEngine()) - assert config.cuda_stream_policy is None + assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "tasks"}) - ) - assert config.cuda_stream_policy is None + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) + assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) # Default from env monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default") config = ConfigOptions.from_polars_engine(pl.GPUEngine()) assert config.cuda_stream_policy is None - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "tasks"}) - ) + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) assert config.cuda_stream_policy is None @@ -635,26 +542,19 @@ def test_default_cuda_stream_policy(monkeypatch: pytest.MonkeyPatch) -> None: assert isinstance(result, CUDAStreamPoolConfig) -def test_cuda_stream_policy_from_config(*, rapidsmpf_single_available: bool) -> None: +def test_cuda_stream_policy_from_config() -> None: engine = pl.GPUEngine( executor="streaming", - executor_options={"runtime": "rapidsmpf"}, cuda_stream_policy={ "pool_size": 32, "flags": rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING, }, ) - if rapidsmpf_single_available: - config = 
ConfigOptions.from_polars_engine(engine) - assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) - assert config.cuda_stream_policy.pool_size == 32 - assert ( - config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING - ) - config.cuda_stream_policy.build().get_stream() # no exception - else: - with pytest.raises(ValueError, match="The rapidsmpf streaming engine"): - ConfigOptions.from_polars_engine(engine) + config = ConfigOptions.from_polars_engine(engine) + assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) + assert config.cuda_stream_policy.pool_size == 32 + assert config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING + config.cuda_stream_policy.build().get_stream() # no exception @pytest.mark.parametrize( @@ -667,26 +567,19 @@ def test_cuda_stream_policy_from_config(*, rapidsmpf_single_available: bool) -> '{"pool_size": 32}', ], ) -def test_cuda_stream_policy_from_env( - monkeypatch: pytest.MonkeyPatch, env: str, *, rapidsmpf_single_available: bool -) -> None: +def test_cuda_stream_policy_from_env(monkeypatch: pytest.MonkeyPatch, env: str) -> None: monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", env) - runtime = "tasks" if env == "default" else "rapidsmpf" - engine = pl.GPUEngine(executor="streaming", executor_options={"runtime": runtime}) - if runtime == "rapidsmpf" and rapidsmpf_single_available: - config = ConfigOptions.from_polars_engine(engine) + engine = pl.GPUEngine(executor="streaming") + config = ConfigOptions.from_polars_engine(engine) + if env == "default": + assert config.cuda_stream_policy is None + else: assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) if env == "pool": assert config.cuda_stream_policy.pool_size == 16 assert config.cuda_stream_policy.flags == CudaStreamFlags.NON_BLOCKING else: assert config.cuda_stream_policy.pool_size == 32 - elif runtime == "rapidsmpf": - with pytest.raises(ValueError, match="The rapidsmpf streaming engine"): - 
ConfigOptions.from_polars_engine(engine) - else: - config = ConfigOptions.from_polars_engine(engine) - assert config.cuda_stream_policy is None def test_cuda_stream_policy_from_env_invalid(monkeypatch: pytest.MonkeyPatch): @@ -696,41 +589,26 @@ def test_cuda_stream_policy_from_env_invalid(monkeypatch: pytest.MonkeyPatch): def test_cuda_stream_policy_default_rapidsmpf(monkeypatch: pytest.MonkeyPatch) -> None: - pytest.importorskip("rapidsmpf") - # Default from engine - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "rapidsmpf"}) - ) + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) assert config.cuda_stream_policy.pool_size == 16 assert config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING # "default" user argument overrides pool default monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default") - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "rapidsmpf"}) - ) + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) assert config.cuda_stream_policy is None -@pytest.mark.parametrize( - "polars_kwargs", - [ - {"executor": "in-memory"}, - {"executor": "streaming", "executor_options": {"runtime": "tasks"}}, - ], -) -def test_cuda_stream_policy_pool_only_supported_by_rapidsmpf( - polars_kwargs: dict[str, Any], -) -> None: +def test_cuda_stream_policy_pool_in_memory_unsupported() -> None: with pytest.raises( ValueError, - match="A stream pool is only supported by the rapidsmpf runtime.", + match="A stream pool is only supported by the streaming executor.", ): ConfigOptions.from_polars_engine( pl.GPUEngine( - **polars_kwargs, + executor="in-memory", cuda_stream_policy={"pool_size": 32, "flags": "NON_BLOCKING"}, ) ) @@ -903,8 +781,3 @@ def test_dask_sink_to_directory_false_raises() -> None: ValueError, match="The dask cluster requires 
sink_to_directory=True" ): StreamingExecutor(cluster=Cluster.DASK, sink_to_directory=False) - - -def test_get_dask_cuda_stream() -> None: - stream = get_dask_cuda_stream() - assert stream is not None diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py index e3e788f2866..a655efbe422 100644 --- a/python/cudf_polars/tests/test_scan.py +++ b/python/cudf_polars/tests/test_scan.py @@ -718,7 +718,7 @@ def test_scan_parquet_zero_width_with_limit( ): request.applymarker( pytest.mark.xfail( - is_streaming_engine(engine) and custom_engine is None, + is_streaming_engine(engine) or custom_engine is not None, reason="https://github.com/rapidsai/cudf/issues/21644", ) ) diff --git a/python/cudf_polars/tests/test_sink.py b/python/cudf_polars/tests/test_sink.py index 7b69f6904b4..d23559d2134 100644 --- a/python/cudf_polars/tests/test_sink.py +++ b/python/cudf_polars/tests/test_sink.py @@ -157,6 +157,7 @@ def test_chunked_sink_empty_table_to_parquet(tmp_path): pl.LazyFrame(), tmp_path / "out.parquet", engine=pl.GPUEngine( + executor="in-memory", raise_on_fail=True, parquet_options={"chunked": True, "n_output_chunks": 2}, ), diff --git a/python/cudf_polars/tests/test_tracing.py b/python/cudf_polars/tests/test_tracing.py index 184c0a77d38..283ca361682 100644 --- a/python/cudf_polars/tests/test_tracing.py +++ b/python/cudf_polars/tests/test_tracing.py @@ -55,9 +55,10 @@ def test_trace_basic( assert b"frames_input" in result assert b"total_bytes_output" in result assert b"total_bytes_input" in result - assert b"rmm_total_bytes_output" in result - assert b"rmm_total_bytes_input" in result - assert b"rmm_current_bytes_output" in result + # TODO: With rapidsmpf are the rmm fields not supposed to be logged? 
+ assert b"rmm_total_bytes_output" not in result + assert b"rmm_total_bytes_input" not in result + assert b"rmm_current_bytes_output" not in result assert b"overhead_duration" in result @@ -79,10 +80,6 @@ def test_import_without_structlog() -> None: subprocess.check_call([sys.executable, "-c", code]) -@pytest.mark.skipif( - cudf_polars.testing.asserts.DEFAULT_RUNTIME != "rapidsmpf", - reason="Requires 'rapidsmpf' runtime.", -) def test_log_query_plan() -> None: """Test that log_query_plan emits a Query Plan event.""" import os @@ -98,7 +95,6 @@ def test_log_query_plan() -> None: executor="streaming", executor_options={ "cluster": "single", - "runtime": "rapidsmpf", "max_rows_per_partition": 5, }, memory_resource=rmm.mr.ManagedMemoryResource(), @@ -126,7 +122,6 @@ def test_log_query_plan() -> None: reason="Requires CUDF_POLARS_LOG_TRACES=1.", ) def test_sets_cudf_polars_query_id(): - pytest.importorskip("rapidsmpf") left = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) right = pl.LazyFrame({"a": [1, 2, 3], "c": [7, 8, 9]}) @@ -136,7 +131,6 @@ def test_sets_cudf_polars_query_id(): engine = pl.GPUEngine( executor="streaming", raise_on_fail=True, - executor_options={"runtime": "rapidsmpf"}, ) with structlog.testing.capture_logs( diff --git a/python/cudf_polars/tests/testing/test_engine_utils.py b/python/cudf_polars/tests/testing/test_engine_utils.py index faf113502d6..346a11acf2e 100644 --- a/python/cudf_polars/tests/testing/test_engine_utils.py +++ b/python/cudf_polars/tests/testing/test_engine_utils.py @@ -3,8 +3,6 @@ from __future__ import annotations -import pytest - from cudf_polars.testing.engine_utils import ( EngineFixtureParam, create_streaming_options, @@ -30,7 +28,6 @@ def test_engine_fixture_param_small_blocksize(): def test_create_streaming_options_medium(): - pytest.importorskip("rapidsmpf") opts = create_streaming_options("medium") assert opts.max_rows_per_partition == 50 assert opts.target_partition_size == 1_000_000 @@ -38,7 +35,6 @@ def 
test_create_streaming_options_medium(): def test_create_streaming_options_small(): - pytest.importorskip("rapidsmpf") opts = create_streaming_options("small") assert opts.max_rows_per_partition == 4 assert opts.target_partition_size == 10 @@ -46,7 +42,6 @@ def test_create_streaming_options_small(): def test_create_streaming_options_overrides_merge(): """Overrides take precedence over the blocksize baseline.""" - pytest.importorskip("rapidsmpf") from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions overrides = StreamingOptions(max_rows_per_partition=999) From 65df1061882b16b1c5e4696fe2dedda432be4ca8 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 May 2026 20:00:13 -0700 Subject: [PATCH 09/12] Use thread pool to submit hybrid scan host IO tasks (#21992) This PR uses the host worker pool to submit hybrid scan's host-read IO tasks so that the mutex can be safely released after submission. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/21992 --- cpp/src/io/parquet/io_utils/parquet_io_utils.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp b/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp index 3e67b49d03e..9b6953b4bd1 100644 --- a/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp +++ b/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp @@ -6,6 +6,7 @@ #include "io/comp/common.hpp" #include "io/parquet/parquet_common.hpp" +#include #include #include #include @@ -16,8 +17,8 @@ #include #include +#include #include -#include #include @@ -118,8 +119,7 @@ fetch_byte_ranges_to_device_async( stream.synchronize(); { - auto iter = - thrust::make_zip_iterator(io_offsets.begin(), io_sizes.begin(), destinations.begin()); + auto iter = 
cuda::make_zip_iterator(io_offsets.begin(), io_sizes.begin(), destinations.begin()); std::lock_guard lock(mutex); @@ -128,16 +128,14 @@ fetch_byte_ranges_to_device_async( auto const io_size = cuda::std::get<1>(tuple); auto const dest = cuda::std::get<2>(tuple); - // Directly read the column chunk data to the device - // buffer if supported + // Directly read the column chunk data to the device buffer if supported if (datasource.supports_device_read() and datasource.is_device_read_preferred(io_size)) { device_read_tasks.emplace_back( datasource.device_read_async(io_offset, io_size, dest, stream)); } else { - // Read the column chunk data to the host buffer and - // copy it to the device buffer - host_read_tasks.emplace_back( - std::async(std::launch::deferred, [&datasource, io_offset, io_size, dest, stream]() { + // Read the column chunk data to the host buffer and copy it to the device buffer + host_read_tasks.emplace_back(cudf::detail::host_worker_pool().submit_task( + [&datasource, io_offset, io_size, dest, stream]() { auto host_buffer = datasource.host_read(io_offset, io_size); cudf::detail::cuda_memcpy_async( cudf::device_span{dest, io_size}, From 8d76fc287ea0474f4f66a60f86368aa604a9b4bd Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 May 2026 23:03:11 -0700 Subject: [PATCH 10/12] Python bindings and pytests for `cudf::apply_deletion_mask` (#22145) Follow up #22144 Adds Python bindings for the `cudf::apply_deletion_mask` API and adds pytests for stream compaction. 
Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Bradley Dice (https://github.com/bdice) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/22145 --- .../libcudf/lists/stream_compaction.pxd | 7 ++ .../pylibcudf/libcudf/stream_compaction.pxd | 7 ++ python/pylibcudf/pylibcudf/lists.pxd | 7 ++ python/pylibcudf/pylibcudf/lists.pyi | 6 ++ python/pylibcudf/pylibcudf/lists.pyx | 42 +++++++++++ .../pylibcudf/pylibcudf/stream_compaction.pxd | 7 ++ .../pylibcudf/pylibcudf/stream_compaction.pyi | 6 ++ .../pylibcudf/pylibcudf/stream_compaction.pyx | 36 ++++++++++ .../pylibcudf/tests/test_stream_compaction.py | 69 +++++++++++++++++++ 9 files changed, 187 insertions(+) create mode 100644 python/pylibcudf/tests/test_stream_compaction.py diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd index 0187642e0c7..7514f9d159a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd @@ -19,6 +19,13 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ device_async_resource_ref mr ) except +libcudf_exception_handler + cdef unique_ptr[column] apply_deletion_mask( + const lists_column_view& lists_column, + const lists_column_view& deletion_mask, + cudaStream_t stream, + device_async_resource_ref mr + ) except +libcudf_exception_handler + cdef unique_ptr[column] distinct( const lists_column_view& lists_column, null_equality nulls_equal, diff --git a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd index 9f8686da472..9b5f6d287f3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd @@ -48,6 +48,13 @@ cdef extern from 
"cudf/stream_compaction.hpp" namespace "cudf" nogil: device_async_resource_ref mr ) except +libcudf_exception_handler + cdef unique_ptr[table] apply_deletion_mask( + table_view source_table, + column_view deletion_mask, + cudaStream_t stream, + device_async_resource_ref mr + ) except +libcudf_exception_handler + cdef unique_ptr[table] unique( table_view input, vector[size_type] keys, diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd index 88b09c01531..75db812de14 100644 --- a/python/pylibcudf/pylibcudf/lists.pxd +++ b/python/pylibcudf/pylibcudf/lists.pxd @@ -150,6 +150,13 @@ cpdef Column apply_boolean_mask( DeviceMemoryResource mr=*, ) +cpdef Column apply_deletion_mask( + Column, + Column, + object stream=*, + DeviceMemoryResource mr=*, +) + cpdef Column distinct( Column, null_equality, diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index 1e418b59726..6ff27345854 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -131,6 +131,12 @@ def apply_boolean_mask( stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... +def apply_deletion_mask( + input: Column, + deletion_mask: Column, + stream: CudaStreamLike | None = None, + mr: DeviceMemoryResource | None = None, +) -> Column: ... 
def distinct( input: Column, nulls_equal: NullEquality, diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index fd05242e44f..fbc07eebb8a 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -32,6 +32,7 @@ from pylibcudf.libcudf.lists.sorting cimport ( ) from pylibcudf.libcudf.lists.stream_compaction cimport ( apply_boolean_mask as cpp_apply_boolean_mask, + apply_deletion_mask as cpp_apply_deletion_mask, distinct as cpp_distinct, ) from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option @@ -61,6 +62,7 @@ __all__ = [ "ConcatenateNullPolicy", "DuplicateFindOption", "apply_boolean_mask", + "apply_deletion_mask", "concatenate_list_elements", "concatenate_rows", "contains", @@ -833,6 +835,46 @@ cpdef Column apply_boolean_mask( return Column.from_libcudf(move(c_result), _stream, mr) +cpdef Column apply_deletion_mask( + Column input, + Column deletion_mask, + object stream=None, + DeviceMemoryResource mr=None, +): + """Filters elements in each row of the input lists column using a deletion mask. + + For details, see :cpp:func:`apply_deletion_mask`. + + Parameters + ---------- + input : Column + The input lists column. + deletion_mask : Column + A lists-of-bools column used as a deletion mask. + + Returns + ------- + Column + Lists column with elements removed where deletion_mask is true. 
+ """ + cdef unique_ptr[column] c_result + cdef ListsColumnView list_view = input.list_view() + cdef ListsColumnView mask_view = deletion_mask.list_view() + + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + mr = _get_memory_resource(mr) + + with nogil: + c_result = cpp_apply_deletion_mask( + list_view.view(), + mask_view.view(), + _cs, + mr.get_mr(), + ) + return Column.from_libcudf(move(c_result), _stream, mr) + + cpdef Column distinct( Column input, null_equality nulls_equal, diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd index 6e904e11ce1..ffe36cebfbd 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd @@ -37,6 +37,13 @@ cpdef Table apply_boolean_mask( DeviceMemoryResource mr = *, ) +cpdef Table apply_deletion_mask( + Table source_table, + Column deletion_mask, + object stream = *, + DeviceMemoryResource mr = *, +) + cpdef Table unique( Table input, list keys, diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi index afdd692dde2..76e669f8995 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyi +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -37,6 +37,12 @@ def apply_boolean_mask( stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... +def apply_deletion_mask( + source_table: Table, + deletion_mask: Column, + stream: CudaStreamLike | None = None, + mr: DeviceMemoryResource | None = None, +) -> Table: ... 
def unique( input: Table, keys: list[int], diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index b4751078acb..2fe8705ea52 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -29,6 +29,7 @@ from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "DuplicateKeepOption", "apply_boolean_mask", + "apply_deletion_mask", "distinct", "distinct_indices", "drop_nans", @@ -151,6 +152,41 @@ cpdef Table apply_boolean_mask( return Table.from_libcudf(move(c_result), _stream, mr) +cpdef Table apply_deletion_mask( + Table source_table, + Column deletion_mask, + object stream=None, + DeviceMemoryResource mr=None, +): + """Filters out rows from the input table using a deletion mask. + + For details, see :cpp:func:`apply_deletion_mask`. + + Parameters + ---------- + source_table : Table + The input table to filter. + deletion_mask : Column + A boolean column used as a deletion mask. + + Returns + ------- + Table + Table with rows removed where deletion_mask is true. + """ + cdef unique_ptr[table] c_result + + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + mr = _get_memory_resource(mr) + + with nogil: + c_result = cpp_stream_compaction.apply_deletion_mask( + source_table.view(), deletion_mask.view(), _cs, mr.get_mr() + ) + return Table.from_libcudf(move(c_result), _stream, mr) + + cpdef Table unique( Table input, list keys, diff --git a/python/pylibcudf/tests/test_stream_compaction.py b/python/pylibcudf/tests/test_stream_compaction.py new file mode 100644 index 00000000000..ccf21c2a6b3 --- /dev/null +++ b/python/pylibcudf/tests/test_stream_compaction.py @@ -0,0 +1,69 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 + +import pyarrow as pa +import pytest +from utils import assert_column_eq, assert_table_eq + +import pylibcudf as plc + + +@pytest.fixture +def lists_column_and_mask(): + pa_input = pa.array( + [[0, 1, 2, 3], [4, 5], [6, 7, 8, 9]], type=pa.list_(pa.int32()) + ) + pa_mask = pa.array( + [ + [True, False, True, False], + [True, False], + [True, False, True, False], + ], + type=pa.list_(pa.bool_()), + ) + return pa_input, pa_mask + + +def test_lists_apply_boolean_mask(lists_column_and_mask): + pa_input, pa_mask = lists_column_and_mask + result = plc.lists.apply_boolean_mask( + plc.Column.from_arrow(pa_input), plc.Column.from_arrow(pa_mask) + ) + expected = pa.array([[0, 2], [4], [6, 8]], type=pa.list_(pa.int32())) + assert_column_eq(expected, result) + + +def test_lists_apply_deletion_mask(lists_column_and_mask): + pa_input, pa_mask = lists_column_and_mask + result = plc.lists.apply_deletion_mask( + plc.Column.from_arrow(pa_input), plc.Column.from_arrow(pa_mask) + ) + expected = pa.array([[1, 3], [5], [7, 9]], type=pa.list_(pa.int32())) + assert_column_eq(expected, result) + + +def test_apply_boolean_mask(): + pa_table = pa.table( + { + "a": pa.array([10, 40, 70, 5, 2, 10], type=pa.int32()), + "b": pa.array([10, 40, 70, 5, 2, 10], type=pa.float64()), + } + ) + pa_mask = pa.array( + [True, False, True, False, True, False], type=pa.bool_() + ) + result = plc.stream_compaction.apply_boolean_mask( + plc.Table.from_arrow(pa_table), plc.Column.from_arrow(pa_mask) + ) + expected = pa_table.filter(pa_mask) + assert_table_eq(expected, result) + + +def test_apply_deletion_mask(): + pa_table = pa.table({"a": pa.array([1, 2, 3, 4, 5], type=pa.int32())}) + pa_mask = pa.array([True, False, True, False, True], type=pa.bool_()) + result = plc.stream_compaction.apply_deletion_mask( + plc.Table.from_arrow(pa_table), plc.Column.from_arrow(pa_mask) + ) + expected = pa.table({"a": pa.array([2, 4], type=pa.int32())}) + assert_table_eq(expected, 
result) From c9ad1c58ab2626b788d7fc9a03e027e89cf210be Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Thu, 7 May 2026 08:52:41 -0500 Subject: [PATCH 11/12] Refactor ``sort_actor`` to prepare for ``OrderScheme`` changes (#22350) - Follow up to https://github.com/rapidsai/cudf/pull/22315 - Further revises `sort_actor` in preparation for https://github.com/rapidsai/rapidsmpf/pull/853 - Part of https://github.com/rapidsai/cudf/issues/22128 - Breaks apart `sort_actor` logic into modular steps, so we can avoid collecting boundaries when we already know the boundaries (future work). Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Matthew Murray (https://github.com/Matt711) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/22350 --- .../rapidsmpf/collectives/sort.py | 129 +++++++++++++----- 1 file changed, 94 insertions(+), 35 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py index ffc10ea44c2..a950df3ce34 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py @@ -337,16 +337,25 @@ async def _receive_and_buffer_chunks( return local_candidates_list +async def _forward_from_chunk_store( + context: Context, ch_out: Channel[TableChunk], chunk_store: ChunkStore +) -> None: + """Forward buffered messages from a ChunkStore into a channel.""" + for msg in chunk_store: + await ch_out.send(context, msg) + await ch_out.drain(context) + + async def _insert_chunks_into_shuffle( context: Context, comm: Communicator, + ir: Sort, + ir_context: IRExecutionContext, + ch_in: Channel[TableChunk], num_partitions: int, collective_ids: list[int], metadata_in: ChannelMetadata, - chunk_store: ChunkStore, sort_boundaries_df: DataFrame, - ir: Sort, - ir_context: IRExecutionContext, 
by: list[str], ) -> tuple[ShuffleManager, Sort]: """Create shuffle manager and insert each buffered chunk with sort-based splits.""" @@ -364,7 +373,7 @@ async def _insert_chunks_into_shuffle( partition_assignment=PartitionAssignment.CONTIGUOUS, ) async with shuffle.inserting() as inserter: - for msg in chunk_store: + while (msg := await ch_in.recv(context)) is not None: if skip_insert: continue seq_num = msg.sequence_number @@ -379,6 +388,8 @@ async def _insert_chunks_into_shuffle( upstreams=(available_chunk.stream, sort_boundaries_df.stream), ) + # TODO: Pre-sort chunks if they do not originate from the ChunkStore. + # (Not possible until we use _global_sort outside of sort_actor.) splits = find_sort_splits( sort_cols_tbl, sort_boundaries_df.table, @@ -453,6 +464,52 @@ async def _extract_partitions_and_send( await ch_out.drain(context) +async def _global_sort( + context: Context, + comm: Communicator, + ir: Sort, + ir_context: IRExecutionContext, + ch_out: Channel[TableChunk], + ch_in: Channel[TableChunk], + metadata_in: ChannelMetadata, + by: list[str], + num_partitions: int, + sort_boundaries_df: DataFrame, + collective_ids: list[int], + *, + tracer: ActorTracer | None, +) -> None: + """Global sort.""" + # TODO: Attach OrderScheme metadata here. 
+ output_metadata = ChannelMetadata( + local_count=max(1, num_partitions // comm.nranks), + partitioning=Partitioning(inter_rank=None, local="inherit"), + ) + await send_metadata(ch_out, context, output_metadata) + + shuffle, post_sort_ir = await _insert_chunks_into_shuffle( + context, + comm, + ir, + ir_context, + ch_in, + num_partitions, + collective_ids, + metadata_in, + sort_boundaries_df, + by, + ) + await _extract_partitions_and_send( + context, + ch_out, + shuffle, + post_sort_ir, + ir_context, + ir.schema, + tracer=tracer, + ) + + @define_actor() async def sort_actor( context: Context, @@ -467,10 +524,18 @@ async def sort_actor( collective_ids: list[int], ) -> None: """Streaming sort actor.""" - ch_replay = context.create_channel() + ch_sample_replay = context.create_channel() + ch_chunk_store = context.create_channel() async with shutdown_on_error( - context, ch_in, ch_out, ch_replay, trace_ir=ir, ir_context=ir_context + context, + ch_in, + ch_out, + ch_sample_replay, + ch_chunk_store, + trace_ir=ir, + ir_context=ir_context, ) as tracer: + # TODO: Skip sort if OrderScheme metadata is present and compatible. 
metadata_in = await recv_metadata(ch_in, context) if ir.zlice is not None: @@ -494,20 +559,19 @@ async def sort_actor( context, comm, ch_in, num_partitions, metadata_in, executor, collective_ids ) - output_metadata = ChannelMetadata( - local_count=max(1, num_partitions // comm.nranks), - partitioning=Partitioning(inter_rank=None, local="inherit"), - ) - await send_metadata(ch_out, context, output_metadata) - chunk_store = ChunkStore(context) _, local_candidates_list = await gather_in_task_group( replay_buffered_channel( - context, ch_replay, ch_in, sampled_chunks, metadata_in, trace_ir=ir + context, + ch_sample_replay, + ch_in, + sampled_chunks, + metadata_in, + trace_ir=ir, ), _receive_and_buffer_chunks( context, - ch_replay, + ch_sample_replay, chunk_store, ir, by, @@ -529,27 +593,22 @@ async def sort_actor( collective_ids.pop() if need_allgather else None, ) - shuffle, post_sort_ir = await _insert_chunks_into_shuffle( - context, - comm, - num_partitions, - collective_ids, - metadata_in, - chunk_store, - sort_boundaries_df, - ir, - ir_context, - by, - ) - - await _extract_partitions_and_send( - context, - ch_out, - shuffle, - post_sort_ir, - ir_context, - ir.schema, - tracer=tracer, + await gather_in_task_group( + _forward_from_chunk_store(context, ch_chunk_store, chunk_store), + _global_sort( + context, + comm, + ir, + ir_context, + ch_out, + ch_chunk_store, + metadata_in, + by, + num_partitions, + sort_boundaries_df, + collective_ids, + tracer=tracer, + ), ) From 16c6356f094b895afaf26887aeac9300c003c9b0 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 7 May 2026 21:07:24 +0200 Subject: [PATCH 12/12] Run the cudf-polars test suite against `DaskEngine` and `RayEngine` (#22381) Builds on the cached `streaming_engines` fixture from #22364, which amortizes SPMD bootstrap via `_reset()`, and extends the same pattern to Dask and Ray. 
With this change, the test matrix runs against: `["in-memory", "spmd", "spmd-small", "dask", "ray"]` subject to package availability and `rrun` gating. We might change the different setups later, but for now CI runs: | Engine | Block Size(s) | GPU Configuration | |----------------|-----------------------|-------------------| | `SPMDEngine` | `"medium"`, `"small"` | Single GPU | | `DaskEngine` | `"medium"` | Single GPU | | `RayEngine` | `"medium"` | Two GPUs | Authors: - Mads R. B. Kristensen (https://github.com/madsbk) - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Matthew Murray (https://github.com/Matt711) - Bradley Dice (https://github.com/bdice) - Peter Andreas Entschev (https://github.com/pentschev) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/22381 --- .github/workflows/pr.yaml | 1 + .github/workflows/test.yaml | 1 + ci/run_cudf_polars_experimental_pytests.sh | 2 +- ci/test_cudf_polars_experimental.sh | 2 +- dependencies.yaml | 13 ++++ .../cudf_polars/experimental/join.py | 20 +++--- .../cudf_polars/testing/engine_utils.py | 43 +++++++++++ python/cudf_polars/pyproject.toml | 3 + python/cudf_polars/tests/conftest.py | 72 +++++++++++++++++-- .../experimental/test_all_gather_host_data.py | 2 - .../tests/experimental/test_dataframescan.py | 21 +++--- .../tests/experimental/test_filter.py | 9 +-- .../tests/experimental/test_groupby.py | 8 ++- .../tests/experimental/test_io_multirank.py | 48 +++---------- .../tests/experimental/test_join.py | 52 ++++++++------ .../tests/experimental/test_metadata.py | 22 ++++-- .../tests/experimental/test_parallel.py | 4 +- .../tests/experimental/test_rolling.py | 13 +++- .../tests/experimental/test_select.py | 25 ++++--- .../tests/experimental/test_spilling.py | 8 +-- .../tests/experimental/test_statistics.py | 53 +++----------- .../tests/experimental/test_unique.py | 13 ++-- 22 files changed, 266 insertions(+), 169 deletions(-) diff --git 
a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 41a4c734f21..844f20fe573 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -503,6 +503,7 @@ jobs: # (rapidsmpf compatibility already validated in rapidsmpf CI) matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" script: "ci/test_cudf_polars_experimental.sh" cudf-polars-polars-tests: needs: [wheel-build-cudf-polars, changed-files] diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b4977f60def..a6b0b6f3326 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -174,6 +174,7 @@ jobs: matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: "ci/test_cudf_polars_experimental.sh" diff --git a/ci/run_cudf_polars_experimental_pytests.sh b/ci/run_cudf_polars_experimental_pytests.sh index d0a4767bd99..da659c7b386 100755 --- a/ci/run_cudf_polars_experimental_pytests.sh +++ b/ci/run_cudf_polars_experimental_pytests.sh @@ -10,5 +10,5 @@ set -euo pipefail # Support invoking outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/ -echo "Running the full cudf-polars test suite with both the in-memory and spmd engine" +echo "Running the full cudf-polars test suite" python -m pytest --cache-clear "$@" tests diff --git a/ci/test_cudf_polars_experimental.sh b/ci/test_cudf_polars_experimental.sh index 
aa3abd66254..4b796ff4b94 100755 --- a/ci/test_cudf_polars_experimental.sh +++ b/ci/test_cudf_polars_experimental.sh @@ -28,7 +28,7 @@ rapids-pip-retry install \ -v \ --prefer-binary \ --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental]" \ + "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental,ray]" \ "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" diff --git a/dependencies.yaml b/dependencies.yaml index b1eb276befb..f4acc169263 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -384,6 +384,14 @@ files: key: experimental includes: - run_cudf_polars_experimental + py_run_cudf_polars_ray: + output: pyproject + pyproject_dir: python/cudf_polars + extras: + table: project.optional-dependencies + key: ray + includes: + - depends_on_ray py_test_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars @@ -1290,6 +1298,11 @@ dependencies: - matrix: packages: - *rapidsmpf_unsuffixed + depends_on_ray: + common: + - output_types: [conda, requirements, pyproject] + packages: + - ray>=2.55.1 depends_on_rapids_logger: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/experimental/join.py b/python/cudf_polars/cudf_polars/experimental/join.py index cd5c514b45a..1682762c9e8 100644 --- a/python/cudf_polars/cudf_polars/experimental/join.py +++ b/python/cudf_polars/cudf_polars/experimental/join.py @@ -164,20 +164,22 @@ def _( left, pi_left = rec(left) right, pi_right = rec(right) - # Fallback to single partition on the smaller table + # Fallback to single partition on the smaller table whenever either + # side has more than one partition. 
left_count = pi_left[left].count right_count = pi_right[right].count output_count = max(left_count, right_count) - fallback_msg = "ConditionalJoin not supported for multiple partitions." - if left_count < right_count: - if left_count > 1 or dynamic_planning: + if output_count > 1 or dynamic_planning: + if left_count < right_count: left = Repartition(left.schema, left) pi_left[left] = PartitionInfo(count=1) - _fallback_inform(fallback_msg, config_options) - elif right_count > 1 or dynamic_planning: - right = Repartition(right.schema, right) - pi_right[right] = PartitionInfo(count=1) - _fallback_inform(fallback_msg, config_options) + else: + right = Repartition(right.schema, right) + pi_right[right] = PartitionInfo(count=1) + _fallback_inform( + "ConditionalJoin not supported for multiple partitions.", + config_options, + ) # Reconstruct and return new_node = ir.reconstruct([left, right]) diff --git a/python/cudf_polars/cudf_polars/testing/engine_utils.py b/python/cudf_polars/cudf_polars/testing/engine_utils.py index c36bcf2ed27..b0b640615f7 100644 --- a/python/cudf_polars/cudf_polars/testing/engine_utils.py +++ b/python/cudf_polars/cudf_polars/testing/engine_utils.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from collections.abc import Mapping + from contextlib import AbstractContextManager import polars as pl @@ -21,6 +22,15 @@ STREAMING_ENGINE_FIXTURE_PARAMS: list[str] = [] if importlib.util.find_spec("rapidsmpf") is not None: STREAMING_ENGINE_FIXTURE_PARAMS.extend(["spmd", "spmd-small"]) + # ``DaskEngine`` and ``RayEngine`` both reject construction inside an + # ``rrun`` cluster. 
+ from rapidsmpf.bootstrap import is_running_with_rrun as _is_running_with_rrun + + if not _is_running_with_rrun(): # pragma: no cover + if importlib.util.find_spec("distributed") is not None: + STREAMING_ENGINE_FIXTURE_PARAMS.append("dask") + if importlib.util.find_spec("ray") is not None: + STREAMING_ENGINE_FIXTURE_PARAMS.append("ray") ALL_ENGINE_FIXTURE_PARAMS = ["in-memory", *STREAMING_ENGINE_FIXTURE_PARAMS] @@ -63,6 +73,34 @@ def is_streaming_engine(obj: Any) -> bool: return isinstance(obj, StreamingEngine) +def warns_on_spmd( # pragma: no cover; rapidsmpf-only path + engine: Any, + *args: Any, + when: bool = True, + **kwargs: Any, +) -> AbstractContextManager[Any]: + """ + ``pytest.warns(*args, **kwargs)`` on SPMD; ``nullcontext`` otherwise. + + ``pytest.warns`` only captures warnings emitted in the test process. On + multi-process backends (``DaskEngine``, ``RayEngine``) the fallback + warning fires on workers/actors and only appears in worker logs/stdout, + so the assertion is replaced with a passthrough on those backends. + + The optional ``when`` kwarg lets callers compose an additional gate (e.g. + a parametrize value) without an outer ``if``. + """ + import contextlib + + import pytest + + from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + + if when and isinstance(engine, SPMDEngine): + return pytest.warns(*args, **kwargs) + return contextlib.nullcontext() + + def create_streaming_options( blocksize_mode: Literal["medium", "small"], overrides: StreamingOptions | None = None, @@ -87,6 +125,9 @@ def create_streaming_options( from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions from cudf_polars.utils.config import StreamingFallbackMode + # ``allow_gpu_sharing=True`` is always set so the cached multi-rank + # engines (Dask workers, Ray actors with ``num_ranks > 1``) don't trip + # the UUID-collision guard on every ``_reset(...)``. 
match blocksize_mode: case "medium": baseline = StreamingOptions( @@ -94,6 +135,7 @@ def create_streaming_options( dynamic_planning={}, target_partition_size=1_000_000, raise_on_fail=True, + allow_gpu_sharing=True, ) case "small": baseline = StreamingOptions( @@ -102,6 +144,7 @@ def create_streaming_options( target_partition_size=10, raise_on_fail=True, fallback_mode=StreamingFallbackMode.SILENT, + allow_gpu_sharing=True, ) case _: # pragma: no cover raise ValueError(f"Unknown blocksize_mode: {blocksize_mode!r}") diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 47633e42364..7703cad7dad 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -63,6 +63,9 @@ rapidsmpf = [ "pyarrow>=19.0.0,<24", "rapidsmpf==26.6.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +ray = [ + "ray>=2.55.1", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cudf" diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py index b3d83b36d36..65445b683ae 100644 --- a/python/cudf_polars/tests/conftest.py +++ b/python/cudf_polars/tests/conftest.py @@ -31,6 +31,12 @@ StreamingEngines: TypeAlias = Mapping[str, StreamingEngine] +# Number of ranks for multi-rank streaming engines that share one GPU +# (currently ``RayEngine``). Single-GPU dev hosts and CI runners require +# ``allow_gpu_sharing=True`` to oversubscribe one device across actors. 
+NUM_RANKS = 2 + + @pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"], scope="session") def with_nulls(request): return request.param @@ -89,6 +95,27 @@ def streaming_engines() -> Generator[StreamingEngines, None, None]: ) engines: dict[str, StreamingEngine] = {"spmd": SPMDEngine(comm=comm)} + + if "dask" in STREAMING_ENGINE_FIXTURE_PARAMS: # pragma: no cover + from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine + + engines["dask"] = DaskEngine(engine_options={"allow_gpu_sharing": True}) + + if "ray" in STREAMING_ENGINE_FIXTURE_PARAMS: # pragma: no cover + from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine + + # Always pin ``num_ranks`` so the cached engine has a deterministic + # actor count regardless of how many GPUs the host happens to have; + # otherwise ``RayEngine`` defaults to ``get_num_gpus_in_ray_cluster()`` + # and tests that depend on rank-count behavior (e.g. fast-count + # parquet, concat) become non-portable. Pinning ``num_ranks`` requires + # ``allow_gpu_sharing=True`` (production guard). + engines["ray"] = RayEngine( + num_ranks=NUM_RANKS, + engine_options={"allow_gpu_sharing": True}, + ray_init_options={"include_dashboard": False}, + ) + try: yield engines finally: @@ -108,6 +135,28 @@ def spmd_engine(streaming_engines: StreamingEngines) -> SPMDEngine: return engine +@pytest.fixture +def spmd_engine_factory( + streaming_engines: StreamingEngines, +) -> Callable[..., SPMDEngine]: + """ + Return a factory that yields the shared :class:`SPMDEngine`. + + Use this in place of :func:`streaming_engine_factory` for tests that + must run on SPMD only. 
+ """ + from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + + param = EngineFixtureParam(full_name="spmd") + + def factory(options: StreamingOptions | None = None) -> SPMDEngine: + engine = build_streaming_engine(param, streaming_engines, options) + assert isinstance(engine, SPMDEngine) + return engine + + return factory + + @pytest.fixture(params=STREAMING_ENGINE_FIXTURE_PARAMS) def _streaming_engine_param(request: pytest.FixtureRequest) -> EngineFixtureParam: """Parametrization helper to run tests for each streaming engine variant.""" @@ -246,10 +295,9 @@ def pytest_configure(config): config.addinivalue_line( "markers", - "skip_on_streaming_engine(reason): skip the test for streaming " - '``engine`` variants (e.g. ``"spmd"``, ``"spmd-small"``) while ' - "still letting the in-memory variant run. Use this to track features " - "that have no multi-partition implementation", + "skip_on_streaming_engine(reason, *, engine=None): skip the test for " + 'streaming ``engine`` variants (e.g. ``"spmd"``, ``"spmd-small"``, ' + '``"dask"``, ``"ray"``) while still allowing the in-memory variant to run.', ) # Ray's internal subprocess management leaks `/dev/null` file handles, and @@ -275,9 +323,23 @@ def pytest_collection_modifyitems(items): callspec = getattr(item, "callspec", None) if callspec is None: continue - engine_param = callspec.params.get("_all_engine_param") + # Tests bind to either ``engine`` (parametrized via ``_all_engine_param``) + # or ``streaming_engine`` / ``streaming_engine_factory`` (parametrized via + # ``_streaming_engine_param``). Check both. 
+ engine_param = callspec.params.get("_all_engine_param") or callspec.params.get( + "_streaming_engine_param" + ) if engine_param is None or engine_param == "in-memory": continue + engine_filter = marker.kwargs.get("engine") + if engine_filter is not None: + if isinstance(engine_filter, str): + engine_filter = (engine_filter,) + # Strip the ``-small`` suffix so ``"spmd-small"`` matches + # ``engine=("spmd",)``. + engine_name = engine_param.removesuffix("-small") + if engine_name not in engine_filter: + continue reason = ( marker.args[0] if marker.args diff --git a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py index 8f09a82c4bd..c85598a8c64 100644 --- a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py +++ b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py @@ -59,8 +59,6 @@ def test_gather_cluster_info(streaming_engine) -> None: assert isinstance(info.gpu_uuid, str) # Each rank runs in its own process. assert len({info.pid for info in infos}) == streaming_engine.nranks - # Without allow_gpu_sharing, all UUIDs must be unique (enforced at init). - assert len({info.gpu_uuid for info in infos}) == streaming_engine.nranks def test_cluster_info_cuda_visible_devices(monkeypatch) -> None: diff --git a/python/cudf_polars/tests/experimental/test_dataframescan.py b/python/cudf_polars/tests/experimental/test_dataframescan.py index dbf22848824..fb263e20b94 100644 --- a/python/cudf_polars/tests/experimental/test_dataframescan.py +++ b/python/cudf_polars/tests/experimental/test_dataframescan.py @@ -60,19 +60,20 @@ def test_parallel_dataframescan(df, streaming_engine_factory, max_rows_per_parti assert count == 1 -@pytest.mark.xfail( - reason=( - "Multi-rank Union interleaves child outputs across ranks: client " - "receives [rank0_A, rank0_B, rank1_A, rank1_B] instead of the " - "polars-CPU [A, B]. 
Tracked in " - "https://github.com/rapidsai/cudf/issues/22376." - ), - strict=False, -) -def test_dataframescan_concat(df, streaming_engine_factory): +def test_dataframescan_concat(request, df, streaming_engine_factory): streaming_engine = streaming_engine_factory( StreamingOptions(max_rows_per_partition=1_000), ) + if streaming_engine.nranks > 1: + # Multi-rank Union interleaves child outputs across ranks: client + # receives [rank0_A, rank0_B, rank1_A, rank1_B] instead of the + # polars-CPU [A, B]. + request.applymarker( + pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/22376", + strict=False, + ) + ) df2 = pl.concat([df, df]) assert_gpu_result_equal(df2, engine=streaming_engine) diff --git a/python/cudf_polars/tests/experimental/test_filter.py b/python/cudf_polars/tests/experimental/test_filter.py index 4fb11df691c..b8b4fb2749c 100644 --- a/python/cudf_polars/tests/experimental/test_filter.py +++ b/python/cudf_polars/tests/experimental/test_filter.py @@ -9,12 +9,11 @@ from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.engine_utils import warns_on_spmd @pytest.fixture def engine(streaming_engine_factory): - # ``fallback_mode="warn"`` overrides the small-blocksize baseline (which - # sets SILENT) so ``test_filter_non_pointwise`` can assert on the warning. return streaming_engine_factory( StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"), ) @@ -38,7 +37,9 @@ def test_filter_pointwise(df, engine): def test_filter_non_pointwise(df, engine): query = df.filter(pl.col("a") > pl.col("a").max()) - with pytest.warns( - UserWarning, match="This filter is not supported for multiple partitions." 
+ with warns_on_spmd( + engine, + UserWarning, + match="This filter is not supported for multiple partitions.", ): assert_gpu_result_equal(query, engine=engine) diff --git a/python/cudf_polars/tests/experimental/test_groupby.py b/python/cudf_polars/tests/experimental/test_groupby.py index 03d87fe23e9..6ca11387da0 100644 --- a/python/cudf_polars/tests/experimental/test_groupby.py +++ b/python/cudf_polars/tests/experimental/test_groupby.py @@ -131,8 +131,8 @@ def test_groupby_std_var_ddof(df, engine, agg, ddof): @pytest.mark.parametrize("fallback_mode", ["silent", "raise", "warn", "foo"]) -def test_groupby_fallback(df, fallback_mode, streaming_engine_factory): - streaming_engine = streaming_engine_factory( +def test_groupby_fallback(df, fallback_mode, spmd_engine_factory): + streaming_engine = spmd_engine_factory( StreamingOptions(fallback_mode=fallback_mode), ) match = "Failed to decompose groupby aggs" @@ -287,6 +287,10 @@ def test_groupby_count_type_mismatch(df, streaming_engine_factory): assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False) +@pytest.mark.skip_on_streaming_engine( + "patch.object on ShuffleManager.Inserter doesn't reach worker processes", + engine=("dask", "ray"), +) def test_shuffle_reduce_insert_finished_called_on_oom(streaming_engine_factory): streaming_engine = streaming_engine_factory( StreamingOptions(target_partition_size=10, max_rows_per_partition=5), diff --git a/python/cudf_polars/tests/experimental/test_io_multirank.py b/python/cudf_polars/tests/experimental/test_io_multirank.py index 2208cc67316..bf9e8e70343 100644 --- a/python/cudf_polars/tests/experimental/test_io_multirank.py +++ b/python/cudf_polars/tests/experimental/test_io_multirank.py @@ -7,16 +7,15 @@ from typing import TYPE_CHECKING import pytest -from rapidsmpf.bootstrap import is_running_with_rrun import polars as pl -from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine +from cudf_polars.experimental.rapidsmpf.frontend.options import 
StreamingOptions from cudf_polars.testing.asserts import assert_sink_result_equal from cudf_polars.utils.config import Cluster, StreamingExecutor if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable from pathlib import Path from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine @@ -39,43 +38,14 @@ def df() -> pl.LazyFrame: ) -@pytest.fixture(params=["spmd", "ray", "dask"]) +@pytest.fixture def engine( - request: pytest.FixtureRequest, - spmd_engine: SPMDEngine, -) -> Iterator[StreamingEngine]: - """Yield each supported streaming engine.""" - backend = request.param - executor_options = {"max_rows_per_partition": 1_000} - - if backend == "spmd": - with SPMDEngine( - comm=spmd_engine.comm, - executor_options=executor_options, - ) as eng: - yield eng - return - - if is_running_with_rrun(): - pytest.skip(f"{backend}Engine must not be created from within an rrun cluster") - - if backend == "ray": - pytest.importorskip("ray", reason="ray is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine - - with RayEngine( - executor_options=executor_options, - ray_init_options={"include_dashboard": False}, - ) as eng: - yield eng - return - - assert backend == "dask" - pytest.importorskip("distributed", reason="distributed is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine - - with DaskEngine(executor_options=executor_options) as eng: - yield eng + streaming_engine_factory: Callable[..., StreamingEngine], +) -> StreamingEngine: + """Yield each supported streaming engine pinned to small partitions.""" + return streaming_engine_factory( + StreamingOptions(max_rows_per_partition=1_000), + ) def test_sink_parquet_directory( diff --git a/python/cudf_polars/tests/experimental/test_join.py b/python/cudf_polars/tests/experimental/test_join.py index 6a09ff95ef5..1b4635dd924 100644 --- a/python/cudf_polars/tests/experimental/test_join.py +++ 
b/python/cudf_polars/tests/experimental/test_join.py @@ -19,6 +19,7 @@ from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.statistics import collect_statistics from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.engine_utils import warns_on_spmd from cudf_polars.utils.config import ConfigOptions, StreamingExecutor @@ -103,12 +104,11 @@ def test_join_conditional(reverse, max_rows_per_partition, streaming_engine_fact if reverse: left, right = right, left q = left.join_where(right, pl.col("y") < pl.col("yy")) - if max_rows_per_partition == 3: - with pytest.warns( - UserWarning, match="ConditionalJoin not supported for multiple partitions." - ): - assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False) - else: + with warns_on_spmd( + streaming_engine, + UserWarning, + match="ConditionalJoin not supported for multiple partitions.", + ): assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False) @@ -156,7 +156,7 @@ def test_join(left, right, how, reverse, streaming_engine_factory, options): @pytest.mark.parametrize("zlice", [(0, 2), (2, 2), (-2, None)]) -def test_join_and_slice(zlice, streaming_engine_factory): +def test_join_and_slice(request, zlice, streaming_engine_factory): streaming_engine = streaming_engine_factory( StreamingOptions( max_rows_per_partition=3, @@ -164,6 +164,16 @@ def test_join_and_slice(zlice, streaming_engine_factory): fallback_mode="warn", ), ) + if streaming_engine.nranks > 1: + # The multi-rank fallback for slice doesn't preserve row order + # within equal-key groups, so the slice can pick different rows + # than the CPU baseline. 
+ request.applymarker( + pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/22405", + strict=False, + ) + ) left = pl.LazyFrame( { "a": [1, 2, 3, 1, None], @@ -181,23 +191,22 @@ def test_join_and_slice(zlice, streaming_engine_factory): q = left.join(right, on="a", how="inner").slice(*zlice) # Check that we get the correct row count # See: https://github.com/rapidsai/cudf/issues/19153 - if zlice in {(2, 2), (-2, None)}: - with pytest.warns( - UserWarning, match="This slice not supported for multiple partitions." - ): - assert q.collect(engine=streaming_engine).height == q.collect().height - else: + with warns_on_spmd( + streaming_engine, + UserWarning, + match="This slice not supported for multiple partitions.", + when=zlice in {(2, 2), (-2, None)}, + ): assert q.collect(engine=streaming_engine).height == q.collect().height # Need sort to match order after a join q = left.join(right, on="a", how="inner").sort(pl.col("a")).slice(*zlice) - if zlice == (2, 2): - with pytest.warns( - UserWarning, - match="This slice not supported for multiple partitions.", - ): - assert_gpu_result_equal(q, engine=streaming_engine) - else: + with warns_on_spmd( + streaming_engine, + UserWarning, + match="This slice not supported for multiple partitions.", + when=zlice == (2, 2), + ): assert_gpu_result_equal(q, engine=streaming_engine) @@ -232,7 +241,8 @@ def test_join_maintain_order_fallback_streaming( ) q = left.join(right, on="y", how="inner", maintain_order=maintain_order) - with pytest.warns( + with warns_on_spmd( + streaming_engine, UserWarning, match=r"Join\(maintain_order=.*\) not supported for multiple partitions\.", ): diff --git a/python/cudf_polars/tests/experimental/test_metadata.py b/python/cudf_polars/tests/experimental/test_metadata.py index 618087a27c5..791e33744cd 100644 --- a/python/cudf_polars/tests/experimental/test_metadata.py +++ b/python/cudf_polars/tests/experimental/test_metadata.py @@ -66,20 +66,30 @@ def right() -> pl.LazyFrame: def 
test_rapidsmpf_join_metadata( left: pl.LazyFrame, right: pl.LazyFrame, - streaming_engine_factory, + spmd_engine_factory, options, ) -> None: - streaming_engine = streaming_engine_factory(options) - config_options = ConfigOptions.from_polars_engine(streaming_engine) + # Pinned to SPMD: ``ChannelMetadata.__reduce_cython__`` can't pickle + # ``self._handle`` across worker/actor processes, so the + # ``metadata_collector`` round-trip fails on Dask and Ray. + # + # When https://github.com/rapidsai/cudf/pull/22394 lands, dedup of + # replicated outputs moves to the Dask/Ray frontends and the + # ``duplicated`` flag's semantics change to "every rank holds the + # data". Revisit the ``len(metadata_collector) == 1`` and + # ``metadata.duplicated is False`` assertions below, and reconsider + # whether this test can widen to ``streaming_engine_factory``. + engine = spmd_engine_factory(options) + config_options = ConfigOptions.from_polars_engine(engine) broadcast_join_limit = config_options.executor.broadcast_join_limit q = left.join( right, on="y", how="left", ).filter(pl.col("x") > pl.col("zz")) - ir = Translator(q._ldf.visit(), streaming_engine).translate_ir() - left_count = left.collect(engine=streaming_engine).height - right_count = right.collect(engine=streaming_engine).height + ir = Translator(q._ldf.visit(), engine).translate_ir() + left_count = left.collect(engine=engine).height + right_count = right.collect(engine=engine).height metadata_collector = evaluate_logical_plan( ir, config_options, collect_metadata=True diff --git a/python/cudf_polars/tests/experimental/test_parallel.py b/python/cudf_polars/tests/experimental/test_parallel.py index 67fc372e2e4..a9a0ff63786 100644 --- a/python/cudf_polars/tests/experimental/test_parallel.py +++ b/python/cudf_polars/tests/experimental/test_parallel.py @@ -50,10 +50,10 @@ def test_rename_concat(streaming_engine) -> None: assert_gpu_result_equal(q, engine=streaming_engine) -def 
test_fallback_on_concat_zlice(streaming_engine_factory) -> None: +def test_fallback_on_concat_zlice(spmd_engine_factory) -> None: # Pin ``fallback_mode="warn"`` so the spmd-small baseline (which sets # ``SILENT``) doesn't suppress the warning this test asserts on. - streaming_engine = streaming_engine_factory(StreamingOptions(fallback_mode="warn")) + streaming_engine = spmd_engine_factory(StreamingOptions(fallback_mode="warn")) q = pl.concat( [ pl.LazyFrame({"a": [1, 2]}), diff --git a/python/cudf_polars/tests/experimental/test_rolling.py b/python/cudf_polars/tests/experimental/test_rolling.py index 37de6f7f8a1..ee3ae137e27 100644 --- a/python/cudf_polars/tests/experimental/test_rolling.py +++ b/python/cudf_polars/tests/experimental/test_rolling.py @@ -8,6 +8,7 @@ import polars as pl from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions +from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine from cudf_polars.testing.asserts import assert_gpu_result_equal from cudf_polars.utils.versions import POLARS_VERSION_LT_136 @@ -46,10 +47,20 @@ def test_rolling_datetime(request, engine): assert_gpu_result_equal(q, engine=engine) -def test_over_in_filter_unsupported(streaming_engine_factory) -> None: +def test_over_in_filter_unsupported(request, streaming_engine_factory) -> None: engine = streaming_engine_factory( StreamingOptions(max_rows_per_partition=1, fallback_mode="warn"), ) + if not isinstance(engine, SPMDEngine): + # On Dask/Ray the fallback warning fires on worker processes and is + # invisible to ``pytest.warns``; the multi-rank fallback also + # doesn't preserve row order. 
+ request.applymarker( + pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/22405", + strict=False, + ) + ) q = pl.concat( [ pl.LazyFrame({"k": ["x", "y"], "v": [3, 2]}), diff --git a/python/cudf_polars/tests/experimental/test_select.py b/python/cudf_polars/tests/experimental/test_select.py index 264f8b5aab1..cef9f0f66cf 100644 --- a/python/cudf_polars/tests/experimental/test_select.py +++ b/python/cudf_polars/tests/experimental/test_select.py @@ -22,6 +22,7 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) +from cudf_polars.testing.engine_utils import warns_on_spmd from cudf_polars.utils.versions import ( POLARS_VERSION_LT_132, POLARS_VERSION_LT_134, @@ -54,8 +55,8 @@ def test_select(df, engine): @pytest.mark.parametrize("fallback_mode", ["silent", "raise", "warn", "foo"]) -def test_select_reduce_fallback(df, streaming_engine_factory, fallback_mode): - engine = streaming_engine_factory( +def test_select_reduce_fallback(df, spmd_engine_factory, fallback_mode): + engine = spmd_engine_factory( StreamingOptions(max_rows_per_partition=3, fallback_mode=fallback_mode), ) match = "This selection is not supported for multiple partitions." 
@@ -84,13 +85,17 @@ def test_select_reduce_fallback(df, streaming_engine_factory, fallback_mode): assert_gpu_result_equal(query, engine=engine) -def test_select_fill_null_with_strategy(df, engine): +def test_select_fill_null_with_strategy(df, streaming_engine_factory): + engine = streaming_engine_factory( + StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"), + ) q = df.select(pl.col("a").forward_fill()) if POLARS_VERSION_LT_132: assert_ir_translation_raises(q, NotImplementedError) else: - with pytest.warns( + with warns_on_spmd( + engine, UserWarning, match="fill_null with strategy other than 'zero' or 'one' is not supported for multiple partitions", ): @@ -183,15 +188,19 @@ def test_select_mean_with_decimals(engine): assert_gpu_result_equal(q, engine=engine, check_dtypes=not POLARS_VERSION_LT_134) -def test_select_with_len(engine): - # https://github.com/pola-rs/polars/issues/25592 +def test_select_with_len(streaming_engine_factory): + engine = streaming_engine_factory( + StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"), + ) df1 = pl.LazyFrame({"c0": [1] * 4}) df2 = pl.LazyFrame({"c0": [2] * 4}) q = pl.concat([df1.join(df2, how="cross"), df1.with_columns(pl.lit(None))]).select( pl.len() ) - with pytest.warns( - UserWarning, match="Cross join not support for multiple partitions" + with warns_on_spmd( + engine, + UserWarning, + match="Cross join not support for multiple partitions", ): assert_gpu_result_equal(q, engine=engine) diff --git a/python/cudf_polars/tests/experimental/test_spilling.py b/python/cudf_polars/tests/experimental/test_spilling.py index 6aa11801132..7f79b911038 100644 --- a/python/cudf_polars/tests/experimental/test_spilling.py +++ b/python/cudf_polars/tests/experimental/test_spilling.py @@ -50,20 +50,20 @@ def create_test_table(nbytes: int, stream: Stream) -> plc.Table: ], ) def test_make_spill_function( - streaming_engine_factory, + spmd_engine_factory, *, pinned_memory: bool, spilled_host_mem_type: MemoryType, ) -> 
None: """Test that spilling prioritizes longest queues and newest messages.""" - engine = streaming_engine_factory(StreamingOptions(pinned_memory=pinned_memory)) + engine = spmd_engine_factory(StreamingOptions(pinned_memory=pinned_memory)) context = engine.context if spilled_host_mem_type == MemoryType.PINNED_HOST: - assert engine.context.br().pinned_mr is not None + assert context.br().pinned_mr is not None other_host_mem_type = MemoryType.HOST else: - assert engine.context.br().pinned_mr is None + assert context.br().pinned_mr is None other_host_mem_type = MemoryType.PINNED_HOST # Create 3 spillable message containers simulating fanout buffers diff --git a/python/cudf_polars/tests/experimental/test_statistics.py b/python/cudf_polars/tests/experimental/test_statistics.py index 82c121d5830..42014a02106 100644 --- a/python/cudf_polars/tests/experimental/test_statistics.py +++ b/python/cudf_polars/tests/experimental/test_statistics.py @@ -7,14 +7,12 @@ from typing import TYPE_CHECKING import pytest -from rapidsmpf.bootstrap import is_running_with_rrun -from rapidsmpf.config import Options from rapidsmpf.statistics import Statistics -from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine +from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine @@ -25,49 +23,14 @@ ] -@pytest.fixture(params=["spmd", "ray", "dask"]) +@pytest.fixture def engine( - request: pytest.FixtureRequest, - spmd_engine: SPMDEngine, -) -> Iterator[StreamingEngine]: + streaming_engine_factory: Callable[..., StreamingEngine], +) -> StreamingEngine: """Yield each supported streaming engine with statistics enabled.""" - backend = request.param - rapidsmpf_options = Options({"statistics": "True"}) - executor_options = {"max_rows_per_partition": 10} - - if backend == "spmd": - with SPMDEngine( 
- comm=spmd_engine.comm, - rapidsmpf_options=rapidsmpf_options, - executor_options=executor_options, - ) as engine: - yield engine - return - - if is_running_with_rrun(): - pytest.skip(f"{backend}Engine must not be created from within an rrun cluster") - - if backend == "ray": - pytest.importorskip("ray", reason="ray is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine - - with RayEngine( - rapidsmpf_options=rapidsmpf_options, - executor_options=executor_options, - ray_init_options={"include_dashboard": False}, - ) as engine: - yield engine - return - - assert backend == "dask" - pytest.importorskip("distributed", reason="distributed is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine - - with DaskEngine( - rapidsmpf_options=rapidsmpf_options, - executor_options=executor_options, - ) as engine: - yield engine + return streaming_engine_factory( + StreamingOptions(statistics=True, max_rows_per_partition=10), + ) def test_statistics(engine: StreamingEngine) -> None: diff --git a/python/cudf_polars/tests/experimental/test_unique.py b/python/cudf_polars/tests/experimental/test_unique.py index 6bb30624cb6..1a157c3fe21 100644 --- a/python/cudf_polars/tests/experimental/test_unique.py +++ b/python/cudf_polars/tests/experimental/test_unique.py @@ -10,13 +10,7 @@ from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions from cudf_polars.testing.asserts import assert_gpu_result_equal - - -@pytest.fixture -def engine(streaming_engine_factory): - return streaming_engine_factory( - StreamingOptions(fallback_mode="warn"), - ) +from cudf_polars.testing.engine_utils import warns_on_spmd @pytest.fixture(scope="module") @@ -77,11 +71,12 @@ def test_unique_head_tail(keep, zlice, streaming_engine_factory): ) -def test_unique_complex_slice_fallback(df, engine): +def test_unique_complex_slice_fallback(df, streaming_engine_factory): """Test that unique with complex slice (offset >= 1) 
falls back correctly.""" + engine = streaming_engine_factory(StreamingOptions(fallback_mode="warn")) # unique().slice(offset=5, length=10) has zlice[0] >= 1, triggering fallback q = df.unique(subset=("y",), keep="any").slice(5, 10) - with pytest.warns(UserWarning, match="Complex slice not supported"): + with warns_on_spmd(engine, UserWarning, match="Complex slice not supported"): result = q.collect(engine=engine) # Just verify the fallback produces valid output with expected shape assert result.shape == (10, 3)