Skip to content

Commit f5f93f3

Browse files
authored
Remove remaining uses of ColumnBase.from_pylibcudf (rapidsai#21448)
Towards rapidsai#21229. After this PR, no uses of `ColumnBase.from_pylibcudf` remain within cuDF. I haven't removed the method entirely yet, as I should probably first audit other RAPIDS libraries to ensure they are not using this method either. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: rapidsai#21448
1 parent 59e2194 commit f5f93f3

8 files changed

Lines changed: 36 additions & 57 deletions

File tree

python/cudf/cudf/core/column/column.py

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,26 +1286,13 @@ def from_arrow(cls, array: pa.Array | pa.ChunkedArray) -> ColumnBase:
12861286
type=array.type.value_type,
12871287
)
12881288
)
1289-
codes_dtype = cudf_dtype_from_pa_type(codes.type)
1290-
result = cls.create(plc.Column.from_arrow(codes), codes_dtype)
1291-
1292-
# For categories, handle special cases:
1293-
# - NULL type (empty categoricals): use from_pylibcudf to infer type
1294-
# - ExtensionType (intervals): arrow conversion may return pandas dtype
1295-
if pa.types.is_null(dictionary.type) or isinstance(
1296-
dictionary.type, pa.ExtensionType
1297-
):
1298-
categories = cls.from_pylibcudf(
1299-
plc.Column.from_arrow(dictionary)
1300-
)
1301-
else:
1302-
categories_dtype = cudf_dtype_from_pa_type(dictionary.type)
1303-
categories = cls.create(
1304-
plc.Column.from_arrow(dictionary), categories_dtype
1305-
)
1289+
categories_dtype = cudf_dtype_from_pa_type(dictionary.type)
1290+
categories = cls.create(
1291+
plc.Column.from_arrow(dictionary), categories_dtype
1292+
)
13061293

13071294
return ColumnBase.create(
1308-
result.plc_column,
1295+
plc.Column.from_arrow(codes),
13091296
CategoricalDtype(
13101297
categories=categories, ordered=array.type.ordered
13111298
),
@@ -3141,33 +3128,13 @@ def as_column(
31413128
categories=new_cats, ordered=arbitrary.dtype.ordered
31423129
)
31433130
arbitrary = arbitrary.astype(new_dtype)
3144-
elif (
3145-
isinstance(
3146-
arbitrary.dtype.categories.dtype, pd.IntervalDtype
3147-
)
3148-
and dtype is None
3149-
):
3150-
# Conversion to arrow converts IntervalDtype to StructDtype
3151-
dtype = CategoricalDtype(
3152-
categories=arbitrary.dtype.categories,
3153-
ordered=arbitrary.dtype.ordered,
3154-
)
31553131
result = as_column(
31563132
pa.array(arbitrary, from_pandas=True),
31573133
nan_as_null=nan_as_null,
31583134
dtype=dtype,
31593135
length=length,
31603136
)
3161-
if isinstance(arbitrary.dtype, pd.IntervalDtype):
3162-
# Wrap StructColumn as IntervalColumn with proper metadata
3163-
result = ColumnBase.create(
3164-
result.plc_column,
3165-
IntervalDtype(
3166-
subtype=arbitrary.dtype.subtype,
3167-
closed=arbitrary.dtype.closed,
3168-
),
3169-
)
3170-
elif (
3137+
if (
31713138
isinstance(arbitrary.dtype, pd.CategoricalDtype)
31723139
and is_pandas_nullable_extension_dtype(
31733140
arbitrary.dtype.categories.dtype

python/cudf/cudf/core/column/numerical.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
CUDF_STRING_DTYPE,
3030
cudf_dtype_from_pa_type,
3131
cudf_dtype_to_pa_type,
32+
dtype_from_pylibcudf_column,
3233
dtype_to_pylibcudf_type,
3334
find_common_type,
3435
get_dtype_of_same_kind,
@@ -849,10 +850,12 @@ def _validate_fillna_value(
849850
) -> plc.Scalar | ColumnBase:
850851
"""Align fill_value for .fillna based on column type."""
851852
if is_scalar(fill_value):
852-
cudf_obj = ColumnBase.from_pylibcudf(
853-
plc.Column.from_scalar(
854-
pa_scalar_to_plc_scalar(pa.scalar(fill_value)), 1
855-
)
853+
plc_col = plc.Column.from_scalar(
854+
pa_scalar_to_plc_scalar(pa.scalar(fill_value)), 1
855+
)
856+
cudf_obj = ColumnBase.create(
857+
plc_col,
858+
dtype=dtype_from_pylibcudf_column(plc_col),
856859
)
857860
if not cudf_obj.can_cast_safely(self.dtype):
858861
raise TypeError(

python/cudf/cudf/core/dataframe.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8502,7 +8502,10 @@ def from_pylibcudf(
85028502

85038503
plc_columns = tbl.columns()
85048504
cudf_cols = (
8505-
ColumnBase.from_pylibcudf(plc_col) for plc_col in plc_columns
8505+
ColumnBase.create(
8506+
plc_col, dtype=dtype_from_pylibcudf_column(plc_col)
8507+
)
8508+
for plc_col in plc_columns
85068509
)
85078510
# We only have child names if the source is a pylibcudf.io.TableWithMetadata.
85088511
if child_names is not None:

python/cudf/cudf/core/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def from_pylibcudf(
380380
raise ValueError("Metadata dict must only contain a name")
381381
name = metadata.get("name")
382382
return cls._from_column(
383-
ColumnBase.from_pylibcudf(col),
383+
ColumnBase.create(col, dtype=dtype_from_pylibcudf_column(col)),
384384
name=name,
385385
)
386386

python/cudf/cudf/core/series.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
from cudf.utils.dtypes import (
6565
CUDF_STRING_DTYPE,
6666
_get_nan_for_dtype,
67+
dtype_from_pylibcudf_column,
6768
find_common_type,
6869
get_dtype_of_same_kind,
6970
is_mixed_with_object_dtype,
@@ -3863,7 +3864,7 @@ def from_pylibcudf(
38633864
name = metadata.get("name")
38643865
index = metadata.get("index")
38653866
return cls._from_column(
3866-
ColumnBase.from_pylibcudf(col),
3867+
ColumnBase.create(col, dtype=dtype_from_pylibcudf_column(col)),
38673868
name=name,
38683869
index=index,
38693870
)

python/cudf/cudf/tests/private_objects/test_nrt_stats.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
22
# SPDX-License-Identifier: Apache-2.0
3+
import numpy as np
34
import pytest
45
from numba.cuda import config
56
from numba.cuda.memory_management.nrt import rtsys
@@ -77,8 +78,9 @@ def double(st):
7778

7879
with _CUDFNumbaConfig():
7980
kernel.forall(len(sr))(*launch_args)
80-
col = ColumnBase.from_pylibcudf(
81-
strings_udf.column_from_managed_udf_string_array(ans_col)
81+
col = ColumnBase.create(
82+
strings_udf.column_from_managed_udf_string_array(ans_col),
83+
dtype=np.dtype(object),
8284
)
8385

8486
# MemInfos that own the strings should still be alive

python/cudf/cudf/tests/testing/test_assert_column_equal.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
22
# SPDX-License-Identifier: Apache-2.0
33

44
import pyarrow as pa
@@ -57,8 +57,8 @@ def test_assert_column_memory_basic_same(arrow_arrays):
5757
plc_col = data.plc_column
5858

5959
# Create two references to same underlying data
60-
left = cudf.core.column.ColumnBase.from_pylibcudf(plc_col)
61-
right = cudf.core.column.ColumnBase.from_pylibcudf(plc_col)
60+
left = cudf.core.column.ColumnBase.create(plc_col, dtype=data.dtype)
61+
right = cudf.core.column.ColumnBase.create(plc_col, dtype=data.dtype)
6262

6363
assert_column_memory_eq(left, right)
6464
with pytest.raises(AssertionError):

python/cudf/cudf/utils/dtypes.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import numpy as np
88
import pandas as pd
99
import pyarrow as pa
10+
from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
1011
from pandas.core.computation.common import result_type_many
1112

1213
import pylibcudf as plc
@@ -128,17 +129,17 @@ def cudf_dtype_to_pa_type(dtype: DtypeObj) -> pa.DataType:
128129
def cudf_dtype_from_pa_type(typ: pa.DataType) -> DtypeObj:
129130
"""Given a pyarrow dtype, converts it into the equivalent cudf dtype."""
130131
if pa.types.is_list(typ):
131-
return cudf.core.dtypes.ListDtype.from_arrow(typ)
132+
return cudf.ListDtype.from_arrow(typ)
132133
elif pa.types.is_struct(typ):
133-
return cudf.core.dtypes.StructDtype.from_arrow(typ)
134+
return cudf.StructDtype.from_arrow(typ)
134135
elif pa.types.is_decimal(typ):
135136
if isinstance(typ, pa.Decimal256Type):
136137
raise NotImplementedError("cudf does not support Decimal256Type")
137138
if isinstance(typ, pa.Decimal32Type):
138-
return cudf.core.dtypes.Decimal32Dtype.from_arrow(typ)
139+
return cudf.Decimal32Dtype.from_arrow(typ)
139140
if isinstance(typ, pa.Decimal64Type):
140-
return cudf.core.dtypes.Decimal64Dtype.from_arrow(typ)
141-
return cudf.core.dtypes.Decimal128Dtype.from_arrow(typ)
141+
return cudf.Decimal64Dtype.from_arrow(typ)
142+
return cudf.Decimal128Dtype.from_arrow(typ)
142143
elif pa.types.is_large_string(typ) or pa.types.is_string(typ):
143144
return CUDF_STRING_DTYPE
144145
elif pa.types.is_date(typ):
@@ -152,6 +153,8 @@ def cudf_dtype_from_pa_type(typ: pa.DataType) -> DtypeObj:
152153
elif pa.types.is_null(typ):
153154
# Similar to PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.EMPTY]
154155
return np.dtype(np.int8)
156+
elif isinstance(typ, ArrowIntervalType):
157+
return cudf.IntervalDtype.from_arrow(typ)
155158
else:
156159
return cudf.api.types.pandas_dtype(typ.to_pandas_dtype())
157160

0 commit comments

Comments
 (0)