Skip to content

Commit adc2f8f

Browse files
authored
Fix casting to StringDtype for ArrowExtensionArray duration and decimal types (rapidsai#21230)
Fixes conversion of ArrowExtensionArray columns (duration and decimal128 types) to pandas StringDtype when using `astype('string[python]')` or `astype('string[pyarrow]')`.

Contributes to rapidsai#18659

Authors:
- Matthew Murray (https://github.com/Matt711)

Approvers:
- Matthew Roeschke (https://github.com/mroeschke)

URL: rapidsai#21230
1 parent d7a7d12 commit adc2f8f

3 files changed

Lines changed: 16 additions & 24 deletions

File tree

python/cudf/cudf/core/column/decimal.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
)
3131
from cudf.core.mixins import BinaryOperand
3232
from cudf.utils.dtypes import (
33-
CUDF_STRING_DTYPE,
3433
cudf_dtype_to_pa_type,
3534
get_dtype_of_same_kind,
3635
get_dtype_of_same_type,
@@ -171,12 +170,12 @@ def as_string_column(self, dtype: DtypeObj) -> StringColumn:
171170
)
172171
return cast(
173172
cudf.core.column.string.StringColumn,
174-
type(self).from_pylibcudf(plc_column),
173+
ColumnBase.create(plc_column, dtype),
175174
)
176175
else:
177176
return cast(
178177
cudf.core.column.StringColumn,
179-
cudf.core.column.column_empty(0, dtype=CUDF_STRING_DTYPE),
178+
cudf.core.column.column_empty(0, dtype=dtype),
180179
)
181180

182181
def __pow__(self, other: ColumnBinaryOperand) -> ColumnBase:

python/cudf/cudf/core/column/timedelta.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from cudf.core.column.temporal_base import TemporalBaseColumn
2020
from cudf.errors import MixedTypeError
2121
from cudf.utils.dtypes import (
22+
CUDF_STRING_DTYPE,
2223
cudf_dtype_from_pa_type,
2324
cudf_dtype_to_pa_type,
2425
find_common_type,
@@ -248,27 +249,29 @@ def as_datetime_column(self, dtype: np.dtype) -> None: # type: ignore[override]
248249
f"cannot astype a timedelta from {self.dtype} to {dtype}"
249250
)
250251

251-
def strftime(self, format: str) -> StringColumn:
252+
def strftime(
253+
self, format: str, dtype: DtypeObj = CUDF_STRING_DTYPE
254+
) -> StringColumn:
252255
if len(self) == 0:
253256
return super().strftime(format)
254-
else:
255-
with self.access(mode="read", scope="internal"):
256-
return cast(
257-
cudf.core.column.string.StringColumn,
258-
type(self).from_pylibcudf(
259-
plc.strings.convert.convert_durations.from_durations(
260-
self.plc_column, format
261-
)
257+
with self.access(mode="read", scope="internal"):
258+
return cast(
259+
cudf.core.column.string.StringColumn,
260+
ColumnBase.create(
261+
plc.strings.convert.convert_durations.from_durations(
262+
self.plc_column, format
262263
),
263-
)
264+
dtype,
265+
),
266+
)
264267

265268
def as_string_column(self, dtype: DtypeObj) -> StringColumn:
266269
if cudf.get_option("mode.pandas_compatible"):
267270
if isinstance(dtype, np.dtype) and dtype.kind == "O":
268271
raise MixedTypeError(
269272
f"cannot astype a timedelta like from {self.dtype} to {dtype}"
270273
)
271-
return self.strftime("%D days %H:%M:%S")
274+
return self.strftime("%D days %H:%M:%S", dtype=dtype)
272275

273276
def as_timedelta_column(self, dtype: np.dtype) -> TimeDeltaColumn:
274277
if dtype == self.dtype:

python/cudf/cudf/pandas/scripts/conftest-patch.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,16 +1473,6 @@ def set_copy_on_write_option():
14731473
"tests/extension/test_arrow.py::TestArrowArray::test_accumulate_series[bool-cummin-True]",
14741474
"tests/extension/test_arrow.py::TestArrowArray::test_accumulate_series[bool-cumprod-False]",
14751475
"tests/extension/test_arrow.py::TestArrowArray::test_accumulate_series[bool-cumprod-True]",
1476-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[decimal128(7, 3)-string[pyarrow]]",
1477-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[decimal128(7, 3)-string[python]]",
1478-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[ms]-string[pyarrow]]",
1479-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[ms]-string[python]]",
1480-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[ns]-string[pyarrow]]",
1481-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[ns]-string[python]]",
1482-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[s]-string[pyarrow]]",
1483-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[s]-string[python]]",
1484-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[us]-string[pyarrow]]",
1485-
"tests/extension/test_arrow.py::TestArrowArray::test_astype_string[duration[us]-string[python]]",
14861476
"tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[decimal128(7, 3)-mean-False]",
14871477
"tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[decimal128(7, 3)-median-True]",
14881478
"tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[decimal128(7, 3)-prod-False]",

0 commit comments

Comments
 (0)