Skip to content

Map blocks unable to infer datatype of object arrays #58

@miguelcarcamov

Description

@miguelcarcamov

I'm trying to convert the following dask-backed xarray dataset to a CuPy-backed one using cupy-xarray's `as_cupy()`:

<xarray.Dataset>
Dimensions:        (row: 119, xyz: 3)
Coordinates:
    ROWID          (row) int64 dask.array<chunksize=(119,), meta=np.ndarray>
Dimensions without coordinates: row, xyz
Data variables:
    POSITION       (row, xyz) float64 dask.array<chunksize=(119, 3), meta=np.ndarray>
    TYPE           (row) object dask.array<chunksize=(119,), meta=np.ndarray>
    NAME           (row) object dask.array<chunksize=(119,), meta=np.ndarray>
    MOUNT          (row) object dask.array<chunksize=(119,), meta=np.ndarray>
    OFFSET         (row, xyz) float64 dask.array<chunksize=(119, 3), meta=np.ndarray>
    FLAG_ROW       (row) bool dask.array<chunksize=(119,), meta=np.ndarray>
    DISH_DIAMETER  (row) float64 dask.array<chunksize=(119,), meta=np.ndarray>
    STATION        (row) object dask.array<chunksize=(119,), meta=np.ndarray>
Attributes:
    __daskms_partition_schema__:  ()

However, I'm getting the following error:

"name": "ValueError",
"message": "`dtype` inference failed in `map_blocks`.

Please specify the dtype explicitly using the `dtype` kwarg.

Original error is below:
------------------------
ValueError('Unsupported dtype object')

Traceback:
---------
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py\", line 456, in apply_infer_dtype
    o = func(*args, **kwargs)
        ^^^^^^^^^^^^^^^^^^^^^
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy/_creation/from_data.py\", line 88, in asarray
    return _core.array(a, dtype, False, order, blocking=blocking)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File \"cupy/_core/core.pyx\", line 2383, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2410, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2549, in cupy._core.core._array_default
",
	"stack": "---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[7], line 1
----> 1 dataset = x.read(filter_flag_column=False, calculate_psf=False)

File ~/Documents/pyralysis/src/pyralysis/io/daskms.py:134, in DaskMS.read(self, read_flagged_data, filter_flag_column, calculate_psf, taql_query, chunks)
    132 # Creating antenna object
    133 antennas = xds_from_table(self.ms_name_dask + \"ANTENNA\", taql_where=taql_query_flag_row)[0]
--> 134 antenna_obj = Antenna(dataset=antennas)
    136 if obs_obj.ntelescope > 1:
    137     # if there is more than one telescope in the dataset, allocate space for
    138     # one observation id per antenna
    139     antenna_obs_id = da.zeros_like(antenna_obj.dataset.ROWID, dtype=np.int32)

File <string>:4, in __init__(self, dataset)

File ~/Documents/pyralysis/src/pyralysis/base/antenna.py:36, in Antenna.__post_init__(self)
     33 self.logger.setLevel(logging.INFO)
     35 print(self.dataset)
---> 36 self.dataset = xarray_as_cupy(self.dataset)
     37 if self.dataset is not None:
     38     self.max_diameter = self.dataset.DISH_DIAMETER.data.max() * u.m

File ~/Documents/pyralysis/src/pyralysis/utils/xarray_cupy_transformer.py:16, in xarray_as_cupy(xarray_object)
     11 def xarray_as_cupy(
     12     xarray_object: Union[xarray.DataArray, xarray.Dataset] = None
     13 ) -> Union[xarray.DataArray, xarray.Dataset]:
     15     if cupy_xarray and dask.config.get(\"array.backend\") == \"cupy\" and xarray_object is not None:
---> 16         return xarray_object.as_cupy()
     17     else:
     18         return xarray_object

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:162, in _.<locals>.as_cupy(*args, **kwargs)
    161 def as_cupy(*args, **kwargs):
--> 162     return ds.cupy.as_cupy(*args, **kwargs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:119, in CupyDatasetAccessor.as_cupy(self)
    118 def as_cupy(self):
--> 119     data_vars = {var: da.as_cupy() for var, da in self.ds.data_vars.items()}
    120     return Dataset(data_vars=data_vars, coords=self.ds.coords, attrs=self.ds.attrs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:148, in _.<locals>.as_cupy(*args, **kwargs)
    147 def as_cupy(*args, **kwargs):
--> 148     return da.cupy.as_cupy(*args, **kwargs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:56, in CupyDataArrayAccessor.as_cupy(self)
     32 \"\"\"
     33 Converts the DataArray's underlying array type to cupy.
     34 
   (...)
     52 
     53 \"\"\"
     54 if isinstance(self.da.data, dask_array_type):
     55     return DataArray(
---> 56         data=self.da.data.map_blocks(cp.asarray),
     57         coords=self.da.coords,
     58         dims=self.da.dims,
     59         name=self.da.name,
     60         attrs=self.da.attrs,
     61     )
     62 return DataArray(
     63     data=cp.asarray(self.da.data),
     64     coords=self.da.coords,
   (...)
     67     attrs=self.da.attrs,
     68 )

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py:2689, in Array.map_blocks(self, func, *args, **kwargs)
   2687 @wraps(map_blocks)
   2688 def map_blocks(self, func, *args, **kwargs):
-> 2689     return map_blocks(func, self, *args, **kwargs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py:813, in map_blocks(func, name, token, dtype, chunks, drop_axis, new_axis, enforce_ndim, meta, *args, **kwargs)
    810     except Exception:
    811         pass
--> 813     dtype = apply_infer_dtype(func, args, original_kwargs, \"map_blocks\")
    815 if drop_axis:
    816     ndim_out = len(out_ind)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py:481, in apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype, nout)
    479     msg = None
    480 if msg is not None:
--> 481     raise ValueError(msg)
    482 return getattr(o, \"dtype\", type(o)) if nout is None else tuple(e.dtype for e in o)

ValueError: `dtype` inference failed in `map_blocks`.

Please specify the dtype explicitly using the `dtype` kwarg.

Original error is below:
------------------------
ValueError('Unsupported dtype object')

Traceback:
---------
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py\", line 456, in apply_infer_dtype
    o = func(*args, **kwargs)
        ^^^^^^^^^^^^^^^^^^^^^
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy/_creation/from_data.py\", line 88, in asarray
    return _core.array(a, dtype, False, order, blocking=blocking)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File \"cupy/_core/core.pyx\", line 2383, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2410, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2549, in cupy._core.core._array_default
"

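It seems that `map_blocks` dtype inference fails for object-dtype arrays, because `cp.asarray` raises `ValueError('Unsupported dtype object')` during inference. Could the dtype be forced by passing the `dtype` parameter to `map_blocks`?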

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions