diff --git a/changes/3249.doc.rst b/changes/3249.doc.rst new file mode 100644 index 0000000000..d46f775d9c --- /dev/null +++ b/changes/3249.doc.rst @@ -0,0 +1,2 @@ +Expand the data type docs to include a demonstration of the ``parse_data_type`` function. +Expand the docstring for the ``parse_data_type`` function. \ No newline at end of file diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index dc29874b3b..d4b49ca43f 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -409,4 +409,47 @@ We want to avoid a situation where the same native data type matches multiple Za a NumPy data type should *uniquely* specify a single Zarr data type. But data type resolution is dynamic, so it's not possible to statically guarantee this uniqueness constraint. Therefore, we attempt data type resolution against *every* data type class, and if, for some reason, a native data -type matches multiple Zarr data types, we treat this as an error and raise an exception. \ No newline at end of file +type matches multiple Zarr data types, we treat this as an error and raise an exception. + +If you have a NumPy data type and you want to get the corresponding ``ZDType`` instance, you can use +the ``parse_data_type`` function, which will use the dynamic resolution described above. ``parse_data_type`` +handles a range of input types: + +- NumPy data types: + + .. code-block:: python + + >>> import numpy as np + >>> from zarr.dtype import parse_data_type + >>> my_dtype = np.dtype('>M8[10s]') + >>> parse_data_type(my_dtype, zarr_format=2) + DateTime64(endianness='big', scale_factor=10, unit='s') + + +- NumPy data type-compatible strings: + + .. code-block:: python + + >>> dtype_str = '>M8[10s]' + >>> parse_data_type(dtype_str, zarr_format=2) + DateTime64(endianness='big', scale_factor=10, unit='s') + +- ``ZDType`` instances: + + .. 
code-block:: python + + >>> from zarr.dtype import DateTime64 + >>> zdt = DateTime64(endianness='big', scale_factor=10, unit='s') + >>> parse_data_type(zdt, zarr_format=2) # Use a ZDType (this is a no-op) + DateTime64(endianness='big', scale_factor=10, unit='s') + +- Python dictionaries (requires ``zarr_format=3``). These dictionaries must be consistent with the + ``JSON`` form of the data type: + + .. code-block:: python + + >>> dt_dict = {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}} + >>> parse_data_type(dt_dict, zarr_format=3) + DateTime64(endianness='little', scale_factor=10, unit='s') + >>> parse_data_type(dt_dict, zarr_format=3).to_json(zarr_format=3) + {'name': 'numpy.datetime64', 'configuration': {'unit': 's', 'scale_factor': 10}} diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 1d36689ec8..aadf127c9b 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -189,6 +189,32 @@ def parse_data_type( ) -> ZDType[TBaseDType, TBaseScalar]: """ Interpret the input as a ZDType instance. + + Parameters + ---------- + dtype_spec : ZDTypeLike + The input to be interpreted as a ZDType instance. This could be a native data type + (e.g., a NumPy data type), a Python object that can be converted into a native data type, + a ZDType instance (in which case the input is returned unchanged), or a JSON object + representation of a data type. + zarr_format : ZarrFormat + The Zarr format version. + + Returns + ------- + ZDType[TBaseDType, TBaseScalar] + The ZDType instance corresponding to the input. 
+ + Examples + -------- + >>> from zarr.dtype import parse_data_type + >>> import numpy as np + >>> parse_data_type("int32", zarr_format=2) + Int32(endianness='little') + >>> parse_data_type(np.dtype('S10'), zarr_format=2) + NullTerminatedBytes(length=10) + >>> parse_data_type({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) + DateTime64(endianness='little', scale_factor=10, unit='s') """ if isinstance(dtype_spec, ZDType): return dtype_spec