Change the meaning of chunks="auto" to the former "preserve" behavior

jsignell · jsignell · commit 2aa143f91eb7 · 2026-04-15T16:05:49.000-04:00
diff --git a/properties/test_parallelcompat.py b/properties/test_parallelcompat.py
@@ -20,7 +20,7 @@ def test_preserve_all_chunks(
         target = 1024 * 1024
 
         actual = ChunkManagerEntrypoint.preserve_chunks(
-            chunks=("preserve",) * len(shape),
+            chunks=("auto",) * len(shape),
             shape=shape,
             target=target,
             typesize=typesize,
@@ -47,7 +47,7 @@ def test_preserve_some_chunks(
         target = 2 * 1024 * 1024
 
         actual = ChunkManagerEntrypoint.preserve_chunks(
-            chunks=(first_chunk, *["preserve" for _ in range(len(shape) - 1)]),
+            chunks=(first_chunk, *["auto" for _ in range(len(shape) - 1)]),
             shape=shape,
             target=target,
             typesize=typesize,
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
@@ -259,11 +259,11 @@ def _chunk_ds(
             name,
             var,
             var_chunks,
+            chunkmanager,
             overwrite_encoded_chunks=overwrite_encoded_chunks,
             name_prefix=name_prefix,
             token=token,
             inline_array=inline_array,
-            chunked_array_type=chunkmanager,
             from_array_kwargs=from_array_kwargs.copy(),
             just_use_token=True,
         )
@@ -292,9 +292,9 @@ def _dataset_from_backend_dataset(
     create_default_indexes,
     **extra_tokens,
 ):
-    if not isinstance(chunks, int | dict) and chunks not in {None, "auto", "preserve"}:
+    if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}:
         raise ValueError(
-            f"chunks must be an int, dict, 'auto', 'preserve', or None. Instead found {chunks}."
+            f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}."
         )
 
     _protect_dataset_variables_inplace(backend_ds, cache)
@@ -430,14 +430,14 @@ def open_dataset(
         "netcdf4" over "h5netcdf" over "scipy" (customizable via
         ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend
         class (a subclass of ``BackendEntrypoint``) can also be used.
-    chunks : int, dict, 'auto', 'preserve' or None, default: None
+    chunks : int, dict, 'auto', 'dask-auto', or None, default: None
         If provided, used to load the data into dask arrays.
 
-        - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
-          engine preferred chunks.
-        - ``chunks="preserve"`` will use a chunking scheme that never splits encoded
-          chunks. If encoded chunks are small then "preserve" takes multiples of them
+        - ``chunks="auto"`` will use a chunking scheme that never splits encoded
+          chunks. If encoded chunks are small then "auto" takes multiples of them
           over the largest dimension.
+        - ``chunks="dask-auto"`` will use dask ``auto`` chunking taking into account the
+          engine preferred chunks.
         - ``chunks=None`` skips using dask. This uses xarray's internally private
           :ref:`lazy indexing classes <internal design.lazy indexing>`,
           but data is eagerly loaded into memory as numpy arrays when accessed.
@@ -677,14 +677,14 @@ def open_dataarray(
         "netcdf4" over "h5netcdf" over "scipy" (customizable via
         ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend
         class (a subclass of ``BackendEntrypoint``) can also be used.
-    chunks : int, dict, 'auto', 'preserve', or None, default: None
+    chunks : int, dict, 'auto', 'dask-auto', or None, default: None
         If provided, used to load the data into dask arrays.
 
-        - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the
-          engine preferred chunks.
-        - ``chunks="preserve"`` will use a chunking scheme that never splits encoded
-          chunks. If encoded chunks are small then "preserve" takes multiples of them
+        - ``chunks="auto"`` will use a chunking scheme that never splits encoded
+          chunks. If encoded chunks are small then "auto" takes multiples of them
           over the largest dimension.
+        - ``chunks="dask-auto"`` will use dask ``auto`` chunking taking into account the
+          engine preferred chunks.
         - ``chunks=None`` skips using dask. This uses xarray's internally private
           :ref:`lazy indexing classes <internal design.lazy indexing>`,
           but data is eagerly loaded into memory as numpy arrays when accessed.
@@ -906,13 +906,13 @@ def open_datatree(
         "h5netcdf" over "netcdf4" (customizable via ``netcdf_engine_order`` in
         ``xarray.set_options()``). A custom backend class (a subclass of
         ``BackendEntrypoint``) can also be used.
-    chunks : int, dict, 'auto', preserve, or None, default: None
+    chunks : int, dict, 'auto', 'dask-auto', or None, default: None
         If provided, used to load the data into dask arrays.
 
-        - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
+        - ``chunks="dask-auto"`` will use dask ``auto`` chunking taking into account the
           engine preferred chunks.
-        - ``chunks="preserve"`` will use a chunking scheme that never splits encoded
-          chunks. If encoded chunks are small then "preserve" takes multiples of them
+        - ``chunks="auto"`` will use a chunking scheme that never splits encoded
+          chunks. If encoded chunks are small then "auto" takes multiples of them
           over the largest dimension.
         - ``chunks=None`` skips using dask. This uses xarray's internally private
           :ref:`lazy indexing classes <internal design.lazy indexing>`,
@@ -1155,14 +1155,14 @@ def open_groups(
         ``xarray.set_options()``). A custom backend class (a subclass of
         ``BackendEntrypoint``) can also be used.
         can also be used.
-    chunks : int, dict, 'auto', 'preserve', or None, default: None
+    chunks : int, dict, 'auto', 'dask-auto', or None, default: None
         If provided, used to load the data into dask arrays.
 
-        - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
-          engine preferred chunks.
-        - ``chunks="preserve"`` will use a chunking scheme that never splits encoded
-          chunks. If encoded chunks are small then "preserve" takes multiples of them
+        - ``chunks="auto"`` will use a chunking scheme that never splits encoded
+          chunks. If encoded chunks are small then "auto" takes multiples of them
           over the largest dimension.
+        - ``chunks="dask-auto"`` will use dask ``auto`` chunking taking into account the
+          engine preferred chunks.
         - ``chunks=None`` skips using dask. This uses xarray's internally private
           :ref:`lazy indexing classes <internal design.lazy indexing>`,
           but data is eagerly loaded into memory as numpy arrays when accessed.
@@ -1430,7 +1430,7 @@ def open_mfdataset(
         concatenation along more than one dimension is desired, then ``paths`` must be a
         nested list-of-lists (see ``combine_nested`` for details). (A string glob will
         be expanded to a 1-dimensional list.)
-    chunks : int, dict, 'auto', 'preserve', or None, optional
+    chunks : int, dict, 'auto', 'dask-auto', or None, optional
         Dictionary with keys given by dimension names and values given by chunk sizes.
         In general, these should divide the dimensions of each dataset. If int, chunk
         each dimension by ``chunks``. By default, chunks will be chosen to match the
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
@@ -1499,15 +1499,15 @@ def open_zarr(
         Array synchronizer provided to zarr
     group : str, optional
         Group path. (a.k.a. `path` in zarr terminology.)
-    chunks : int, dict, "auto", "preserve", or None, optional
+    chunks : int, dict, "auto", "dask-auto", or None, optional
         Used to load the data into dask arrays. Default behavior is to use
         ``chunks={}`` if dask is available, otherwise ``chunks=None``.
 
-        - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the
-          engine preferred chunks.
-        - ``chunks="preserve"`` will use a chunking scheme that never splits encoded
-          chunks. If encoded chunks are small then "preserve" takes multiples of them
+        - ``chunks="auto"`` will use a chunking scheme that never splits encoded
+          chunks. If encoded chunks are small then "auto" takes multiples of them
           over the largest dimension.
+        - ``chunks="dask-auto"`` will use dask ``auto`` chunking taking into account the
+          engine preferred chunks.
         - ``chunks=None`` skips using dask. This uses xarray's internally private
           :ref:`lazy indexing classes <internal design.lazy indexing>`,
           but data is eagerly loaded into memory as numpy arrays when accessed.
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -2639,11 +2639,11 @@ def _resolve_resampler(name: Hashable, resampler: Resampler) -> tuple[int, ...]:
                 k,
                 v,
                 chunks_mapping_ints,
+                chunkmanager,
                 token,
                 lock,
                 name_prefix,
                 inline_array=inline_array,
-                chunked_array_type=chunkmanager,
                 from_array_kwargs=from_array_kwargs.copy(),
             )
             for k, v in self.variables.items()
diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py
@@ -74,7 +74,7 @@ def dtype(self) -> _DType_co: ...
 _NormalizedChunks = tuple[tuple[int, ...], ...]
 # FYI in some cases we don't allow `None`, which this doesn't take account of.
 # # FYI the `str` is for a size string, e.g. "16MB", supported by dask.
-T_ChunkDim: TypeAlias = str | int | Literal["auto", "preserve"] | tuple[int, ...] | None  # noqa: PYI051
+T_ChunkDim: TypeAlias = str | int | Literal["auto"] | tuple[int, ...] | None  # noqa: PYI051
 # We allow the tuple form of this (though arguably we could transition to named dims only)
 T_Chunks: TypeAlias = T_ChunkDim | Mapping[Any, T_ChunkDim]
 
diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py
@@ -850,7 +850,7 @@ def preserve_chunks(
             of tuple of ints.
         """
         new_chunks = [*previous_chunks]
-        auto_dims = [c == "preserve" for c in chunks]
+        auto_dims = [c == "auto" for c in chunks]
         max_chunks = np.array(shape)
         for i, previous_chunk in enumerate(previous_chunks):
             chunk = chunks[i]
@@ -869,8 +869,8 @@ def preserve_chunks(
                         max_chunks[i] = max(previous_chunk)
 
                 if isinstance(previous_chunk, int):
-                    # preserve, None or () means we want to track previous chunk
-                    if chunk == "preserve" or not chunk:
+                    # auto, None or () means we want to track previous chunk
+                    if chunk == "auto" or not chunk:
                         max_chunks[i] = previous_chunk
                     # otherwise use the explicitly provided chunk
                     else:
diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py
@@ -236,12 +236,7 @@ def _get_chunk(  # type: ignore[no-untyped-def]
         limit = None
         dtype = data.dtype
 
-    if any(c == "preserve" for c in chunk_shape) and any(
-        c == "auto" for c in chunk_shape
-    ):
-        raise ValueError('chunks cannot use a combination of "auto" and "preserve"')
-
-    if shape and preferred_chunk_shape and any(c == "preserve" for c in chunk_shape):
+    if shape and preferred_chunk_shape and any(c == "auto" for c in chunk_shape):
         chunk_shape = chunkmanager.preserve_chunks(
             chunk_shape,
             shape=shape,
diff --git a/xarray/structure/chunks.py b/xarray/structure/chunks.py
@@ -14,7 +14,6 @@
 from xarray.namedarray.parallelcompat import (
     ChunkManagerEntrypoint,
     get_chunked_array_type,
-    guess_chunkmanager,
 )
 
 if TYPE_CHECKING:
@@ -65,12 +64,12 @@ def _maybe_chunk(
     name: Hashable,
     var: Variable,
     chunks: Mapping[Any, T_ChunkDim] | None,
+    chunkmanager: ChunkManagerEntrypoint,
     token=None,
     lock=None,
     name_prefix: str = "xarray-",
     overwrite_encoded_chunks: bool = False,
     inline_array: bool = False,
-    chunked_array_type: str | ChunkManagerEntrypoint | None = None,
     from_array_kwargs=None,
     just_use_token=False,
 ) -> Variable:
@@ -80,10 +79,24 @@ def _maybe_chunk(
         chunks = {dim: chunks[dim] for dim in var.dims if dim in chunks}
 
     if var.ndim:
-        chunked_array_type = guess_chunkmanager(
-            chunked_array_type
-        )  # coerce string to ChunkManagerEntrypoint type
-        if isinstance(chunked_array_type, DaskManager):
+        if (
+            var.shape
+            and var.chunks
+            and chunks
+            and any(c == "auto" for c in chunks.values())
+        ):
+            chunk_shape = chunkmanager.preserve_chunks(
+                tuple(chunks.get(dim, ()) for dim in var.dims),
+                shape=var.shape,
+                target=chunkmanager.get_auto_chunk_size(),
+                typesize=getattr(var.dtype, "itemsize", 8),
+                previous_chunks=var.chunks,
+            )
+            chunks = {
+                dim: chunk_shape[i] for i, dim in enumerate(var.dims) if dim in chunks
+            }
+
+        if isinstance(chunkmanager, DaskManager):
             if not just_use_token:
                 from dask.base import tokenize
 
@@ -104,7 +117,7 @@ def _maybe_chunk(
 
         var = var.chunk(
             chunks,
-            chunked_array_type=chunked_array_type,
+            chunked_array_type=chunkmanager,
             from_array_kwargs=from_array_kwargs,
         )
 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -7405,7 +7405,6 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path: Path) -> None:
 @pytest.mark.parametrize(
     "chunks", ["auto", -1, {}, {"x": "auto"}, {"x": -1}, {"x": "auto", "y": -1}]
 )
-@pytest.mark.filterwarnings("ignore:The specified chunks separate")
 def test_chunking_consistency(chunks, tmp_path: Path) -> None:
     encoded_chunks: dict[str, Any] = {}
     dask_arr = da.from_array(
@@ -7439,12 +7438,12 @@ def test_chunking_consistency(chunks, tmp_path: Path) -> None:
 @pytest.mark.parametrize(
     "chunks,expected",
     [
-        ("preserve", (160, 500)),
+        ("auto", (160, 500)),
         (-1, (500, 500)),
         ({}, (10, 10)),
-        ({"x": "preserve"}, (500, 10)),
+        ({"x": "auto"}, (500, 10)),
         ({"x": -1}, (500, 10)),
-        ({"x": "preserve", "y": -1}, (160, 500)),
+        ({"x": "auto", "y": -1}, (160, 500)),
     ],
 )
 def test_open_dataset_chunking_zarr_with_preserve(