From d021cb67bbcca41a49512d96916d537d97791ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 12 Aug 2025 10:59:09 +0200 Subject: [PATCH 1/4] clarify lazy behaviour and eager loading chunks=None in open_*-functions --- xarray/backends/api.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 28639c242d5..0791a3ad446 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -578,8 +578,10 @@ def open_dataset( - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - - ``chunks=None`` skips using dask, which is generally faster for - small arrays. + - ``chunks=None`` skips using dask. This uses xarray's internally private + :ref:`lazy indexing classes <internal design.lazy indexing>`, + but data is eagerly loaded into memory as numpy arrays when accessed. + This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using the engine's preferred chunk size, generally identical to the format's chunk size. If not available, a @@ -819,8 +821,10 @@ def open_dataarray( - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - - ``chunks=None`` skips using dask, which is generally faster for - small arrays. + - ``chunks=None`` skips using dask. This uses xarray's internally private + :ref:`lazy indexing classes <internal design.lazy indexing>`, + but data is eagerly loaded into memory as numpy arrays when accessed. + This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using engine preferred chunks if exposed by the backend, otherwise with a single chunk for all arrays. 
@@ -1044,8 +1048,10 @@ def open_datatree( - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - - ``chunks=None`` skips using dask, which is generally faster for - small arrays. + - ``chunks=None`` skips using dask. This uses xarray's internally private + :ref:`lazy indexing classes <internal design.lazy indexing>`, + but data is eagerly loaded into memory as numpy arrays when accessed. + This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using the engine's preferred chunk size, generally identical to the format's chunk size. If not available, a @@ -1288,8 +1294,10 @@ def open_groups( - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - - ``chunks=None`` skips using dask, which is generally faster for - small arrays. + - ``chunks=None`` skips using dask. This uses xarray's internally private + :ref:`lazy indexing classes <internal design.lazy indexing>`, + but data is eagerly loaded into memory as numpy arrays when accessed. + This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using the engine's preferred chunk size, generally identical to the format's chunk size. 
If not available, a From 6192b29b3f537f2391658b9ba24d9d322d1738d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 12 Aug 2025 11:09:23 +0200 Subject: [PATCH 2/4] clarify lazy behaviour and eager loading chunks=None in open_*-functions --- xarray/backends/zarr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 1b62a87d10c..425f72c591c 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1370,8 +1370,10 @@ def open_zarr( - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the engine preferred chunks. - - ``chunks=None`` skips using dask, which is generally faster for - small arrays. + - ``chunks=None`` skips using dask. This uses xarray's internally private + :ref:`lazy indexing classes <internal design.lazy indexing>`, + but data is eagerly loaded into memory as numpy arrays when accessed. + This can be more efficient for smaller arrays, though results may vary. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using engine preferred chunks if exposed by the backend, otherwise with a single chunk for all arrays. From dbb2e71492874084ec2fe7e421940ed277445bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 12 Aug 2025 11:09:35 +0200 Subject: [PATCH 3/4] add whats-new.rst entry --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 81d370766ab..4ef6e7d86e2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,10 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Clarify lazy behaviour and eager loading for ``chunks=None`` in :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_dataarray`, :py:func:`~xarray.open_datatree`, :py:func:`~xarray.open_groups` and :py:func:`~xarray.open_zarr` (:issue:`10612`, :pull:`10627`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. 
+ + Internal Changes ~~~~~~~~~~~~~~~~ From 206e0876fe880524ea864400ca3b06aceaa2b4de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 13 Aug 2025 16:00:49 +0200 Subject: [PATCH 4/4] Update xarray/backends/api.py Co-authored-by: Deepak Cherian --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0791a3ad446..af1f5730340 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -581,7 +581,7 @@ def open_dataset( - ``chunks=None`` skips using dask. This uses xarray's internally private :ref:`lazy indexing classes <internal design.lazy indexing>`, but data is eagerly loaded into memory as numpy arrays when accessed. - This can be more efficient for smaller arrays, though results may vary. + This can be more efficient for smaller arrays or when large arrays are sliced before computation. - ``chunks=-1`` loads the data with dask using a single chunk for all arrays. - ``chunks={}`` loads the data with dask using the engine's preferred chunk size, generally identical to the format's chunk size. If not available, a