Replies: 2 comments 1 reply
-
|
This is a bug. Do you have a fully reproducible example?

```python
from xarray.tests import raise_if_dask_computes

with raise_if_dask_computes():
    # code here
```

You can use ^ to raise an error at the exact point of compute to verify your hypothesis |
Beta Was this translation helpful? Give feedback.
-
|
Thanks for pointing me to `raise_if_dask_computes`. The DataArrays have additional non-dimension coordinates that are also dask arrays. Here is a minimal example:
The interpolation doesn't trigger computation:
However, the multiplication does:
Expand to show traceback
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[3], line 2
1 with raise_if_dask_computes():
----> 2 d = a * b
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/core/_typed_ops.py:552, in DataArrayOpsMixin.__mul__(self, other)
551 def __mul__(self, other: DaCompatible) -> Self | Dataset | DataTree:
--> 552 return self._binary_op(other, operator.mul)
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/core/dataarray.py:4922, in DataArray._binary_op(self, other, f, reflexive)
4915 other_coords = getattr(other, "coords", None)
4917 variable = (
4918 f(self.variable, other_variable_or_arraylike)
4919 if not reflexive
4920 else f(other_variable_or_arraylike, self.variable)
4921 )
-> 4922 coords, indexes = self.coords._merge_raw(
4923 other_coords, reflexive, compat=OPTIONS["arithmetic_compat"]
4924 )
4925 name = result_name([self, other])
4927 return self._replace(variable, coords, name, indexes=indexes)
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/core/coordinates.py:518, in Coordinates._merge_raw(self, other, reflexive, compat)
516 else:
517 coord_list = [self, other] if not reflexive else [other, self]
--> 518 variables, indexes = merge_coordinates_without_align(
519 coord_list, compat=compat
520 )
521 return variables, indexes
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/structure/merge.py:465, in merge_coordinates_without_align(objects, prioritized, exclude_dims, combine_attrs, compat)
461 filtered = collected
463 # TODO: indexes should probably be filtered in collected elements
464 # before merging them
--> 465 merged_coords, merged_indexes = merge_collected(
466 filtered, prioritized, compat=compat, combine_attrs=combine_attrs
467 )
468 merged_indexes = filter_indexes_from_coords(merged_indexes, set(merged_coords))
470 return merged_coords, merged_indexes
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/structure/merge.py:321, in merge_collected(grouped, prioritized, compat, combine_attrs, equals)
319 variables = [variable for variable, _ in elements_list]
320 try:
--> 321 equals_this_var, merged_vars[name] = unique_variable(
322 name, variables, compat, equals.get(name)
323 )
324 # This is very likely to result in false positives, but there is no way
325 # to tell if the output will change without computing.
326 if (
327 isinstance(compat, CombineKwargDefault)
328 and compat == "no_conflicts"
329 and len(variables) > 1
330 and not equals_this_var
331 ):
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/structure/merge.py:167, in unique_variable(name, variables, compat, equals)
164 lazy_equals = equals
165 if equals is None:
166 # now compare values with minimum number of computes
--> 167 out = out.compute()
168 for var in variables[1:]:
169 equals = getattr(out, compat_str)(var)
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/core/variable.py:1088, in Variable.compute(self, **kwargs)
1059 """Trigger loading data into memory and return a new variable.
1060
1061 Data will be computed and/or loaded from disk or a remote source.
(...) 1085 Dataset.compute
1086 """
1087 new = self.copy(deep=False)
-> 1088 return new.load(**kwargs)
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/core/variable.py:1022, in Variable.load(self, **kwargs)
993 def load(self, **kwargs) -> Self:
994 """Trigger loading data into memory and return this variable.
995
996 Data will be computed and/or loaded from disk or a remote source.
(...) 1020 Dataset.load
1021 """
-> 1022 self._data = to_duck_array(self._data, **kwargs)
1023 return self
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/namedarray/pycompat.py:139, in to_duck_array(data, **kwargs)
137 if is_chunked_array(data):
138 chunkmanager = get_chunked_array_type(data)
--> 139 loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated]
140 return loaded_data
142 if isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter):
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/namedarray/daskmanager.py:85, in DaskManager.compute(self, *data, **kwargs)
80 def compute(
81 self, *data: Any, **kwargs: Any
82 ) -> tuple[np.ndarray[Any, _DType_co], ...]:
83 from dask.array import compute
---> 85 return compute(*data, **kwargs)
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/dask/base.py:685, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
682 expr = expr.optimize()
683 keys = list(flatten(expr.__dask_keys__()))
--> 685 results = schedule(expr, keys, **kwargs)
687 return repack(results)
File ~/micromamba/envs/eopf3-dev/lib/python3.13/site-packages/xarray/tests/__init__.py:264, in CountingScheduler.__call__(self, dsk, keys, **kwargs)
262 self.total_computes += 1
263 if self.total_computes > self.max_computes:
--> 264 raise RuntimeError(
265 f"Too many computes. Total: {self.total_computes} > max: {self.max_computes}."
266 )
267 return dask.get(dsk, keys, **kwargs)
RuntimeError: Too many computes. Total: 1 > max: 0.
If I either drop the […] Note that the multiplication also doesn't preserve the […]
So since the additional |
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
I'm trying to wrap my head around the following situation:
Using a library based on xarray and dask (EOPF-CPM), I am reading in some DataArrays wrapping dask arrays. I then do an interpolation like
which is performed fully lazily. I have
`a` and `b` of the same dimensions, coordinates and chunk structure. However, when I now run
`d = a * b`, this is not lazy, but blocks until the computation is done. I tried multiplying the underlying dask arrays directly using
`a.data * b.data`, which dask performs lazily, as expected. So what are the conditions that need to be met for xarray to keep operations on dask-backed DataArrays lazy?
Beta Was this translation helpful? Give feedback.
All reactions