66import re
77import textwrap
88import uuid
9+ from collections .abc import Iterable
910from copy import deepcopy
1011from fnmatch import fnmatchcase
1112from itertools import groupby
1213from pathlib import Path
13- from typing import Any , Iterator , Sequence , Union
14+ from typing import Any , Iterator , Sequence , TypeVar , Union
1415
1516import dask
17+ from dask .delayed import Delayed
1618from iris .cube import Cube
1719
1820from esmvalcore import esgf , local
@@ -80,8 +82,12 @@ def _ismatch(facet_value: FacetValue, pattern: FacetValue) -> bool:
8082 and fnmatchcase (facet_value , pattern ))
8183
8284
83- def _first (elems ):
84- return elems [0 ]
85+ T = TypeVar ('T' )
86+
87+
88+ def _first (elems : Iterable [T ]) -> T :
89+ """Return the first element."""
90+ return next (iter (elems ))
8591
8692
8793class Dataset :
@@ -669,16 +675,16 @@ def files(self) -> Sequence[File]:
669675 def files (self , value ):
670676 self ._files = value
671677
672- def load (self , compute = True ) -> Cube :
678+ def load (self , compute = True ) -> Cube | Delayed :
673679 """Load dataset.
674680
675681 Parameters
676682 ----------
677683 compute:
678- If :obj:`True`, return the cube immediately. If :obj:`False`,
679- return a :class:`~dask.delayed.Delayed` object that can be used
680- to load the cube by calling its
681- :func :`~dask.delayed.Delayed.compute` method. Multiple datasets
684+ If :obj:`True`, return the :class:`~iris.cube.Cube` immediately.
685+ If :obj:`False`, return a :class:`~dask.delayed.Delayed` object
686+ that can be used to load the cube by calling its
687+ :meth :`~dask.delayed.Delayed.compute` method. Multiple datasets
682688 can be loaded in parallel by passing a list of such delayeds
683689 to :func:`dask.compute`.
684690
@@ -731,7 +737,14 @@ def _load(self) -> Cube:
731737 msg = "\n " .join (lines )
732738 raise InputFilesNotFound (msg )
733739
740+ input_files = [
741+ file .local_file (self .session ['download_dir' ]) if isinstance (
742+ file , esgf .ESGFFile ) else file for file in self .files
743+ ]
734744 output_file = _get_output_file (self .facets , self .session .preproc_dir )
745+ debug = self .session ['save_intermediary_cubes' ]
746+
747+ # Load all input files and concatenate them.
735748 fix_dir_prefix = Path (
736749 self .session ._fixed_file_dir ,
737750 self ._get_joined_summary_facets ('_' , join_lists = True ) + '_' ,
@@ -757,36 +770,6 @@ def _load(self) -> Cube:
757770 settings ['concatenate' ] = {
758771 'check_level' : self .session ['check_level' ]
759772 }
760- settings ['cmor_check_metadata' ] = {
761- 'check_level' : self .session ['check_level' ],
762- 'cmor_table' : self .facets ['project' ],
763- 'mip' : self .facets ['mip' ],
764- 'frequency' : self .facets ['frequency' ],
765- 'short_name' : self .facets ['short_name' ],
766- }
767- if 'timerange' in self .facets :
768- settings ['clip_timerange' ] = {
769- 'timerange' : self .facets ['timerange' ],
770- }
771- settings ['fix_data' ] = {
772- 'check_level' : self .session ['check_level' ],
773- 'session' : self .session ,
774- ** self .facets ,
775- }
776- settings ['cmor_check_data' ] = {
777- 'check_level' : self .session ['check_level' ],
778- 'cmor_table' : self .facets ['project' ],
779- 'mip' : self .facets ['mip' ],
780- 'frequency' : self .facets ['frequency' ],
781- 'short_name' : self .facets ['short_name' ],
782- }
783-
784- input_files = [
785- file .local_file (self .session ['download_dir' ]) if isinstance (
786- file , esgf .ESGFFile ) else file for file in self .files
787- ]
788-
789- debug = self .session ['save_intermediary_cubes' ]
790773
791774 result = []
792775 for input_file in input_files :
@@ -798,6 +781,7 @@ def _load(self) -> Cube:
798781 debug = debug ,
799782 ** settings ['fix_file' ],
800783 )
784+ # Multiple cubes may be present in a file.
801785 cubes = dask .delayed (preprocess )(
802786 files ,
803787 'load' ,
@@ -806,6 +790,7 @@ def _load(self) -> Cube:
806790 debug = debug ,
807791 ** settings ['load' ],
808792 )
793+ # Combine the cubes into a single cube per file.
809794 cubes = dask .delayed (preprocess )(
810795 cubes ,
811796 'fix_metadata' ,
@@ -817,6 +802,7 @@ def _load(self) -> Cube:
817802 cube = dask .delayed (_first )(cubes )
818803 result .append (cube )
819804
805+ # Concatenate the cubes from all files.
820806 result = dask .delayed (preprocess )(
821807 result ,
822808 'concatenate' ,
@@ -825,7 +811,34 @@ def _load(self) -> Cube:
825811 debug = debug ,
826812 ** settings ['concatenate' ],
827813 )
828- for step , kwargs in dict (tuple (settings .items ())[4 :]).items ():
814+
815+ # At this point `result` is a list containing a single cube. Apply the
816+ # remaining preprocessor functions to this cube.
817+ settings .clear ()
818+ settings ['cmor_check_metadata' ] = {
819+ 'check_level' : self .session ['check_level' ],
820+ 'cmor_table' : self .facets ['project' ],
821+ 'mip' : self .facets ['mip' ],
822+ 'frequency' : self .facets ['frequency' ],
823+ 'short_name' : self .facets ['short_name' ],
824+ }
825+ if 'timerange' in self .facets :
826+ settings ['clip_timerange' ] = {
827+ 'timerange' : self .facets ['timerange' ],
828+ }
829+ settings ['fix_data' ] = {
830+ 'check_level' : self .session ['check_level' ],
831+ 'session' : self .session ,
832+ ** self .facets ,
833+ }
834+ settings ['cmor_check_data' ] = {
835+ 'check_level' : self .session ['check_level' ],
836+ 'cmor_table' : self .facets ['project' ],
837+ 'mip' : self .facets ['mip' ],
838+ 'frequency' : self .facets ['frequency' ],
839+ 'short_name' : self .facets ['short_name' ],
840+ }
841+ for step , kwargs in settings .items ():
829842 result = dask .delayed (preprocess )(
830843 result ,
831844 step ,
0 commit comments