2222)
2323from xarray .core import indexing
2424from xarray .core .common import contains_cftime_datetimes , is_np_datetime_like
25+ from xarray .core .duck_array_ops import asarray
2526from xarray .core .formatting import first_n_items , format_timestamp , last_item
27+ from xarray .core .parallelcompat import T_ChunkedArray , get_chunked_array_type
2628from xarray .core .pdcompat import nanosecond_precision_timestamp
27- from xarray .core .pycompat import is_duck_dask_array
29+ from xarray .core .pycompat import is_chunked_array , is_duck_dask_array
2830from xarray .core .utils import emit_user_level_warning
2931from xarray .core .variable import Variable
3032
3436 cftime = None
3537
3638if TYPE_CHECKING :
37- from xarray .core .types import CFCalendar
39+ from xarray .core .types import CFCalendar , T_DuckArray
3840
3941 T_Name = Union [Hashable , None ]
4042
@@ -667,12 +669,48 @@ def _division(deltas, delta, floor):
667669 return num
668670
669671
def _cast_to_dtype_if_safe(num: np.ndarray, dtype: np.dtype) -> np.ndarray:
    """Cast encoded time values to ``dtype``, raising if the cast is lossy.

    Parameters
    ----------
    num : np.ndarray
        Encoded numeric time values.
    dtype : np.dtype
        Requested dtype for the encoded values.

    Returns
    -------
    np.ndarray
        ``num`` converted to ``dtype``.

    Raises
    ------
    ValueError
        If ``dtype`` is an integer dtype, ``num`` is floating point, and at
        least one value does not compare equal to its cast counterpart.
    OverflowError
        If ``dtype`` is an integer dtype, ``num`` is not floating point, and
        at least one value changes when cast; or if ``dtype`` is not an
        integer dtype and the cast result contains an infinity.
    """
    with warnings.catch_warnings():
        # NumPy's own cast warnings are redundant here: every lossy cast is
        # detected below and reported with a more actionable error.  Both
        # messages must be silenced, otherwise running with warnings turned
        # into errors would surface a RuntimeWarning instead of the
        # ValueError / OverflowError raised by this function.
        warnings.filterwarnings("ignore", message="overflow")
        warnings.filterwarnings(
            "ignore", message="invalid value encountered in cast"
        )
        cast_num = np.asarray(num, dtype=dtype)

    if np.issubdtype(dtype, np.integer):
        # An integer target must reproduce every input value exactly.
        if not (num == cast_num).all():
            if np.issubdtype(num.dtype, np.floating):
                raise ValueError(
                    f"Not possible to cast all encoded times from "
                    f"{num.dtype!r} to {dtype!r} without losing precision. "
                    f"Consider modifying the units such that integer values "
                    f"can be used, or removing the units and dtype encoding, "
                    f"at which point xarray will make an appropriate choice."
                )
            else:
                raise OverflowError(
                    f"Not possible to cast encoded times from "
                    f"{num.dtype!r} to {dtype!r} without overflow. Consider "
                    f"removing the dtype encoding, at which point xarray will "
                    f"make an appropriate choice, or explicitly switching to "
                    "a larger integer dtype."
                )
    else:
        # For non-integer targets an infinity in the result is treated as
        # an overflow of the requested dtype.
        if np.isinf(cast_num).any():
            raise OverflowError(
                f"Not possible to cast encoded times from {num.dtype!r} to "
                f"{dtype!r} without overflow. Consider removing the dtype "
                f"encoding, at which point xarray will make an appropriate "
                f"choice, or explicitly switching to a larger floating point "
                f"dtype."
            )

    return cast_num
706+
707+
670708def encode_cf_datetime (
671- dates ,
709+ dates : T_DuckArray , # type: ignore
672710 units : str | None = None ,
673711 calendar : str | None = None ,
674712 dtype : np .dtype | None = None ,
675- ) -> tuple [np . ndarray , str , str ]:
713+ ) -> tuple [T_DuckArray , str , str ]:
676714 """Given an array of datetime objects, returns the tuple `(num, units,
677715 calendar)` suitable for a CF compliant time variable.
678716
@@ -682,7 +720,21 @@ def encode_cf_datetime(
682720 --------
683721 cftime.date2num
684722 """
685- dates = np .asarray (dates )
723+ dates = asarray (dates )
724+ if is_chunked_array (dates ):
725+ return _lazily_encode_cf_datetime (dates , units , calendar , dtype )
726+ else :
727+ return _eagerly_encode_cf_datetime (dates , units , calendar , dtype )
728+
729+
730+ def _eagerly_encode_cf_datetime (
731+ dates : T_DuckArray , # type: ignore
732+ units : str | None = None ,
733+ calendar : str | None = None ,
734+ dtype : np .dtype | None = None ,
735+ allow_units_modification : bool = True ,
736+ ) -> tuple [T_DuckArray , str , str ]:
737+ dates = asarray (dates )
686738
687739 data_units = infer_datetime_units (dates )
688740
@@ -731,7 +783,7 @@ def encode_cf_datetime(
731783 f"Set encoding['dtype'] to integer dtype to serialize to int64. "
732784 f"Set encoding['dtype'] to floating point dtype to silence this warning."
733785 )
734- elif np .issubdtype (dtype , np .integer ):
786+ elif np .issubdtype (dtype , np .integer ) and allow_units_modification :
735787 new_units = f"{ needed_units } since { format_timestamp (ref_date )} "
736788 emit_user_level_warning (
737789 f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
@@ -752,12 +804,80 @@ def encode_cf_datetime(
752804 # we already covered for this in pandas-based flow
753805 num = cast_to_int_if_safe (num )
754806
755- return (num , units , calendar )
807+ if dtype is not None :
808+ num = _cast_to_dtype_if_safe (num , dtype )
809+
810+ return num , units , calendar
811+
812+
def _encode_cf_datetime_within_map_blocks(
    dates: T_DuckArray,  # type: ignore
    units: str,
    calendar: str,
    dtype: np.dtype,
) -> T_DuckArray:
    """Encode one block of datetimes and return only the numeric values.

    Delegates to ``_eagerly_encode_cf_datetime`` with
    ``allow_units_modification=False`` and discards the units and calendar
    that the eager encoder returns alongside the encoded values.
    """
    encoded, *_unused = _eagerly_encode_cf_datetime(
        dates,
        units=units,
        calendar=calendar,
        dtype=dtype,
        allow_units_modification=False,
    )
    return encoded
823+
824+
def _lazily_encode_cf_datetime(
    dates: T_ChunkedArray,
    units: str | None = None,
    calendar: str | None = None,
    dtype: np.dtype | None = None,
) -> tuple[T_ChunkedArray, str, str]:
    """Encode a chunked array of datetimes block by block.

    Parameters
    ----------
    dates : T_ChunkedArray
        Chunked array of datetime values.
    units : str, optional
        Units encoding. Must be given together with ``dtype`` or not at all.
    calendar : str, optional
        Calendar name; inferred with ``infer_calendar_name`` when omitted.
    dtype : np.dtype, optional
        Dtype encoding. Must be given together with ``units`` or not at all.

    Returns
    -------
    tuple
        ``(num, units, calendar)`` where ``num`` is the result of the chunk
        manager's ``map_blocks`` call.

    Raises
    ------
    ValueError
        If exactly one of ``units`` and ``dtype`` is provided.
    """
    if calendar is None:
        # This will only trigger minor compute if dates is an object dtype array.
        calendar = infer_calendar_name(dates)

    if units is None and dtype is None:
        # Neither was prescribed: fall back to int64 with a resolution that
        # depends on whether the dates are stored as objects.
        resolution = "microseconds" if dates.dtype == "O" else "nanoseconds"
        units = f"{resolution} since 1970-01-01"
        dtype = np.dtype("int64")
    elif units is None or dtype is None:
        raise ValueError(
            "When encoding chunked arrays of datetime values, both the units "
            "and dtype must be prescribed or both must be unprescribed. "
            "Prescribing only one or the other is not currently supported. "
            f"Got a units encoding of {units} and a dtype encoding of {dtype}."
        )

    manager = get_chunked_array_type(dates)
    encoded = manager.map_blocks(
        _encode_cf_datetime_within_map_blocks,
        dates,
        units,
        calendar,
        dtype,
        dtype=dtype,
    )
    return encoded, units, calendar
756861
757862
def encode_cf_timedelta(
    timedeltas: T_DuckArray,  # type: ignore
    units: str | None = None,
    dtype: np.dtype | None = None,
) -> tuple[T_DuckArray, str]:
    """Encode timedeltas, dispatching on whether the input is chunked.

    The input is first passed through ``asarray``. Chunked arrays are handed
    to ``_lazily_encode_cf_timedelta``; everything else goes to
    ``_eagerly_encode_cf_timedelta``. The chosen encoder's result is returned
    unchanged.
    """
    timedeltas = asarray(timedeltas)
    encoder = (
        _lazily_encode_cf_timedelta
        if is_chunked_array(timedeltas)
        else _eagerly_encode_cf_timedelta
    )
    return encoder(timedeltas, units, dtype)
873+
874+
875+ def _eagerly_encode_cf_timedelta (
876+ timedeltas : T_DuckArray , # type: ignore
877+ units : str | None = None ,
878+ dtype : np .dtype | None = None ,
879+ allow_units_modification : bool = True ,
880+ ) -> tuple [T_DuckArray , str ]:
761881 data_units = infer_timedelta_units (timedeltas )
762882
763883 if units is None :
@@ -784,7 +904,7 @@ def encode_cf_timedelta(
784904 f"Set encoding['dtype'] to integer dtype to serialize to int64. "
785905 f"Set encoding['dtype'] to floating point dtype to silence this warning."
786906 )
787- elif np .issubdtype (dtype , np .integer ):
907+ elif np .issubdtype (dtype , np .integer ) and allow_units_modification :
788908 emit_user_level_warning (
789909 f"Timedeltas can't be serialized faithfully with requested units { units !r} . "
790910 f"Serializing with units { needed_units !r} instead. "
@@ -797,7 +917,49 @@ def encode_cf_timedelta(
797917
798918 num = _division (time_deltas , time_delta , floor_division )
799919 num = num .values .reshape (timedeltas .shape )
800- return (num , units )
920+
921+ if dtype is not None :
922+ num = _cast_to_dtype_if_safe (num , dtype )
923+
924+ return num , units
925+
926+
def _encode_cf_timedelta_within_map_blocks(
    timedeltas: T_DuckArray,  # type:ignore
    units: str,
    dtype: np.dtype,
) -> T_DuckArray:
    """Encode one block of timedeltas and return only the numeric values.

    Delegates to ``_eagerly_encode_cf_timedelta`` with
    ``allow_units_modification=False`` and discards the units returned
    alongside the encoded values.
    """
    encoded, _units = _eagerly_encode_cf_timedelta(
        timedeltas,
        units=units,
        dtype=dtype,
        allow_units_modification=False,
    )
    return encoded
936+
937+
def _lazily_encode_cf_timedelta(
    timedeltas: T_ChunkedArray,
    units: str | None = None,
    dtype: np.dtype | None = None,
) -> tuple[T_ChunkedArray, str]:
    """Encode a chunked array of timedeltas block by block.

    Parameters
    ----------
    timedeltas : T_ChunkedArray
        Chunked array of timedelta values.
    units : str, optional
        Units encoding. Must be given together with ``dtype`` or not at all.
    dtype : np.dtype, optional
        Dtype encoding. Must be given together with ``units`` or not at all.

    Returns
    -------
    tuple
        ``(num, units)`` where ``num`` is the result of the chunk manager's
        ``map_blocks`` call.

    Raises
    ------
    ValueError
        If exactly one of ``units`` and ``dtype`` is provided.
    """
    if units is None and dtype is None:
        # Neither was prescribed: default to int64 nanoseconds.
        units, dtype = "nanoseconds", np.dtype("int64")
    elif units is None or dtype is None:
        raise ValueError(
            "When encoding chunked arrays of timedelta values, both the "
            "units and dtype must be prescribed or both must be "
            "unprescribed. Prescribing only one or the other is not "
            f"currently supported. Got a units encoding of {units} and a "
            f"dtype encoding of {dtype}."
        )

    manager = get_chunked_array_type(timedeltas)
    encoded = manager.map_blocks(
        _encode_cf_timedelta_within_map_blocks,
        timedeltas,
        units,
        dtype,
        dtype=dtype,
    )
    return encoded, units
801963
802964
803965class CFDatetimeCoder (VariableCoder ):
0 commit comments