Skip to content

Commit 82d795e

Browse files
jlenhschlunma
andauthored
Add preprocessor to extract surface values from 3D atmospheric variables (#2641)
Co-authored-by: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Co-authored-by: Manuel Schlund <manuel.schlund@dlr.de>
1 parent d1453c6 commit 82d795e

12 files changed

Lines changed: 327 additions & 100 deletions

File tree

.zenodo.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,11 @@
219219
"affiliation": "ECCC, Canada",
220220
"name": "Garcia Perdomo, Karen",
221221
"orcid": "0009-0004-2333-3358"
222+
},
223+
{
224+
"affiliation": "SMHI, Sweden",
225+
"name": "Lenhardt, Julien",
226+
"orcid": "https://orcid.org/0000-0002-9949-3989"
222227
}
223228
],
224229
"description": "ESMValCore: A community tool for pre-processing data from Earth system models in CMIP and running analysis scripts.",

CITATION.cff

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,11 @@ authors:
223223
family-names: Garcia Perdomo
224224
given-names: Karen
225225
orcid: "https://orcid.org/0009-0004-2333-3358"
226+
-
227+
affiliation: "SMHI, Sweden"
228+
family-names: Lenhardt
229+
given-names: Julien
230+
orcid: "https://orcid.org/0000-0002-9949-3989"
226231

227232
cff-version: 1.2.0
228233
date-released: 2025-02-27

doc/recipe/preprocessor.rst

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ Preprocessor Variable s
311311
:ref:`weighting_landsea_fraction<land/sea fraction weighting>` [#f3]_ ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction
312312
:ref:`distance_metric<distance_metric>` [#f5]_ ``areacella``, ``areacello`` cell_area
313313
:ref:`histogram<histogram>` [#f5]_ ``areacella``, ``areacello`` cell_area
314+
:ref:`extract_surface_from_atm<extract_surface_from_atm>` [#f3]_ ``ps`` surface_air_pressure
314315
===================================================================== ============================== =====================================
315316

316317
.. [#f3] This preprocessor requires at least one of the mentioned supplementary
@@ -2118,7 +2119,6 @@ See also :func:`esmvalcore.preprocessor.extract_location`.
21182119

21192120
``zonal_statistics``
21202121
--------------------
2121-
21222122
The function calculates the zonal statistics by applying an operator
21232123
along the longitude coordinate.
21242124

@@ -2223,6 +2223,7 @@ The ``_volume.py`` module contains the following preprocessor functions:
22232223
* ``extract_transect``: Extract data along a line of constant latitude or
22242224
longitude.
22252225
* ``extract_trajectory``: Extract data along a specified trajectory.
2226+
* ``extract_surface_from_atm``: Extract atmospheric data at the surface.
22262227

22272228

22282229
``extract_volume``
@@ -2406,6 +2407,22 @@ Note that this function uses the expensive ``interpolate`` method from
24062407
See also :func:`esmvalcore.preprocessor.extract_trajectory`.
24072408

24082409

2410+
.. _extract_surface_from_atm:
2411+
2412+
``extract_surface_from_atm``
2413+
----------------------------
2414+
2415+
This function extracts data at the surface for an atmospheric variable.
2416+
2417+
The function returns the interpolated value of an input filed at the corresponding surface pressure
2418+
given by the surface air pressure ``ps``.
2419+
2420+
The required supplementary surface air pressure ``ps`` is attached to
2421+
the main dataset as described in :ref:`supplementary_variables`.
2422+
2423+
See also :func:`esmvalcore.preprocessor.extract_surface_from_atm`.
2424+
2425+
24092426
.. _cycles:
24102427

24112428
Cycles

esmvalcore/preprocessor/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
from ._volume import (
8787
axis_statistics,
8888
depth_integration,
89+
extract_surface_from_atm,
8990
extract_trajectory,
9091
extract_transect,
9192
extract_volume,
@@ -122,6 +123,8 @@
122123
"resample_time",
123124
# Level extraction
124125
"extract_levels",
126+
# Extract surface
127+
"extract_surface_from_atm",
125128
# Weighting
126129
"weighting_landsea_fraction",
127130
# Mask landsea (fx or Natural Earth)
Lines changed: 7 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,11 @@
11
"""Derivation of variable ``co2s``."""
22

3-
import dask.array as da
43
import iris
5-
import numpy as np
6-
import stratify
74

8-
from ._baseclass import DerivedVariableBase
9-
10-
11-
def _get_first_unmasked_data(array, axis):
12-
"""Get first unmasked value of an array along an axis."""
13-
mask = da.ma.getmaskarray(array)
14-
numerical_mask = da.where(mask, -1.0, 1.0)
15-
indices_first_positive = da.argmax(numerical_mask, axis=axis)
16-
indices = da.meshgrid(
17-
*[da.arange(array.shape[i]) for i in range(array.ndim) if i != axis],
18-
indexing="ij",
19-
)
5+
from esmvalcore.preprocessor._supplementary_vars import add_ancillary_variable
6+
from esmvalcore.preprocessor._volume import extract_surface_from_atm
207

21-
indices = list(indices)
22-
indices.insert(axis, indices_first_positive)
23-
first_unmasked_data = np.array(array)[tuple(indices)]
24-
return first_unmasked_data
8+
from ._baseclass import DerivedVariableBase
259

2610

2711
class DerivedVariable(DerivedVariableBase):
@@ -55,56 +39,9 @@ def calculate(cubes):
5539
ps_cube = cubes.extract_cube(
5640
iris.Constraint(name="surface_air_pressure")
5741
)
58-
59-
# Fill masked data if necessary (interpolation fails with masked data)
60-
(z_axis,) = co2_cube.coord_dims(
61-
co2_cube.coord(axis="Z", dim_coords=True)
62-
)
63-
mask = da.ma.getmaskarray(co2_cube.core_data())
64-
if mask.any():
65-
first_unmasked_data = _get_first_unmasked_data(
66-
co2_cube.core_data(), axis=z_axis
67-
)
68-
dim_map = [dim for dim in range(co2_cube.ndim) if dim != z_axis]
69-
first_unmasked_data = iris.util.broadcast_to_shape(
70-
first_unmasked_data, co2_cube.shape, dim_map
71-
)
72-
co2_cube.data = da.where(
73-
mask, first_unmasked_data, co2_cube.core_data()
74-
)
75-
76-
# Interpolation (not supported for dask arrays)
77-
air_pressure_coord = co2_cube.coord("air_pressure")
78-
original_levels = iris.util.broadcast_to_shape(
79-
air_pressure_coord.points,
80-
co2_cube.shape,
81-
co2_cube.coord_dims(air_pressure_coord),
82-
)
83-
target_levels = np.expand_dims(ps_cube.data, axis=z_axis)
84-
co2s_data = stratify.interpolate(
85-
target_levels,
86-
original_levels,
87-
co2_cube.data,
88-
axis=z_axis,
89-
interpolation="linear",
90-
extrapolation="linear",
91-
)
92-
co2s_data = np.squeeze(co2s_data, axis=z_axis)
93-
94-
# Construct co2s cube
95-
indices = [slice(None)] * co2_cube.ndim
96-
indices[z_axis] = 0
97-
co2s_cube = co2_cube[tuple(indices)]
98-
co2s_cube.data = co2s_data
99-
if co2s_cube.coords("air_pressure"):
100-
co2s_cube.remove_coord("air_pressure")
101-
ps_coord = iris.coords.AuxCoord(
102-
ps_cube.data,
103-
var_name="plev",
104-
standard_name="air_pressure",
105-
long_name="pressure",
106-
units=ps_cube.units,
107-
)
108-
co2s_cube.add_aux_coord(ps_coord, np.arange(co2s_cube.ndim))
42+
# Add ps as AncillaryVariable in the CO2 cube
43+
add_ancillary_variable(cube=co2_cube, ancillary_cube=ps_cube)
44+
# Extract surface from 3D atmospheric cube
45+
co2s_cube = extract_surface_from_atm(co2_cube)
10946
co2s_cube.convert_units("1e-6")
11047
return co2s_cube

esmvalcore/preprocessor/_regrid.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import dask.array as da
2020
import iris
21+
import iris.coords
2122
import numpy as np
2223
import stratify
2324
from geopy.geocoders import Nominatim
@@ -1020,7 +1021,7 @@ def _create_cube(src_cube, data, src_levels, levels):
10201021
z_coord = src_cube.coord(axis="z", dim_coords=True)
10211022
(z_dim,) = src_cube.coord_dims(z_coord)
10221023

1023-
if data.shape[z_dim] != levels.size:
1024+
if (len(levels.shape) == 1) and (data.shape[z_dim] != levels.size):
10241025
emsg = (
10251026
"Mismatch between data and levels for data dimension {!r}, "
10261027
"got data shape {!r} with levels shape {!r}."
@@ -1068,7 +1069,10 @@ def _create_cube(src_cube, data, src_levels, levels):
10681069
result.add_dim_coord(coord, z_dim)
10691070
except ValueError:
10701071
coord = iris.coords.AuxCoord(levels, **kwargs)
1071-
result.add_aux_coord(coord, z_dim)
1072+
result.add_aux_coord(
1073+
coord,
1074+
z_dim if len(levels.shape) == 1 else np.arange(len(coord.shape)),
1075+
)
10721076

10731077
# Collapse the z-dimension for the scalar case.
10741078
if levels.size == 1:
@@ -1151,7 +1155,24 @@ def _preserve_fx_vars(cube, result):
11511155
result.var_name,
11521156
)
11531157
else:
1154-
add_ancillary_variable(result, ancillary_var)
1158+
# Create cube and add coordinates to ancillary variable
1159+
ancillary_coords: list[
1160+
tuple[iris.coords.AncillaryVariable, int]
1161+
] = []
1162+
for i, coord in enumerate(cube.coords()):
1163+
if i in ancillary_dims:
1164+
coord_idx = len(ancillary_coords)
1165+
ancillary_coords.append((coord.copy(), coord_idx))
1166+
ancillary_cube = iris.cube.Cube(
1167+
ancillary_var.core_data(),
1168+
standard_name=ancillary_var.standard_name,
1169+
long_name=ancillary_var.long_name,
1170+
units=ancillary_var.units,
1171+
var_name=ancillary_var.var_name,
1172+
attributes=ancillary_var.attributes,
1173+
dim_coords_and_dims=ancillary_coords,
1174+
)
1175+
add_ancillary_variable(result, ancillary_cube)
11551176

11561177

11571178
def parse_vertical_scheme(scheme):
@@ -1288,8 +1309,10 @@ def extract_levels(
12881309
result = cube
12891310
# Set the levels to the requested values
12901311
src_levels.points = levels
1291-
elif len(src_levels.shape) == 1 and set(levels).issubset(
1292-
set(src_levels.points)
1312+
elif (
1313+
len(src_levels.shape) == 1
1314+
and len(levels.shape) == 1
1315+
and set(levels.flatten()).issubset(set(src_levels.points))
12931316
):
12941317
# If all target levels exist in the source cube, simply extract them.
12951318
name = src_levels.name()

esmvalcore/preprocessor/_supplementary_vars.py

Lines changed: 55 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def wrapper(func):
4545

4646
def add_cell_measure(
4747
cube: Cube,
48-
cell_measure_cube: Cube,
48+
cell_measure_cube: Cube | iris.coords.CellMeasure,
4949
measure: Literal["area", "volume"],
5050
) -> None:
5151
"""Add cell measure to cube (in-place).
@@ -60,7 +60,7 @@ def add_cell_measure(
6060
cube:
6161
Iris cube with input data.
6262
cell_measure_cube:
63-
Iris cube with cell measure data.
63+
Iris cube or cell measure coord with cell measure data.
6464
measure:
6565
Name of the measure, can be 'area' or 'volume'.
6666
@@ -101,39 +101,73 @@ def add_cell_measure(
101101
)
102102

103103

104-
def add_ancillary_variable(cube: Cube, ancillary_cube: Cube) -> None:
104+
def add_ancillary_variable(
105+
cube: Cube, ancillary_cube: Cube | iris.coords.AncillaryVariable
106+
) -> None:
105107
"""Add ancillary variable to cube (in-place).
106108
107-
Note
108-
----
109-
This assumes that the ancillary variable spans the rightmost dimensions of
110-
the cube.
111-
112109
Parameters
113110
----------
114111
cube:
115112
Iris cube with input data.
116113
ancillary_cube:
117-
Iris cube with ancillary data.
114+
Iris cube or AncillaryVariable with ancillary data.
118115
119116
Returns
120117
-------
121118
iris.cube.Cube
122119
Cube with added ancillary variables
123120
"""
124-
coord_dims = tuple(range(cube.ndim - len(ancillary_cube.shape), cube.ndim))
125-
ancillary_data = ancillary_cube.core_data()
121+
try:
122+
ancillary_var = iris.coords.AncillaryVariable(
123+
ancillary_cube.core_data(),
124+
standard_name=ancillary_cube.standard_name,
125+
units=ancillary_cube.units,
126+
var_name=ancillary_cube.var_name,
127+
attributes=ancillary_cube.attributes,
128+
long_name=ancillary_cube.long_name,
129+
)
130+
except AttributeError as err:
131+
msg = (
132+
f"Failed to add {ancillary_cube} to {cube} as ancillary var."
133+
"ancillary_cube should be either an iris.cube.Cube or an "
134+
"iris.coords.AncillaryVariable object."
135+
)
136+
raise ValueError(msg) from err
137+
# Match the coordinates of the ancillary cube to coordinates and
138+
# dimensions in the input cube before adding the ancillary variable.
139+
data_dims: list[int | None] = []
140+
if isinstance(ancillary_cube, iris.coords.AncillaryVariable):
141+
start_dim = cube.ndim - len(ancillary_var.shape)
142+
data_dims = list(range(start_dim, cube.ndim))
143+
else:
144+
data_dims = [None] * ancillary_cube.ndim
145+
for coord in ancillary_cube.coords():
146+
try:
147+
for ancillary_dim, cube_dim in zip(
148+
ancillary_cube.coord_dims(coord),
149+
cube.coord_dims(coord),
150+
strict=False,
151+
):
152+
data_dims[ancillary_dim] = cube_dim
153+
except iris.exceptions.CoordinateNotFoundError:
154+
logger.debug(
155+
"%s from ancillary cube not found in cube coords.", coord
156+
)
157+
if None in data_dims:
158+
none_dims = ", ".join(
159+
str(i) for i, d in enumerate(data_dims) if d is None
160+
)
161+
msg = (
162+
f"Failed to add {ancillary_cube} to {cube} as ancillary var."
163+
f"No coordinate associated with ancillary cube dimensions"
164+
f"{none_dims}"
165+
)
166+
raise ValueError(msg)
126167
if ancillary_cube.has_lazy_data():
127-
cube_chunks = tuple(cube.lazy_data().chunks[d] for d in coord_dims)
128-
ancillary_data = ancillary_data.rechunk(cube_chunks)
129-
ancillary_var = iris.coords.AncillaryVariable(
130-
ancillary_data,
131-
standard_name=ancillary_cube.standard_name,
132-
units=ancillary_cube.units,
133-
var_name=ancillary_cube.var_name,
134-
attributes=ancillary_cube.attributes,
135-
)
136-
cube.add_ancillary_variable(ancillary_var, coord_dims)
168+
cube_chunks = tuple(cube.lazy_data().chunks[d] for d in data_dims)
169+
ancillary_var.data = ancillary_cube.lazy_data().rechunk(cube_chunks)
170+
cube.add_ancillary_variable(ancillary_var, data_dims)
137171
logger.debug(
138172
"Added %s as ancillary variable in cube of %s.",
139173
ancillary_cube.var_name,

0 commit comments

Comments
 (0)