Skip to content

Commit 24df29a

Browse files
committed
Improve dataset for L3
1 parent fbc988f commit 24df29a

6 files changed

Lines changed: 46 additions & 11 deletions

File tree

gpm/dataset/attrs.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,8 @@
5656
"Sensor",
5757
# DPRKuInfo, DPRKaInfo,
5858
"eqvWavelength",
59+
# IMERG
60+
# "title",
5961
)
6062

6163

@@ -183,7 +185,11 @@ def get_granule_attrs(dt):
183185
# Flatten attributes (without group)
184186
if _has_nested_dictionary(nested_attrs):
185187
attrs = {}
186-
_ = [attrs.update(group_attrs) for group, group_attrs in nested_attrs.items()]
188+
for k, v in nested_attrs.items():
189+
if isinstance(v, dict):
190+
attrs.update(v)
191+
else:
192+
attrs[k] = v
187193
else:
188194
attrs = nested_attrs
189195
# Subset only required attributes

gpm/dataset/coords.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,19 @@ def get_grid_coords(dt, scan_mode):
110110
"""
111111
attrs = decode_string(dt.attrs["FileHeader"])
112112
start_time = attrs["StartGranuleDateTime"][:-1] # 2016-03-09T10:30:00.000Z
113-
# end_time = attrs["StopGranuleDateTime"][:-1] # 2003-05-01T23:59:59.999Z
114-
time_interval = attrs["TimeInterval"]
115-
time_delta = get_time_delta_from_time_interval(time_interval)
116-
start_time = np.array([start_time]).astype("M8[ns]")
117-
end_time = start_time + time_delta
113+
end_time = attrs.get("StopGranuleDateTime", "")[:-1] # 2003-05-01T23:59:59.999Z
114+
time_interval = attrs.get("TimeInterval", "")
115+
if time_interval in {"ORBIT", ""}:
116+
start_time = np.array([start_time]).astype("M8[ns]")
117+
end_time = np.array([end_time]).astype("M8[ns]")
118+
elif time_interval == "MONTH":
119+
end_time = pd.to_datetime(start_time) + pd.DateOffset(months=1)
120+
start_time = np.array([start_time]).astype("M8[ns]")
121+
end_time = np.array([end_time]).astype("M8[ns]")
122+
else:
123+
time_delta = get_time_delta_from_time_interval(time_interval)
124+
start_time = np.array([start_time]).astype("M8[ns]")
125+
end_time = start_time + time_delta
118126

119127
# Define time coordinate
120128
time = xr.DataArray(end_time, dims="time")
@@ -128,18 +136,24 @@ def get_grid_coords(dt, scan_mode):
128136
time_bnds = np.concatenate((start_time, end_time)).reshape(1, 2)
129137
time_bnds = xr.DataArray(time_bnds, dims=("time", "nv"))
130138

139+
# Retrieve coordinates
140+
lon = dt[scan_mode].get("lon")
141+
lat = dt[scan_mode].get("lat")
142+
131143
# Define dictionary with coordinates (DataArray)
132-
return {
144+
coords_dict = {
133145
"time": time,
134-
"lon": dt[scan_mode]["lon"],
135-
"lat": dt[scan_mode]["lat"],
146+
"lon": lon,
147+
"lat": lat,
136148
"time_bnds": time_bnds,
137149
}
150+
coords_dict = {k: v for k, v in coords_dict.items() if v is not None}
151+
return coords_dict
138152

139153

140154
def get_coords(dt, scan_mode):
141155
"""Get coordinates from GPM objects."""
142-
return get_grid_coords(dt, scan_mode) if scan_mode in ["Grid"] else get_orbit_coords(dt, scan_mode)
156+
return get_grid_coords(dt, scan_mode) if scan_mode in ["Grid", "GRID", ""] else get_orbit_coords(dt, scan_mode)
143157

144158

145159
def _subset_dict_by_dataset(ds, dictionary):

gpm/dataset/dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,8 @@ def open_files(
565565
# Infer product from file
566566
product = _infer_product_name(dict_scan_modes[scan_modes[0]])
567567

568+
# TODO: Specialized sanitizer for L3 SLH, CSH, ...
569+
568570
# Warn if product is unknown
569571
if product is None:
570572
msg = "GPM-API didn't apply specialized variables decoding because product is unknown !"

gpm/dataset/datatree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def check_non_empty_granule(dt, filepath):
8888
"""Check that the datatree (or dataset) is not empty."""
8989
attrs = dt.attrs
9090
attrs = decode_string(attrs["FileHeader"])
91-
is_empty_granule = attrs["EmptyGranule"] != "NOT_EMPTY"
91+
is_empty_granule = attrs.get("EmptyGranule", "NOT_EMPTY") != "NOT_EMPTY"
9292
if is_empty_granule:
9393
raise ValueError(f"{filepath} is an EMPTY granule !")
9494

gpm/dataset/dimensions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,14 @@
9393
"npixelht": "cross_track",
9494
"npixelcs": "cross_track",
9595
"npixelfr": "cross_track", # S4 mode
96+
# L3 CSH, SLH
97+
"nlon": "lon",
98+
"nlat": "lat",
99+
# L3 DPR
100+
"ltL": "lat",
101+
"lnL": "lon",
102+
"ltH": "lat",
103+
"lnH": "lon",
96104
}
97105

98106
SPATIAL_DIMS = [

gpm/dataset/granule.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ def _get_scan_mode_dataset(
225225
)
226226
closer = ds._close
227227

228+
# L3 DPR: GridTime
229+
228230
# If GRID, time in coords and not a dimension of ds, add time dimension (L3 products)
229231
if "along_track" not in ds.dims and "time" in coords and "time" not in set(ds.dims):
230232
vars_to_expand = [
@@ -283,6 +285,9 @@ def get_scan_modes_datasets(filepath, groups, variables, decode_cf, chunks, pref
283285
]
284286
scan_modes = set(nodes) - set(invalid_nodes)
285287

288+
if dt.depth == 0: # IMERG L3 DAILY
289+
scan_modes = [""]
290+
286291
# Retrieve scan mode dataset (without cf decoding)
287292
dict_scan_modes = {}
288293
for scan_mode in scan_modes:

0 commit comments

Comments
 (0)