-
Notifications
You must be signed in to change notification settings - Fork 49
Expand file tree
/
Copy pathtest_dataset.py
More file actions
408 lines (319 loc) · 14.9 KB
/
test_dataset.py
File metadata and controls
408 lines (319 loc) · 14.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
import os
from pathlib import Path
import numpy.testing as nt
import xarray as xr
import uxarray as ux
from uxarray import UxDataset
import pytest
try:
import constants
except ImportError:
from . import constants
# Directory that holds this test module; all mesh fixtures live beneath it.
current_path = Path(os.path.realpath(__file__)).parent

# ne30 cubed-sphere grid (UGRID) and a data file defined on it.
gridfile_ne30 = current_path.joinpath("meshfiles", "ugrid", "outCSne30", "outCSne30.ug")
dsfile_var2_ne30 = current_path.joinpath("meshfiles", "ugrid", "outCSne30", "outCSne30_var2.nc")

# Small geoflow grid (UGRID) and its "v1" data file.
gridfile_geoflow = current_path.joinpath("meshfiles", "ugrid", "geoflow-small", "grid.nc")
dsfile_v1_geoflow = current_path.joinpath("meshfiles", "ugrid", "geoflow-small", "v1.nc")

# MPAS mesh file; the tests below pass it as both the grid and the data file.
mpas_ds_path = current_path.joinpath("meshfiles", "mpas", "QU", "mesh.QU.1920km.151026.nc")
def test_uxgrid_setget():
    """Check the grid attached by ``ux.open_dataset``.

    The dataset is opened from the ne30 grid and data files, and its
    ``uxgrid`` is compared with a grid opened from the grid file alone.
    """
    dataset = ux.open_dataset(gridfile_ne30, dsfile_var2_ne30)
    standalone_grid = ux.open_grid(gridfile_ne30)

    assert dataset.uxgrid == standalone_grid
def test_integrate():
    """Integrate the ne30 dataset and compare with the stored reference.

    The result must match ``constants.VAR2_INTG`` to three decimal places.
    """
    dataset = ux.open_dataset(gridfile_ne30, dsfile_var2_ne30)
    nt.assert_almost_equal(dataset.integrate(), constants.VAR2_INTG, decimal=3)
def test_info():
    """Check that ``info(show_attrs=True)`` runs without raising.

    The geoflow grid and its v1 data file are opened and ``info`` is called
    with stdout redirected to an in-memory buffer. Any exception propagates
    directly, so pytest reports the original exception and traceback instead
    of a generic assertion failure.
    """
    import contextlib
    import io

    uxds_v1_geoflow = ux.open_dataset(gridfile_geoflow, dsfile_v1_geoflow)

    # Only a clean return matters here; whatever is written to stdout is
    # captured and discarded to keep the test log quiet.
    with contextlib.redirect_stdout(io.StringIO()):
        uxds_v1_geoflow.info(show_attrs=True)
def test_ugrid_dim_names():
    """Check that an opened MPAS dataset uses the UGRID dimension names.

    After opening, ``n_face``, ``n_node`` and ``n_edge`` must all be present
    in the dataset's dimensions.
    """
    dataset = ux.open_dataset(mpas_ds_path, mpas_ds_path)

    for expected_dim in ("n_face", "n_node", "n_edge"):
        assert expected_dim in dataset.dims
def test_get_dual():
    """Build the dual of a dataset and check its type and variable count.

    ``get_dual`` must return a ``UxDataset`` with as many data variables as
    the dataset it was built from.
    """
    primal = ux.open_dataset(gridfile_ne30, dsfile_var2_ne30)
    dual_ds = primal.get_dual()

    assert isinstance(dual_ds, UxDataset)
    assert len(dual_ds.data_vars) == len(primal.data_vars)
def test_groupby_preserves_uxgrid():
    """Check that ``groupby(...).mean()`` keeps the ``uxgrid`` attribute.

    Both the DataArray and the Dataset code paths are exercised, grouping by
    a 0/1 label derived from the sign of ``latCell``.
    """
    source = ux.open_dataset(mpas_ds_path, mpas_ds_path)
    grid_before = source.uxgrid

    # 1 where latCell > 0, otherwise 0, pulled out as a plain array so it
    # can be attached as a coordinate on the face dimension.
    positive_lat = source.latCell > 0
    labels = positive_lat.astype(int).values
    labelled = source.assign_coords({"lat_bins": ("n_face", labels)})

    # DataArray path
    da_mean = labelled.latCell.groupby(labelled.lat_bins).mean()
    assert hasattr(da_mean, "uxgrid")
    assert da_mean.uxgrid is not None

    # Dataset path
    ds_mean = labelled.groupby(labelled.lat_bins).mean()
    assert hasattr(ds_mean, "uxgrid")
    assert ds_mean.uxgrid is not None
    assert ds_mean.uxgrid == grid_before
def test_groupby_bins_preserves_uxgrid():
    """Check that ``groupby_bins(...).mean()`` keeps the ``uxgrid`` attribute.

    ``latCell`` is binned with fixed edges and the mean is taken through
    both the DataArray and the Dataset code paths.
    """
    source = ux.open_dataset(mpas_ds_path, mpas_ds_path)
    grid_before = source.uxgrid

    # Fixed bin edges applied to the latCell values.
    bin_edges = [-90, -45, 0, 45, 90]

    # DataArray path
    da_mean = source.latCell.groupby_bins(source.latCell, bins=bin_edges).mean()
    assert hasattr(da_mean, "uxgrid")
    assert da_mean.uxgrid is not None

    # Dataset path
    ds_mean = source.groupby_bins(source.latCell, bins=bin_edges).mean()
    assert hasattr(ds_mean, "uxgrid")
    assert ds_mean.uxgrid is not None
    assert ds_mean.uxgrid == grid_before
def test_resample_preserves_uxgrid_and_reduces_time():
    """Check that resampling keeps ``uxgrid`` and shortens the time axis.

    A 12-day daily series is wrapped in a ``UxDataset`` that carries the ne30
    grid, then resampled weekly. Failures surface as ordinary assertion
    errors or the original exception, so pytest reports them with their real
    tracebacks rather than a re-wrapped ``pytest.fail`` message.
    """
    import numpy as np
    import pandas as pd

    # Time-only dataset: the variable has no spatial dimension.
    times = pd.date_range("2000-01-01", periods=12, freq="D")
    xr_ds = xr.Dataset(
        {"temperature": ("time", np.random.rand(12))},
        coords={"time": times},
    )

    # Attach a grid loaded from a real file.
    uxds = ux.UxDataset(xr_ds, uxgrid=ux.open_grid(gridfile_ne30))

    result = uxds.temperature.resample(time="1W").mean()

    assert hasattr(result, "uxgrid"), "uxgrid not preserved on resample"
    assert result.uxgrid == uxds.uxgrid, "uxgrid not equal after resample"
    assert len(result.time) < len(uxds.time), "time dimension not reduced"
def test_resample_preserves_uxgrid():
    """Check that ``resample(...).mean()`` returns the very same ``uxgrid``.

    Both the DataArray and the Dataset code paths are exercised on a 12-day
    daily series resampled weekly. The checks use identity (``is``), not
    equality.
    """
    import numpy as np
    import pandas as pd

    # Time-only dataset: the variable has no spatial dimension.
    times = pd.date_range("2000-01-01", periods=12, freq="D")
    ds = xr.Dataset(
        {"temperature": ("time", np.random.rand(12))},
        coords={"time": times},
    )

    # Load the grid directly, as the other tests in this module do, instead
    # of opening the grid file as its own data file just to read ``uxgrid``.
    original_uxgrid = ux.open_grid(gridfile_ne30)
    uxds_time = ux.UxDataset(ds, uxgrid=original_uxgrid)

    # DataArray path
    da_result = uxds_time.temperature.resample(time="1W").mean()
    assert hasattr(da_result, "uxgrid"), "uxgrid not preserved on DataArray resample"
    assert da_result.uxgrid is original_uxgrid, "uxgrid not identical after DataArray resample"

    # Dataset path
    ds_result = uxds_time.resample(time="1W").mean()
    assert hasattr(ds_result, "uxgrid"), "uxgrid not preserved on Dataset resample"
    assert ds_result.uxgrid is original_uxgrid, "uxgrid not identical after Dataset resample"
def test_resample_reduces_time_dimension():
    """Check that monthly resampling shortens the time dimension.

    365 daily samples starting 2000-01-01 are resampled with ``"1M"`` and
    averaged. The result must keep a ``time`` dimension that is shorter than
    the input and has at most 12 entries.
    """
    import numpy as np
    import pandas as pd

    times = pd.date_range("2000-01-01", periods=365, freq="D")
    ds = xr.Dataset(
        {"temperature": ("time", np.random.rand(365))},
        coords={"time": times},
    )
    uxds = ux.UxDataset(ds, uxgrid=ux.open_grid(gridfile_ne30))

    # NOTE(review): pandas >= 2.2 deprecates the "M" alias in favour of
    # "ME"; confirm the minimum supported pandas before switching.
    monthly = uxds.resample(time="1M").mean()

    # ``sizes`` is the name -> length mapping; mapping-style access through
    # ``Dataset.dims`` is deprecated in xarray.
    assert "time" in monthly.dims, "time dimension missing after resample"
    assert monthly.sizes["time"] < uxds.sizes["time"], "time dimension not reduced"
    assert monthly.sizes["time"] <= 12, "monthly resampling should give 12 or fewer time points"
def test_resample_with_cftime():
    """Check that resampling works on a ``cftime`` time axis.

    Twelve mid-month ``DatetimeNoLeap`` stamps are resampled with ``"Q"``
    and averaged. The result must keep ``uxgrid`` and a shorter ``time``
    dimension. The test is skipped when ``cftime`` is not installed.
    """
    import numpy as np

    # Skips the test (rather than failing) when the optional dependency
    # is not installed.
    cftime = pytest.importorskip("cftime")

    times = [cftime.DatetimeNoLeap(2000, month, 15) for month in range(1, 13)]
    ds = xr.Dataset(
        {"temperature": ("time", np.random.rand(12))},
        coords={"time": times},
    )
    uxds = ux.UxDataset(ds, uxgrid=ux.open_grid(gridfile_ne30))

    quarterly = uxds.resample(time="Q").mean()

    # ``sizes`` is the name -> length mapping; mapping-style access through
    # ``Dataset.dims`` is deprecated in xarray.
    assert hasattr(quarterly, "uxgrid"), "uxgrid not preserved with cftime resampling"
    assert "time" in quarterly.dims, "time dimension missing after cftime resample"
    assert quarterly.sizes["time"] < uxds.sizes["time"], "time dimension not reduced with cftime"
def test_rolling_preserves_uxgrid():
    """Check that a 7-step rolling mean returns the very same ``uxgrid``.

    Both the DataArray and the Dataset code paths are exercised on a 30-day
    daily series. The DataArray result is also checked for unchanged length
    and for having no NaNs from index 6 onward.
    """
    import numpy as np
    import pandas as pd

    # Time-only dataset carrying the ne30 grid.
    daily = pd.date_range("2000-01-01", periods=30, freq="D")
    plain_ds = xr.Dataset(
        {"temperature": ("time", np.random.rand(30))},
        coords={"time": daily},
    )
    uxds = ux.UxDataset(plain_ds, uxgrid=ux.open_grid(gridfile_ne30))
    original_uxgrid = uxds.uxgrid

    # DataArray path
    da_result = uxds.temperature.rolling(time=7).mean()
    assert hasattr(da_result, "uxgrid"), "uxgrid not preserved on DataArray rolling"
    assert da_result.uxgrid is original_uxgrid, "uxgrid not identical after DataArray rolling"

    # Dataset path
    ds_result = uxds.rolling(time=7).mean()
    assert hasattr(ds_result, "uxgrid"), "uxgrid not preserved on Dataset rolling"
    assert ds_result.uxgrid is original_uxgrid, "uxgrid not identical after Dataset rolling"

    # Length is unchanged, and entries from index 6 on are all non-NaN.
    assert len(da_result.time) == len(uxds.time), "rolling should preserve time dimension length"
    filled = da_result.values[6:]
    assert not np.isnan(filled).any(), "rolling mean should have valid values after window size"
def test_coarsen_preserves_uxgrid():
    """Check that ``coarsen(time=3).mean()`` returns the very same ``uxgrid``.

    Both the DataArray and the Dataset code paths are exercised on a 24-day
    daily series, which must shrink to 8 time points.
    """
    import numpy as np
    import pandas as pd

    # 24 samples: an exact multiple of the coarsening factor of 3.
    times = pd.date_range("2000-01-01", periods=24, freq="D")
    ds = xr.Dataset(
        {"temperature": ("time", np.random.rand(24))},
        coords={"time": times},
    )
    uxds = ux.UxDataset(ds, uxgrid=ux.open_grid(gridfile_ne30))
    original_uxgrid = uxds.uxgrid

    # DataArray path
    da_result = uxds.temperature.coarsen(time=3).mean()
    assert hasattr(da_result, "uxgrid"), "uxgrid not preserved on DataArray coarsen"
    assert da_result.uxgrid is original_uxgrid, "uxgrid not identical after DataArray coarsen"

    # Dataset path
    ds_result = uxds.coarsen(time=3).mean()
    assert hasattr(ds_result, "uxgrid"), "uxgrid not preserved on Dataset coarsen"
    assert ds_result.uxgrid is original_uxgrid, "uxgrid not identical after Dataset coarsen"

    # ``sizes`` is the name -> length mapping; mapping-style access through
    # ``Dataset.dims`` is deprecated in xarray.
    assert len(da_result.time) == 8, "coarsen by 3 should reduce 24 points to 8"
    assert ds_result.sizes["time"] == 8, "coarsen should reduce time dimension"
def test_weighted_preserves_uxgrid():
    """Check that a weighted mean over time returns the very same ``uxgrid``.

    A ``(time, n_face)`` variable is averaged over ``time`` with per-time
    weights through both the DataArray and the Dataset code paths. The
    DataArray result must be left with only the ``n_face`` dimension.
    """
    import numpy as np
    import pandas as pd

    times = pd.date_range("2000-01-01", periods=10, freq="D")

    # Open a real dataset to obtain the grid and its face count. ``sizes``
    # is the name -> length mapping; mapping-style access through
    # ``Dataset.dims`` is deprecated in xarray.
    uxds_base = ux.open_dataset(gridfile_ne30, dsfile_var2_ne30)
    n_face = uxds_base.sizes["n_face"]

    temp_data = np.random.rand(10, n_face)
    weights_data = np.random.rand(10)  # one weight per time step

    ds = xr.Dataset(
        {
            "temperature": (["time", "n_face"], temp_data),
            "weights": ("time", weights_data),
        },
        coords={"time": times},
    )
    uxds = ux.UxDataset(ds, uxgrid=uxds_base.uxgrid)
    original_uxgrid = uxds.uxgrid

    # DataArray path
    da_result = uxds.temperature.weighted(uxds.weights).mean("time")
    assert hasattr(da_result, "uxgrid"), "uxgrid not preserved on DataArray weighted"
    assert da_result.uxgrid is original_uxgrid, "uxgrid not identical after DataArray weighted"

    # Dataset path
    ds_result = uxds.weighted(uxds.weights).mean("time")
    assert hasattr(ds_result, "uxgrid"), "uxgrid not preserved on Dataset weighted"
    assert ds_result.uxgrid is original_uxgrid, "uxgrid not identical after Dataset weighted"

    # The reduction removes ``time`` and leaves the face dimension intact.
    assert "time" not in da_result.dims, "weighted mean over time should remove time dimension"
    assert "n_face" in da_result.dims, "face dimension should be preserved"
    assert da_result.shape == (n_face,), "result should only have face dimension"
def test_cumulative_preserves_uxgrid():
    """Check that ``cumulative("time").sum()`` returns the very same ``uxgrid``.

    Both the DataArray and the Dataset code paths are exercised on a 10-day
    daily series. The DataArray result must keep the input's time length.
    """
    import numpy as np
    import pandas as pd

    # Time-only dataset carrying the ne30 grid.
    daily = pd.date_range("2000-01-01", periods=10, freq="D")
    plain_ds = xr.Dataset(
        {"temperature": ("time", np.random.rand(10))},
        coords={"time": daily},
    )
    uxds = ux.UxDataset(plain_ds, uxgrid=ux.open_grid(gridfile_ne30))
    original_uxgrid = uxds.uxgrid

    # DataArray path
    da_result = uxds.temperature.cumulative("time").sum()
    assert hasattr(da_result, "uxgrid"), "uxgrid not preserved on DataArray cumulative"
    assert da_result.uxgrid is original_uxgrid, "uxgrid not identical after DataArray cumulative"

    # Dataset path
    ds_result = uxds.cumulative("time").sum()
    assert hasattr(ds_result, "uxgrid"), "uxgrid not preserved on Dataset cumulative"
    assert ds_result.uxgrid is original_uxgrid, "uxgrid not identical after Dataset cumulative"

    # The time axis must come back with its original length.
    assert len(da_result.time) == len(uxds.time), "cumulative should preserve time dimension length"
# The test below is disabled because it downloads a mesh file over HTTPS and so needs network access. Uncomment it to run it, and make sure download failures are handled.
# def test_read_from_https():
# """Tests reading a dataset from a HTTPS link."""
# import requests
#
# small_file_480km = requests.get(
# "https://web.lcrc.anl.gov/public/e3sm/inputdata/share/meshes/mpas/ocean/oQU480.230422.nc"
# ).content
#
# ds_small_480km = ux.open_dataset(small_file_480km, small_file_480km)
# assert isinstance(ds_small_480km, ux.core.dataset.UxDataset)