Skip to content

Commit c1218ac

Browse files
authored
Merge pull request #223 from smwoodman/grid
gridding: allow user depth bins and fix profile times
2 parents f704c92 + 5dbd71b commit c1218ac

3 files changed

Lines changed: 80 additions & 16 deletions

File tree

pyglider/ncprocess.py

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,14 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml, force=False):
187187

188188

189189
def make_gridfiles(
190-
inname, outdir, deploymentyaml, *, fnamesuffix='', dz=1, starttime='1970-01-01'
190+
inname,
191+
outdir,
192+
deploymentyaml,
193+
*,
194+
fnamesuffix='',
195+
depth_bins=None,
196+
dz=1,
197+
starttime='1970-01-01',
191198
):
192199
"""
193200
Turn a timeseries netCDF file into a vertically gridded netCDF.
@@ -204,15 +211,21 @@ def make_gridfiles(
204211
location of deployment yaml file for the netCDF file. This should
205212
be the same yaml file that was used to make the timeseries file.
206213
214+
depth_bins : array, default = None
215+
User-defined depth bins, for instance ``np.arange(0, 1000.1, 1)``.
216+
If not None, these are the depth bins into which the data will be
217+
gridded. If None, ``dz`` is used to generate bins between 0 and 1100m
218+
207219
dz : float, default = 1
208-
Vertical grid spacing in meters.
220+
Vertical grid spacing in meters. Ignored if ``depth_bins`` is not None
209221
210222
Returns
211223
-------
212224
outname : str
213-
Name of gridded netCDF file. The gridded netCDF file has coordinates of
225+
Name of gridded netCDF file. The gridded netCDF file has dimensions of
214226
'depth' and 'profile', so each variable is gridded in depth bins and by
215-
profile number. Each profile has a time, latitude, and longitude.
227+
profile number. Each profile has a time, latitude, and longitude.
228+
The depth values are the bin centers
216229
"""
217230
try:
218231
os.mkdir(outdir)
@@ -236,8 +249,27 @@ def make_gridfiles(
236249
_log.debug(profile_bins)
237250
Nprofiles = len(profiles)
238251
_log.info(f'Nprofiles {Nprofiles}')
239-
depth_bins = np.arange(0, 1100.1, dz)
240-
depths = depth_bins[:-1] + 0.5
252+
253+
if depth_bins is None:
254+
# calculate depth bins using dz
255+
depth_bins = np.arange(0, 1100.1, dz)
256+
else:
257+
# sanity check user-provided bins
258+
if (
259+
depth_bins.ndim != 1
260+
or not np.all(np.isfinite(depth_bins))
261+
or not np.issubdtype(depth_bins.dtype, np.number)
262+
):
263+
raise ValueError('Depth bins must be a 1D array of finite numbers')
264+
if len(depth_bins) < 2:
265+
raise ValueError('There must be at least two depth bins edges')
266+
if not np.all(np.diff(depth_bins) > 0):
267+
raise ValueError('Depth bin edges must be strictly increasing and non-overlapping')
268+
269+
# calculate bin centers
270+
depths = 0.5*(depth_bins[:-1] + depth_bins[1:])
271+
_log.debug(f'depth bins and centers {depth_bins} {{depths}}')
272+
241273
xdimname = 'time'
242274
dsout = xr.Dataset(
243275
coords={'depth': ('depth', depths), 'profile': (xdimname, profiles)}
@@ -247,10 +279,12 @@ def make_gridfiles(
247279
'long_name': 'Depth',
248280
'standard_name': 'depth',
249281
'positive': 'down',
282+
'source': ds.depth.attrs["source"],
250283
'coverage_content_type': 'coordinate',
251284
'comment': 'center of depth bins',
252285
}
253286

287+
# Bin by profile index, for the mean time, lat, and lon values for each profile
254288
ds['time_1970'] = ds.temperature.copy()
255289
ds['time_1970'].values = ds.time.values.astype(np.float64)
256290
for td in ('time_1970', 'longitude', 'latitude'):
@@ -266,11 +300,24 @@ def make_gridfiles(
266300
dat = dat.astype('timedelta64[ns]') + np.datetime64('1970-01-01T00:00:00')
267301
_log.info(f'{td} {len(dat)}')
268302
dsout[td] = (('time'), dat, ds[td].attrs)
269-
ds.drop('time_1970')
303+
304+
# Bin by profile index, for the profile start (min) and end (max) times
305+
profile_lookup = {'profile_time_start': "min", 'profile_time_end': "max"}
270306
good = np.where(~np.isnan(ds['time']) & (ds['profile_index'] % 1 == 0))[0]
271-
_log.info(f'Done times! {len(dat)}')
272-
dsout['profile_time_start'] = ((xdimname), dat, profile_meta['profile_time_start'])
273-
dsout['profile_time_end'] = ((xdimname), dat, profile_meta['profile_time_end'])
307+
for td, bin_stat in profile_lookup.items():
308+
_log.debug(f'td, bin_stat {td}, {bin_stat}')
309+
dat, xedges, binnumber = stats.binned_statistic(
310+
ds['profile_index'].values[good],
311+
ds['time_1970'].values[good],
312+
statistic=bin_stat,
313+
bins=[profile_bins],
314+
)
315+
dat = dat.astype('timedelta64[ns]') + np.datetime64('1970-01-01T00:00:00')
316+
_log.info(f'{td} {len(dat)}')
317+
dsout[td] = ((xdimname), dat, profile_meta[td])
318+
319+
ds = ds.drop('time_1970')
320+
_log.info(f'Done times!')
274321

275322
for k in ds.keys():
276323
if k in ['time', 'profile', 'longitude', 'latitude', 'depth'] or 'time' in k:
@@ -316,6 +363,7 @@ def make_gridfiles(
316363
dsout = dsout.drop(['water_velocity_eastward', 'water_velocity_northward'])
317364
dsout.attrs = ds.attrs
318365
dsout.attrs.pop('cdm_data_type')
366+
319367
# fix to be ISO parsable:
320368
if len(dsout.attrs['deployment_start']) > 18:
321369
dsout.attrs['deployment_start'] = dsout.attrs['deployment_start'][:19]
@@ -330,6 +378,15 @@ def make_gridfiles(
330378
dsout['profile_time_end'].attrs.pop('standard_name')
331379
except:
332380
pass
381+
# remove, so they can be encoded later:
382+
try:
383+
dsout['profile_time_start'].attrs.pop('units')
384+
dsout['profile_time_end'].attrs.pop('units')
385+
dsout['profile_time_start'].attrs.pop('_FillValue')
386+
dsout['profile_time_end'].attrs.pop('_FillValue')
387+
except:
388+
pass
389+
333390
# set some attributes for cf guidance
334391
# see H.6.2. Profiles along a single trajectory
335392
# https://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/aphs06.html
@@ -347,15 +404,18 @@ def make_gridfiles(
347404
outname = outdir + '/' + ds.attrs['deployment_name'] + '_grid' + fnamesuffix + '.nc'
348405
_log.info('Writing %s', outname)
349406
# timeunits = 'nanoseconds since 1970-01-01T00:00:00Z'
407+
time_encoding = {
408+
'units': 'seconds since 1970-01-01T00:00:00Z',
409+
'_FillValue': np.nan,
410+
'calendar': 'gregorian',
411+
'dtype': 'float64',
412+
}
350413
dsout.to_netcdf(
351414
outname,
352415
encoding={
353-
'time': {
354-
'units': 'seconds since 1970-01-01T00:00:00Z',
355-
'_FillValue': np.nan,
356-
'calendar': 'gregorian',
357-
'dtype': 'float64',
358-
}
416+
'time': time_encoding,
417+
'profile_time_start': time_encoding,
418+
'profile_time_end': time_encoding,
359419
},
360420
)
361421
_log.info('Done gridding')

pyglider/slocum.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,7 @@ def binary_to_timeseries(
954954
_t, _ = dbd.get(ncvar[name]['source'])
955955
tg_ind = utils.find_gaps(_t, time, maxgap)
956956
val[tg_ind] = np.nan
957+
_log.debug('number of gaps %s', np.count_nonzero(tg_ind))
957958

958959
val = utils._zero_screen(val)
959960
val = convert(val)

pyglider/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,9 @@ def find_gaps(sample_time, timebase, maxgap):
682682
Return an index into *timebase* where True are times in gaps of *sample_time* larger
683683
than maxgap.
684684
"""
685+
# make sure sample times are strictly increasing
686+
sample_time = np.sort(sample_time)
687+
685688
# figure out which sample each time in time base belongs to:
686689
time_index = np.searchsorted(sample_time, timebase, side='right')
687690
time_index = np.clip(time_index, 0, len(sample_time) - 1)

0 commit comments

Comments
 (0)