@@ -187,7 +187,14 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml, force=False):
187187
188188
189189def make_gridfiles (
190- inname , outdir , deploymentyaml , * , fnamesuffix = '' , dz = 1 , starttime = '1970-01-01'
190+ inname ,
191+ outdir ,
192+ deploymentyaml ,
193+ * ,
194+ fnamesuffix = '' ,
195+ depth_bins = None ,
196+ dz = 1 ,
197+ starttime = '1970-01-01' ,
191198):
192199 """
193200 Turn a timeseries netCDF file into a vertically gridded netCDF.
@@ -204,15 +211,21 @@ def make_gridfiles(
204211 location of deployment yaml file for the netCDF file. This should
205212 be the same yaml file that was used to make the timeseries file.
206213
214+ depth_bins : array, default = None
215+ User-defined depth bins, for instance ``np.arange(0, 1000.1, 1)``.
216+ If not None, these are the depth bins into which the data will be
217+ gridded. If None, ``dz`` is used to generate bins between 0 and 1100m
218+
207219 dz : float, default = 1
208- Vertical grid spacing in meters.
220+ Vertical grid spacing in meters. Ignored if ``depth_bins`` is not None
209221
210222 Returns
211223 -------
212224 outname : str
213- Name of gridded netCDF file. The gridded netCDF file has coordinates of
225+ Name of gridded netCDF file. The gridded netCDF file has dimensions of
214226 'depth' and 'profile', so each variable is gridded in depth bins and by
215- profile number. Each profile has a time, latitude, and longitude.
227+ profile number. Each profile has a time, latitude, and longitude.
228+ The depth values are the bin centers
216229 """
217230 try :
218231 os .mkdir (outdir )
@@ -236,8 +249,27 @@ def make_gridfiles(
236249 _log .debug (profile_bins )
237250 Nprofiles = len (profiles )
238251 _log .info (f'Nprofiles { Nprofiles } ' )
239- depth_bins = np .arange (0 , 1100.1 , dz )
240- depths = depth_bins [:- 1 ] + 0.5
252+
253+ if depth_bins is None :
254+ # calculate depth bins using dz
255+ depth_bins = np .arange (0 , 1100.1 , dz )
256+ else :
257+ # sanity check user-provided bins
258+ if (
259+ depth_bins .ndim != 1
260+ or not np .all (np .isfinite (depth_bins ))
261+ or not np .issubdtype (depth_bins .dtype , np .number )
262+ ):
263+ raise ValueError ('Depth bins must be a 1D array of finite numbers' )
264+ if len (depth_bins ) < 2 :
265+ raise ValueError ('There must be at least two depth bins edges' )
266+ if not np .all (np .diff (depth_bins ) > 0 ):
267+ raise ValueError ('Depth bin edges must be strictly increasing and non-overlapping' )
268+
269+ # calculate bin centers
270+ depths = 0.5 * (depth_bins [:- 1 ] + depth_bins [1 :])
271+ _log .debug (f'depth bins and centers { depth_bins } {{depths}}' )
272+
241273 xdimname = 'time'
242274 dsout = xr .Dataset (
243275 coords = {'depth' : ('depth' , depths ), 'profile' : (xdimname , profiles )}
@@ -247,10 +279,12 @@ def make_gridfiles(
247279 'long_name' : 'Depth' ,
248280 'standard_name' : 'depth' ,
249281 'positive' : 'down' ,
282+ 'source' : ds .depth .attrs ["source" ],
250283 'coverage_content_type' : 'coordinate' ,
251284 'comment' : 'center of depth bins' ,
252285 }
253286
287+ # Bin by profile index, for the mean time, lat, and lon values for each profile
254288 ds ['time_1970' ] = ds .temperature .copy ()
255289 ds ['time_1970' ].values = ds .time .values .astype (np .float64 )
256290 for td in ('time_1970' , 'longitude' , 'latitude' ):
@@ -266,11 +300,24 @@ def make_gridfiles(
266300 dat = dat .astype ('timedelta64[ns]' ) + np .datetime64 ('1970-01-01T00:00:00' )
267301 _log .info (f'{ td } { len (dat )} ' )
268302 dsout [td ] = (('time' ), dat , ds [td ].attrs )
269- ds .drop ('time_1970' )
303+
304+ # Bin by profile index, for the profile start (min) and end (max) times
305+ profile_lookup = {'profile_time_start' : "min" , 'profile_time_end' : "max" }
270306 good = np .where (~ np .isnan (ds ['time' ]) & (ds ['profile_index' ] % 1 == 0 ))[0 ]
271- _log .info (f'Done times! { len (dat )} ' )
272- dsout ['profile_time_start' ] = ((xdimname ), dat , profile_meta ['profile_time_start' ])
273- dsout ['profile_time_end' ] = ((xdimname ), dat , profile_meta ['profile_time_end' ])
307+ for td , bin_stat in profile_lookup .items ():
308+ _log .debug (f'td, bin_stat { td } , { bin_stat } ' )
309+ dat , xedges , binnumber = stats .binned_statistic (
310+ ds ['profile_index' ].values [good ],
311+ ds ['time_1970' ].values [good ],
312+ statistic = bin_stat ,
313+ bins = [profile_bins ],
314+ )
315+ dat = dat .astype ('timedelta64[ns]' ) + np .datetime64 ('1970-01-01T00:00:00' )
316+ _log .info (f'{ td } { len (dat )} ' )
317+ dsout [td ] = ((xdimname ), dat , profile_meta [td ])
318+
319+ ds = ds .drop ('time_1970' )
320+ _log .info (f'Done times!' )
274321
275322 for k in ds .keys ():
276323 if k in ['time' , 'profile' , 'longitude' , 'latitude' , 'depth' ] or 'time' in k :
@@ -316,6 +363,7 @@ def make_gridfiles(
316363 dsout = dsout .drop (['water_velocity_eastward' , 'water_velocity_northward' ])
317364 dsout .attrs = ds .attrs
318365 dsout .attrs .pop ('cdm_data_type' )
366+
319367 # fix to be ISO parsable:
320368 if len (dsout .attrs ['deployment_start' ]) > 18 :
321369 dsout .attrs ['deployment_start' ] = dsout .attrs ['deployment_start' ][:19 ]
@@ -330,6 +378,15 @@ def make_gridfiles(
330378 dsout ['profile_time_end' ].attrs .pop ('standard_name' )
331379 except :
332380 pass
381+ # remove, so they can be encoded later:
382+ try :
383+ dsout ['profile_time_start' ].attrs .pop ('units' )
384+ dsout ['profile_time_end' ].attrs .pop ('units' )
385+ dsout ['profile_time_start' ].attrs .pop ('_FillValue' )
386+ dsout ['profile_time_end' ].attrs .pop ('_FillValue' )
387+ except :
388+ pass
389+
333390 # set some attributes for cf guidance
334391 # see H.6.2. Profiles along a single trajectory
335392 # https://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/aphs06.html
@@ -347,15 +404,18 @@ def make_gridfiles(
347404 outname = outdir + '/' + ds .attrs ['deployment_name' ] + '_grid' + fnamesuffix + '.nc'
348405 _log .info ('Writing %s' , outname )
349406 # timeunits = 'nanoseconds since 1970-01-01T00:00:00Z'
407+ time_encoding = {
408+ 'units' : 'seconds since 1970-01-01T00:00:00Z' ,
409+ '_FillValue' : np .nan ,
410+ 'calendar' : 'gregorian' ,
411+ 'dtype' : 'float64' ,
412+ }
350413 dsout .to_netcdf (
351414 outname ,
352415 encoding = {
353- 'time' : {
354- 'units' : 'seconds since 1970-01-01T00:00:00Z' ,
355- '_FillValue' : np .nan ,
356- 'calendar' : 'gregorian' ,
357- 'dtype' : 'float64' ,
358- }
416+ 'time' : time_encoding ,
417+ 'profile_time_start' : time_encoding ,
418+ 'profile_time_end' : time_encoding ,
359419 },
360420 )
361421 _log .info ('Done gridding' )
0 commit comments