Merge branch 'master' into new_xarray

delandmeterp · delandmeterp · commit a2dec03b3d9c · 2018-10-03T16:26:59.000+02:00
diff --git a/parcels/field.py b/parcels/field.py
@@ -119,6 +119,7 @@ def __init__(self, name, data, lon=None, lat=None, depth=None, time=None, grid=N
         self.dimensions = kwargs.pop('dimensions', None)
         self.indices = kwargs.pop('indices', None)
         self.dataFiles = kwargs.pop('dataFiles', None)
+        self.netcdf_engine = kwargs.pop('netcdf_engine', 'netcdf4')
         self.loaded_time_indices = []
 
     @classmethod
@@ -151,6 +152,8 @@ def from_netcdf(cls, filenames, variable, dimensions, indices=None, grid=None,
                It is advised not to fully load the data, since in that case Parcels deals with
                a better memory management during particle set execution.
                full_load is however sometimes necessary for plotting the fields.
+        :param netcdf_engine: engine to use for netcdf reading in xarray. Default is 'netcdf4',
+               but in cases where this doesn't work, setting netcdf_engine='scipy' could help
         """
 
         if not isinstance(filenames, Iterable) or isinstance(filenames, str):
@@ -174,15 +177,16 @@ def from_netcdf(cls, filenames, variable, dimensions, indices=None, grid=None,
             depth_filename = filenames[0]
 
         indices = {} if indices is None else indices.copy()
-        with NetcdfFileBuffer(lonlat_filename, dimensions, indices) as filebuffer:
+        netcdf_engine = kwargs.pop('netcdf_engine', 'netcdf4')
+        with NetcdfFileBuffer(lonlat_filename, dimensions, indices, netcdf_engine) as filebuffer:
             lon, lat = filebuffer.read_lonlat
             indices = filebuffer.indices
             # Check if parcels_mesh has been explicitly set in file
             if 'parcels_mesh' in filebuffer.dataset.attrs:
                 mesh = filebuffer.dataset.attrs['parcels_mesh']
 
         if 'depth' in dimensions:
-            with NetcdfFileBuffer(depth_filename, dimensions, indices) as filebuffer:
+            with NetcdfFileBuffer(depth_filename, dimensions, indices, netcdf_engine) as filebuffer:
                 depth = filebuffer.read_depth
         else:
             indices['depth'] = [0]
@@ -197,7 +201,7 @@ def from_netcdf(cls, filenames, variable, dimensions, indices=None, grid=None,
             timeslices = []
             dataFiles = []
             for fname in data_filenames:
-                with NetcdfFileBuffer(fname, dimensions, indices) as filebuffer:
+                with NetcdfFileBuffer(fname, dimensions, indices, netcdf_engine) as filebuffer:
                     ftime = filebuffer.time
                     timeslices.append(ftime)
                     dataFiles.append([fname] * len(ftime))
@@ -231,7 +235,7 @@ def from_netcdf(cls, filenames, variable, dimensions, indices=None, grid=None,
             data = np.empty((grid.tdim, grid.zdim, grid.ydim, grid.xdim), dtype=np.float32)
             ti = 0
             for tslice, fname in zip(grid.timeslices, data_filenames):
-                with NetcdfFileBuffer(fname, dimensions, indices) as filebuffer:
+                with NetcdfFileBuffer(fname, dimensions, indices, netcdf_engine) as filebuffer:
                     # If Field.from_netcdf is called directly, it may not have a 'data' dimension
                     # In that case, assume that 'name' is the data dimension
                     filebuffer.name = filebuffer.parse_name(dimensions, variable)
@@ -257,6 +261,7 @@ def from_netcdf(cls, filenames, variable, dimensions, indices=None, grid=None,
         kwargs['dimensions'] = dimensions.copy()
         kwargs['indices'] = indices
         kwargs['time_periodic'] = time_periodic
+        kwargs['netcdf_engine'] = netcdf_engine
 
         return cls(variable, data, grid=grid,
                    allow_time_extrapolation=allow_time_extrapolation, **kwargs)
@@ -858,7 +863,7 @@ def advancetime(self, field_new, advanceForward):
 
     def computeTimeChunk(self, data, tindex):
         g = self.grid
-        with NetcdfFileBuffer(self.dataFiles[g.ti+tindex], self.dimensions, self.indices) as filebuffer:
+        with NetcdfFileBuffer(self.dataFiles[g.ti+tindex], self.dimensions, self.indices, self.netcdf_engine) as filebuffer:
             filebuffer.name = filebuffer.parse_name(self.dimensions, self.name)
             time_data = filebuffer.time
             time_data = g.time_origin.reltime(time_data)
@@ -1130,20 +1135,21 @@ def __getitem__(self, key):
 class NetcdfFileBuffer(object):
     """ Class that encapsulates and manages deferred access to file data. """
 
-    def __init__(self, filename, dimensions, indices):
+    def __init__(self, filename, dimensions, indices, netcdf_engine):
         self.filename = filename
         self.dimensions = dimensions  # Dict with dimension keyes for file data
         self.indices = indices
         self.dataset = None
+        self.netcdf_engine = netcdf_engine
 
     def __enter__(self):
         try:
-            self.dataset = xr.open_dataset(str(self.filename), decode_cf=True)
+            self.dataset = xr.open_dataset(str(self.filename), decode_cf=True, engine=self.netcdf_engine)
             self.dataset['decoded'] = True
         except:
             logger.warning_once("File %s could not be decoded properly by xarray (version %s).\n         It will be opened with no decoding. Filling values might be wrongly parsed."
                                 % (self.filename, xr.__version__))
-            self.dataset = xr.open_dataset(str(self.filename), decode_cf=False)
+            self.dataset = xr.open_dataset(str(self.filename), decode_cf=False, engine=self.netcdf_engine)
             self.dataset['decoded'] = False
         for inds in self.indices.values():
             if type(inds) not in [list, range]:
@@ -1166,8 +1172,8 @@ def parse_name(self, dimensions, variable):
 
     @property
     def read_lonlat(self):
-        lon = getattr(self.dataset, self.dimensions['lon'])
-        lat = getattr(self.dataset, self.dimensions['lat'])
+        lon = self.dataset[self.dimensions['lon']]
+        lat = self.dataset[self.dimensions['lat']]
         xdim = lon.size if len(lon.shape) == 1 else lon.shape[-1]
         ydim = lat.size if len(lat.shape) == 1 else lat.shape[-2]
         self.indices['lon'] = self.indices['lon'] if 'lon' in self.indices else range(xdim)
@@ -1195,7 +1201,7 @@ def read_lonlat(self):
     @property
     def read_depth(self):
         if 'depth' in self.dimensions:
-            depth = getattr(self.dataset, self.dimensions['depth'])
+            depth = self.dataset[self.dimensions['depth']]
             depthsize = depth.size if len(depth.shape) == 1 else depth.shape[-3]
             self.indices['depth'] = self.indices['depth'] if 'depth' in self.indices else range(depthsize)
             if len(depth.shape) == 1:
@@ -1211,7 +1217,7 @@ def read_depth(self):
 
     @property
     def data(self):
-        data = getattr(self.dataset, self.name)
+        data = self.dataset[self.name]
         if len(data.shape) == 2:
             data = data[self.indices['lat'], self.indices['lon']]
         elif len(data.shape) == 3:
@@ -1229,15 +1235,15 @@ def data(self):
     @property
     def time(self):
         try:
-            time_da = getattr(self.dataset, self.dimensions['time'])
+            time_da = self.dataset[self.dimensions['time']]
             if self.dataset['decoded'] and 'Unit' not in time_da.attrs:
                 time = np.array([time_da]) if len(time_da.shape) == 0 else np.array(time_da)
             else:
                 if 'units' not in time_da.attrs and 'Unit' in time_da.attrs:
                     time_da.attrs['units'] = time_da.attrs['Unit']
                 ds = xr.Dataset({self.dimensions['time']: time_da})
                 ds = xr.decode_cf(ds)
-                da = getattr(ds, self.dimensions['time'])
+                da = ds[self.dimensions['time']]
                 time = np.array([da]) if len(da.shape) == 0 else np.array(da)
             if isinstance(time[0], datetime.datetime):
                 raise NotImplementedError('Parcels currently only parses dates ranging from 1678 AD to 2262 AD, which are stored by xarray as np.datetime64. If you need a wider date range, please open an Issue on the parcels github page.')
diff --git a/parcels/fieldset.py b/parcels/fieldset.py
@@ -189,6 +189,8 @@ def from_netcdf(cls, filenames, variables, dimensions, indices=None,
                It is advised not to fully load the data, since in that case Parcels deals with
                a better memory management during particle set execution.
                full_load is however sometimes necessary for plotting the fields.
+        :param netcdf_engine: engine to use for netcdf reading in xarray. Default is 'netcdf4',
+               but in cases where this doesn't work, setting netcdf_engine='scipy' could help
         """
 
         fields = {}
diff --git a/parcels/scripts/plottrajectoriesfile.py b/parcels/scripts/plottrajectoriesfile.py
@@ -100,7 +100,7 @@ def plotTrajectoriesFile(filename, mode='2d', tracerfile=None, tracerfield='P',
         else:
             scat = ax.scatter(lon[b], lat[b], s=20, color='k')
         ttl = ax.set_title('Particles' + titlestr + ' at time ' + str(plottimes[0]))
-        frames = np.arange(1, len(plottimes))
+        frames = np.arange(0, len(plottimes))
 
         def animate(t):
             b = time == plottimes[t]
diff --git a/tests/test_particle_sets.py b/tests/test_particle_sets.py
@@ -161,7 +161,7 @@ def IncrLon(particle, fieldset, time, dt):
     for k in range(samplevar.shape[1]):
         assert np.allclose([p for p in samplevar[:, k] if np.isfinite(p)], k)
     filesize = os.path.getsize(str(outfilepath+".nc"))
-    assert filesize < 1024 * 60  # test that chunking leads to filesize less than 60KB
+    assert filesize < 1024 * 65  # test that chunking leads to filesize less than 65KB
 
 
 def test_pset_repeatdt_check_dt(fieldset):