From aee7d2525c8ff9239361e1a193b1002dda1558de Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 21 Jan 2020 14:59:23 +1100
Subject: [PATCH 01/59] Create addCO2.py
---
ocean_dp/processing/addCO2.py | 90 +++++++++++++++++++++++++++++++++++
1 file changed, 90 insertions(+)
create mode 100755 ocean_dp/processing/addCO2.py
diff --git a/ocean_dp/processing/addCO2.py b/ocean_dp/processing/addCO2.py
new file mode 100755
index 0000000..6f095be
--- /dev/null
+++ b/ocean_dp/processing/addCO2.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jan 21 14:58:20 2020
+
+@author: tru050
+"""
+
+
+
+
+from netCDF4 import Dataset
+import sys
+import gsw
+import numpy as np
+from datetime import datetime
+import pandas
+
+# addCO2 takes a SOTS FV02 gridded product netCDFfile as an input, and adds
+# CO2 data (delivered from NOAA in a csv file) to the netCDFfile
+def addCO2(netCDFfile):
+
+ # Import the SOTS netcdf
+ ds = Dataset(netCDFfile, 'a')
+
+ # Extract the time variable, in serial date numbers (days since 01/01/1950)
+ var_time = ds.variables["TIME"]
+
+ # Convert the variable object to an array
+ netcdf_serials = np.array(var_time[:])
+
+ # Read in the CO2 csv file, ignoring the first five rows
+ dcsv = pandas.read_csv('SOFS_prelimdata_Nov2019.csv',header=5)
+
+ # Convert the dataframe to an array
+ dc = dcsv.to_numpy()
+
+ csv_dates = []
+
+ # Create a list of datetimes from the csv
+ for i in range(len(dc)):
+
+ csv_dates.append(datetime.strptime(dc[i,0],'%m/%d/%Y %H:%M'))
+
+ # Calculate the difference between the csv dates and 01/01/1950 in order
+ # to convert them to the serial date format of the netcdf
+ time_offset_1950 = datetime(1950,1,1,0,0,0)
+
+ csv_delta= []
+
+ for i in range(len(dc)):
+
+ csv_delta.append(csv_dates[i] - time_offset_1950)
+
+
+ # Convert the datetimes from the csv into an array of serial date numbers
+ csv_serials = []
+
+ for i in range(len(dc)):
+
+ csv_serials.append(csv_delta[i].days + csv_delta[i].seconds/86400)
+
+ csv_serials = np.array(csv_serials)
+
+ # Find the indices of timestamps of the csv file that are in the deployment
+ # period of the netcdf file
+ matching_index = (netcdf_serials[0] <= csv_serials) & (csv_serials <= netcdf_serials[-1])
+
+ new_vars = ['XCO2_PRES','XCO2_OCEAN','XCO2_AIR','XCO2_PSAL','XCO2_SSTEMP']
+
+ # For each of the variables in the csv file (except time), linearly
+ # interpolate to the timestamps of the netcdf file
+ for i in range(0,len(new_vars)):
+
+ np.interp(netcdf_serials,csv_serials[matching_index],np.array(dcsv[dcsv.columns[i+1]])[matching_index].astype('float64'))
+
+ ncVarOut = ds.createVariable(new_vars[i], "f4", ("TIME",), fill_value=np.nan, zlib=True)
+
+
+
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
+
+ ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + " : added ")
+
+ ds.close()
+
\ No newline at end of file
From fea30ed80bd728b79bfe25c470515ac63db5dc11 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 21 Jan 2020 16:17:41 +1100
Subject: [PATCH 02/59] Update addCO2.py
---
ocean_dp/processing/addCO2.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/ocean_dp/processing/addCO2.py b/ocean_dp/processing/addCO2.py
index 6f095be..8a14af8 100755
--- a/ocean_dp/processing/addCO2.py
+++ b/ocean_dp/processing/addCO2.py
@@ -30,7 +30,7 @@ def addCO2(netCDFfile):
netcdf_serials = np.array(var_time[:])
# Read in the CO2 csv file, ignoring the first five rows
- dcsv = pandas.read_csv('SOFS_prelimdata_Nov2019.csv',header=5)
+ dcsv = pandas.read_csv('SOFS_prelimdata_Nov2019test.csv',header=5)
# Convert the dataframe to an array
dc = dcsv.to_numpy()
@@ -68,15 +68,19 @@ def addCO2(netCDFfile):
new_vars = ['XCO2_PRES','XCO2_OCEAN','XCO2_AIR','XCO2_PSAL','XCO2_SSTEMP']
+ new_units = ['kPa','umol/mol','umol/mol','Presumed PSU - not specified','deg C']
+
# For each of the variables in the csv file (except time), linearly
# interpolate to the timestamps of the netcdf file
for i in range(0,len(new_vars)):
-
- np.interp(netcdf_serials,csv_serials[matching_index],np.array(dcsv[dcsv.columns[i+1]])[matching_index].astype('float64'))
ncVarOut = ds.createVariable(new_vars[i], "f4", ("TIME",), fill_value=np.nan, zlib=True)
+ ncVarOut[:] = np.interp(netcdf_serials,csv_serials[matching_index],np.array(dcsv[dcsv.columns[i+1]])[matching_index].astype('float64'))
+
+ ncVarOut.units = new_units[i]
+ ncVarOut.comment = "imported from 'SOFS_prelimdata_Nov2019.csv'"
# update the history attribute
try:
@@ -84,7 +88,7 @@ def addCO2(netCDFfile):
except AttributeError:
hist = ""
- ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + " : added ")
+ ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + " : added 'XCO2_PRES','XCO2_OCEAN','XCO2_AIR','XCO2_PSAL','XCO2_SSTEMP' from 'SOFS_prelimdata_Nov2019.csv'")
ds.close()
\ No newline at end of file
From 71470a317c942331e61977bad5f2e30b93759b73 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 22 Jan 2020 09:55:49 +1100
Subject: [PATCH 03/59] Update copyDataset.py
---
ocean_dp/aggregation/copyDataset.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ocean_dp/aggregation/copyDataset.py b/ocean_dp/aggregation/copyDataset.py
index 0079d13..154757f 100644
--- a/ocean_dp/aggregation/copyDataset.py
+++ b/ocean_dp/aggregation/copyDataset.py
@@ -244,8 +244,8 @@ def aggregate(files, varNames):
filen = 0
- # variables we want regardless
- varNames += ['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH']
+ # variables we want regardless
+ varNames = [varNames]+['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH']
# remove any duplicates
varNamesOut = set(varNames)
From 7145c3ebd80702ece428d2d6d07afc53c166371b Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 22 Jan 2020 15:08:31 +1100
Subject: [PATCH 04/59] Create add_interp_press.m
---
matlab/add_interp_press.m | 125 ++++++++++++++++++++++++++++++++++++++
1 file changed, 125 insertions(+)
create mode 100644 matlab/add_interp_press.m
diff --git a/matlab/add_interp_press.m b/matlab/add_interp_press.m
new file mode 100644
index 0000000..6b139ce
--- /dev/null
+++ b/matlab/add_interp_press.m
@@ -0,0 +1,125 @@
+% SOTS Pressure interpolator
+
+% This code imports pressure data from an aggregated file (constructed by
+% P.Jansen), and creates interpolated pressure records for FV00 raw
+% instrument files - firstly by interpolating along time series of pressure
+% readings in the aggregate file to find pressures at each time in a
+% particular FV00 file, and secondly by interpolating down nominal depths at
+% each timestamp to find a pressure value for each FV00 timestamp.
+
+% Ben Weeding - ben.weeding.26@gmail.com
+
+%% Load the filenames
+
+fv00_files = dir('*FV00*.nc');
+agg_files = dir('*Aggregate*.nc');
+
+%% Load the pressure data
+
+agg_pres = ncread(agg_files.name,'PRES');
+agg_pres_info = ncinfo(agg_files.name, 'PRES');
+agg_instrument_index = ncread(agg_files.name,'instrument_index');
+agg_nominal_depth = ncread(agg_files.name,'NOMINAL_DEPTH');
+agg_time = ncread(agg_files.name,'TIME');
+
+%% Create a scattered interpolant from the aggregate data
+
+% This was an error, as it interpolated in 2D space rather than twice in 1D
+
+
+% Subsampled every 10 points for speed of execution at this point
+%scat_interp_pres = scatteredInterpolant(agg_time(1:10:end),agg_nominal_depth(agg_instrument_index(1:10:end)+1),agg_pres(1:10:end));
+
+%% Interpolate the pressure and write the data into the FV00 file
+
+% Loop through each of the fv00 files
+for i=1:length(fv00_files)
+
+ disp(fv00_files(i).name)
+
+ % Extract the content from the FV00 file
+ fv00_contents = ncinfo(fv00_files(i).name);
+
+ % Check if the FV00 file contains pressure data, run the interpolation
+ % code if not
+
+ if (sum(contains({fv00_contents.Variables(:).Name}, 'PRES')) == 0)
+
+ % Load the FV00 data requiring pressure
+ %'days since 1950-01-01 00:00:00 UTC' for minilog T
+ fv00_time = ncread(fv00_files(i).name,'TIME');
+ fv00_depth = ncread(fv00_files(i).name,'NOMINAL_DEPTH');
+
+ % Interpolate the agg pressure records at each nominal depth to
+ % provide pressure values at each timestamp in the current FV00
+ % file
+
+ interp_agg_pres = nan(length(agg_nominal_depth),length(fv00_time));
+
+ % Loop through each nominal depth in the aggregate file, and get pressure for the FV00 file's time
+ for j = 1:length(agg_nominal_depth)
+
+ % Select the relevant time and pressures
+
+ time_selection = agg_time(agg_instrument_index == (j-1));
+ pres_selection = agg_pres(agg_instrument_index == (j-1));
+
+ % Interpolate along each nominal depth
+
+ interp_agg_pres(j,:) = interp1(time_selection,pres_selection,fv00_time);
+ end
+
+ % At each timestamp in the FV00 record, interpolate a pressure
+ % value based on the FV00 nominal depth, and the interpolated
+ % pressures in interp_agg_pres. Using spline interpolation here to
+ % deal with NaN values that occur in failed pressure sensors.
+ pres_interp_dummy = nan(size(fv00_time));
+
+ for l = 1:length(fv00_time)
+
+ pres_interp_dummy(l) = interp1(agg_nominal_depth,interp_agg_pres(:,l),fv00_depth,'spline');
+
+ end
+
+ pres_interp = pres_interp_dummy;
+
+ % Create an FV01 version of the current FV00 file
+
+ % Create the new FV01 file name
+
+ fv01_name = strrep(fv00_files(i).name,'FV00','FV01');
+ fv01_name(end-10:end-3)=datestr(now,'yyyymmdd');
+
+ % Write the FV00 data into the FV01 file
+ ncwriteschema(fv01_name, fv00_contents);
+
+ % copy variable data to new file
+ for v = fv00_contents.Variables
+ ncwrite(fv01_name, v.Name, ncread(fv00_files(i).name, v.Name));
+ end
+
+ % Modify the global attributes of the file to record processing,
+ % and add to the file history
+
+ ncwriteatt(fv01_name,'/','file_version','Level 1 - partially processed');
+ hist = ncreadatt(fv00_files(i).name, '/', 'history');
+ ncwriteatt(fv01_name,'/','history',[hist newline datestr(now,'yyyy-mm-dd') ' : Added interpolated pressure from ' agg_files.name]);
+
+ % Add and populate a PRES variable to the FV01 file
+ nccreate(fv01_name, 'PRES', 'Dimensions',{'TIME',size(pres_interp,1)}, 'FillValue',NaN);
+ ncwrite(fv01_name, 'PRES', pres_interp);
+
+ % copy attributes from agg file to output file
+ pres_atts = agg_pres_info.Attributes; % get all attribtes from the aggregate file
+ for k=1:length(pres_atts)
+ if (strcmp(pres_atts(k).Name, '_FillValue') == 0)
+ ncwriteatt(fv01_name, 'PRES', pres_atts(k).Name, pres_atts(k).Value);
+ end
+ end
+
+ % Add the relevant attributes to the PRES variable, including a
+ % comment noting that the data has been linearly interpolated
+ ncwriteatt(fv01_name, 'PRES', 'comment','pressure data has been linearly interpolated from surrounding pressure sensors');
+
+ end
+end
\ No newline at end of file
From 8d81ba9c925d5f6fda53d3e14bb1161cefd6ba21 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 30 Jan 2020 12:28:58 +1100
Subject: [PATCH 05/59] Update add_interp_press.m
---
matlab/add_interp_press.m | 158 ++++++++++++++++++++++++++++++++++----
1 file changed, 142 insertions(+), 16 deletions(-)
diff --git a/matlab/add_interp_press.m b/matlab/add_interp_press.m
index 6b139ce..5e5ff5a 100644
--- a/matlab/add_interp_press.m
+++ b/matlab/add_interp_press.m
@@ -22,13 +22,16 @@
agg_nominal_depth = ncread(agg_files.name,'NOMINAL_DEPTH');
agg_time = ncread(agg_files.name,'TIME');
-%% Create a scattered interpolant from the aggregate data
-
-% This was an error, as it interpolated in 2D space rather than twice in 1D
+% Here we prevent the use of bad data from Pulse 8
+if strfind(fv00_files(1).name,'Pulse-8')
+
+ agg_pres(agg_instrument_index==2 & agg_time+datenum(1950,1,1,0,0,0) >= datenum('30-01-2012 05:00','dd-mm-yyyy HH:MM'))=NaN;
+
+ %agg_pres(agg_instrument_index==2)=NaN;
+
+end
-% Subsampled every 10 points for speed of execution at this point
-%scat_interp_pres = scatteredInterpolant(agg_time(1:10:end),agg_nominal_depth(agg_instrument_index(1:10:end)+1),agg_pres(1:10:end));
%% Interpolate the pressure and write the data into the FV00 file
@@ -54,10 +57,16 @@
% provide pressure values at each timestamp in the current FV00
% file
- interp_agg_pres = nan(length(agg_nominal_depth),length(fv00_time));
+ interp_agg_pres = nan(length(agg_nominal_depth)+1,length(fv00_time));
+
+ % Include a row of zeros to set surface depth as 0 dbar
+
+ interp_agg_pres(1,:) = zeros(size(fv00_time));
+
+ agg_nominal_depth_with_0 = [0; agg_nominal_depth];
% Loop through each nominal depth in the aggregate file, and get pressure for the FV00 file's time
- for j = 1:length(agg_nominal_depth)
+ for j = 1:(length(agg_nominal_depth))
% Select the relevant time and pressures
@@ -66,20 +75,36 @@
% Interpolate along each nominal depth
- interp_agg_pres(j,:) = interp1(time_selection,pres_selection,fv00_time);
- end
+ interp_agg_pres(j+1,:) = interp1(time_selection,pres_selection,fv00_time);
+ end
+
+ % Sort the nominal depths and pressures
+
+ [agg_nominal_depth_with_0,sort_idx] = sort(agg_nominal_depth_with_0);
+
+ interp_agg_pres = interp_agg_pres(sort_idx,:);
+
+
+ % Linearly interpolate at each timestamp to replace NaN values
+
+ interp_agg_pres = fillmissing(interp_agg_pres,'linear','SamplePoints',agg_nominal_depth_with_0);
+
% At each timestamp in the FV00 record, interpolate a pressure
% value based on the FV00 nominal depth, and the interpolated
- % pressures in interp_agg_pres. Using spline interpolation here to
- % deal with NaN values that occur in failed pressure sensors.
- pres_interp_dummy = nan(size(fv00_time));
+ % pressures in interp_agg_pres.
+ pres_interp_dummy = nan(size(fv00_time));
- for l = 1:length(fv00_time)
+
+ for l = 1:length(fv00_time)
+
+ if sum(~isnan(interp_agg_pres(:,l))) > 1
- pres_interp_dummy(l) = interp1(agg_nominal_depth,interp_agg_pres(:,l),fv00_depth,'spline');
+ pres_interp_dummy(l) = interp1(agg_nominal_depth_with_0,interp_agg_pres(:,l),fv00_depth);
- end
+ end
+
+ end
pres_interp = pres_interp_dummy;
@@ -119,7 +144,108 @@
% Add the relevant attributes to the PRES variable, including a
% comment noting that the data has been linearly interpolated
- ncwriteatt(fv01_name, 'PRES', 'comment','pressure data has been linearly interpolated from surrounding pressure sensors');
+ ncwriteatt(fv01_name, 'PRES', 'comment','pressure data has been interpolated from surrounding pressure sensors');
+
+ else
+
+ % Load the FV00 data containing pressure
+ %'days since 1950-01-01 00:00:00 UTC' for minilog T
+ fv00_time = ncread(fv00_files(i).name,'TIME');
+ fv00_depth = ncread(fv00_files(i).name,'NOMINAL_DEPTH');
+ fv00_pres = ncread(fv00_files(i).name,'PRES');
+
+ % Remove bad data in pulse 8
+
+ if strfind(fv00_files(i).name,'Pulse-8-2011-SBE16plusV2-01606330-34m')
+
+ fv00_pres(4442:end) = NaN;
+
+ end
+
+ % Interpolate the agg pressure records at each nominal depth to
+ % provide pressure values at each timestamp in the current FV00
+ % file
+
+ interp_agg_pres = nan(length(agg_nominal_depth)+1,length(fv00_time));
+
+ % Include a row of zeros to set surface depth as 0 dbar
+
+ interp_agg_pres(1,:) = zeros(size(fv00_time));
+
+ agg_nominal_depth_with_0 = [0; agg_nominal_depth];
+
+ % Loop through each nominal depth in the aggregate file, and get pressure for the FV00 file's time
+ for j = 1:(length(agg_nominal_depth))
+
+ % Select the relevant time and pressures
+
+ time_selection = agg_time(agg_instrument_index == (j-1));
+ pres_selection = agg_pres(agg_instrument_index == (j-1));
+
+ % Interpolate along each nominal depth
+
+ interp_agg_pres(j+1,:) = interp1(time_selection,pres_selection,fv00_time);
+ end
+
+ % Sort the nominal depths and pressures
+
+ [agg_nominal_depth_with_0,sort_idx] = sort(agg_nominal_depth_with_0);
+
+ interp_agg_pres = interp_agg_pres(sort_idx,:);
+
+
+ % Linearly interpolate at each timestamp to replace NaN values
+
+ interp_agg_pres = fillmissing(interp_agg_pres,'linear','SamplePoints',agg_nominal_depth_with_0);
+
+ for j = 1:length(fv00_pres)
+
+ if isnan(fv00_pres(j))
+
+ fv00_pres(j) = interp_agg_pres(agg_nominal_depth_with_0==fv00_depth,j);
+
+ end
+
+ end
+
+ % Create an FV01 version of the current FV00 file
+
+ % Create the new FV01 file name
+
+ fv01_name = strrep(fv00_files(i).name,'FV00','FV01');
+ fv01_name(end-10:end-3)=datestr(now,'yyyymmdd');
+
+ % Write the FV00 data into the FV01 file
+ ncwriteschema(fv01_name, fv00_contents);
+
+ % copy variable data to new file
+ for v = fv00_contents.Variables
+ ncwrite(fv01_name, v.Name, ncread(fv00_files(i).name, v.Name));
+ end
+
+ % Modify the global attributes of the file to record processing,
+ % and add to the file history
+
+ ncwriteatt(fv01_name,'/','file_version','Level 1 - partially processed');
+ hist = ncreadatt(fv00_files(i).name, '/', 'history');
+ ncwriteatt(fv01_name,'/','history',[hist newline datestr(now,'yyyy-mm-dd') ' : Filled missing pressure with interpolated pressure from ' agg_files.name]);
+
+ % Add and populate a PRES variable to the FV01 file
+ %nccreate(fv01_name, 'PRES', 'Dimensions',{'TIME',size(fv00_pres,1)}, 'FillValue',NaN);
+ ncwrite(fv01_name, 'PRES', fv00_pres);
+
+ % copy attributes from agg file to output file
+ pres_atts = agg_pres_info.Attributes; % get all attribtes from the aggregate file
+ for k=1:length(pres_atts)
+ if (strcmp(pres_atts(k).Name, '_FillValue') == 0)
+ ncwriteatt(fv01_name, 'PRES', pres_atts(k).Name, pres_atts(k).Value);
+ end
+ end
+
+ % Add the relevant attributes to the PRES variable, including a
+ % comment noting that the data has been linearly interpolated
+ ncwriteatt(fv01_name, 'PRES', 'comment','originally missing pressure data has been interpolated from surrounding pressure sensors');
+
end
end
\ No newline at end of file
From 3e0f9a960df28430a7f0670ccfbd00fc3cb0d776 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 5 Feb 2020 14:45:48 +1100
Subject: [PATCH 06/59] Create pressure_interpolator.py
---
ocean_dp/processing/pressure_interpolator.py | 115 +++++++++++++++++++
1 file changed, 115 insertions(+)
create mode 100755 ocean_dp/processing/pressure_interpolator.py
diff --git a/ocean_dp/processing/pressure_interpolator.py b/ocean_dp/processing/pressure_interpolator.py
new file mode 100755
index 0000000..311d03d
--- /dev/null
+++ b/ocean_dp/processing/pressure_interpolator.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Feb 4 11:05:16 2020
+
+@author: tru050
+"""
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import numpy as np
+import sys
+from netCDF4 import Dataset
+import numpy
+import argparse
+import glob
+import pandas as pd
+import scipy
+
+def pressure_interpolator:
+
+ # Load the filenames of the FV00 files in the current folder
+ fv00_files = glob.glob('*FV00*.nc');
+
+ # Extract the aggregate file data
+ agg = Dataset(glob.glob('*Aggregate*.nc')[0], mode="r")
+
+ # Loop through each of the FV00 files
+ for i in fv00_files:
+
+ # Extract the contents of the current file
+ fv00_contents = Dataset(i, mode="r")
+
+ # Check the current file doesn't contain pressure to run the following
+ # interpolator
+ if not 'PRES' in fv00_contents.variables:
+
+ # Create a NaN array to fill with pressure values
+ interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv00_contents.variables["TIME"])),np.nan)
+
+ # Set the first row as zeros to set 0m as 0dbar
+ interp_agg_pres[0,:] = 0
+
+ # Set the last row to 5000 to set 5000m as 5000dbar (~seafloor),
+ # only for interpolation in cases where the deepest sensor has failed
+ #interp_agg_pres[-1,:] = 5000
+
+ # Create a new array representing the nominal depths of the agg file,
+ # including the 0m values
+ agg_nominal_depths = np.insert(np.array(agg.variables["NOMINAL_DEPTH"][:]),0,0)
+
+ # For each nominal depth, interpolate the agg data at the FV00 times
+ for j in range(1,len(agg_nominal_depths)):
+
+ time_selection = agg.variables["TIME"][agg.variables["instrument_index"][:]==(j-1)]
+
+ pres_selection = agg.variables["PRES"][agg.variables["instrument_index"][:]==(j-1)]
+
+ interp_agg_pres[j,:] = np.interp(fv00_contents.variables["TIME"][:],time_selection,pres_selection)
+
+ # Sort the nominal depths and pressures according to nominal depth
+ interp_agg_pres = interp_agg_pres[np.argsort(agg_nominal_depths),:]
+
+ agg_nominal_depths.sort()
+
+ # If there are any NaN values, linearly interpolate profilewise
+ if np.isnan(np.sum(interp_agg_pres)):
+
+ # Make a dataframe of the interpolated pressure to handle NaNs easily
+ interp_agg_pres_df = pd.DataFrame(data=interp_agg_pres,index=agg_nominal_depths)
+
+ # Find all the columns where the lowest element is NaN
+ nan_cols = interp_agg_pres_df[interp_agg_pres_df[-1:].isna()].tolist()
+
+ # Select each column containing an NaN as the deepest value
+ for j in nan_cols:
+
+ # Find the shallowest nominal depth that isn't NaN
+ shallowest_val = pd.Series.last_valid_index(interp_agg_pres_df.iloc[:,j])
+
+ # Find the index of that nominal depth
+ shallowest_idx = interp_agg_pres_df.index.tolist().index(shallowest_val)
+
+ # Starting at the shallowest NaN in a continous block of NaNs to the bottom
+ for k in range(shallowest_idx+1,len(interp_agg_pres_df)):
+
+ # Linearly interpolate from shallow to deep, based on a nominal depth difference of 1m equating to 1dbar
+ interp_agg_pres_df.iloc[k,j] = interp_agg_pres_df.iloc[k-1,j]+np.diff(interp_agg_pres_df.index)[k-1]
+
+ # Linearly interpolate any remaining NaNs
+ interp_agg_pres_df = interp_agg_pres_df.interpolate(method="index")
+
+ # Convert the DataFrame back to an array
+ interp_agg_pres = interp_agg_pres_df.to_numpy()
+
+ # Create a NaN array to receive the FV00 interpolated pressures
+ interp_fv00_pres = np.full((np.shape(fv00_contents.variables["TIME"][:])),np.nan)
+
+ # At each timestamp, interpolate pressure for the FV00 data
+ for j in range(len(fv00_contents.variables["TIME"])):
+
+ interp_fv00_pres[j] = np.interp(fv00_contents.variables["NOMINAL_DEPTH"][0],agg_nominal_depths,interp_agg_pres[:,j])
+
+
+
+
+
+
+
+
+
+
From dde748f5b90ff75b8f6874584a07e03b294fd678 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 5 Feb 2020 14:45:56 +1100
Subject: [PATCH 07/59] Update add_interp_press.m
---
matlab/add_interp_press.m | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/matlab/add_interp_press.m b/matlab/add_interp_press.m
index 5e5ff5a..5ba9817 100644
--- a/matlab/add_interp_press.m
+++ b/matlab/add_interp_press.m
@@ -134,6 +134,30 @@
nccreate(fv01_name, 'PRES', 'Dimensions',{'TIME',size(pres_interp,1)}, 'FillValue',NaN);
ncwrite(fv01_name, 'PRES', pres_interp);
+ % Add quality control variables to the FV01 file, assigning 8 to
+ % interpolated data in line with Argo
+ for v = fv00_contents.Variables
+
+ if ~isempty(v.Dimensions)
+
+ nccreate(fv01_name, v.Name + "_quality_control",'Dimensions',{v.Dimensions.Name,v.Dimensions.Length},'FillValue',99);
+
+ ncwriteatt(fv01_name,v.Name + "_quality_control",'long_name',"quality_code for"+v.Name);
+
+ ncwriteatt(fv01_name,v.Name,'ancillary_variables',v.Name + "_quality_control");
+
+ if contains(v.Name,'PRES')
+
+ ncwrite(fv01_name, v.Name + "_quality_control",8*ones(size(fv00_time)));
+
+ end
+
+ end
+
+ end
+
+
+
% copy attributes from agg file to output file
pres_atts = agg_pres_info.Attributes; % get all attribtes from the aggregate file
for k=1:length(pres_atts)
From b7afd52237c021633a0992603e9057ae2f6a930c Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 5 Feb 2020 14:46:00 +1100
Subject: [PATCH 08/59] Update copyDataset.py
---
ocean_dp/aggregation/copyDataset.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/ocean_dp/aggregation/copyDataset.py b/ocean_dp/aggregation/copyDataset.py
index 154757f..d23fe7d 100644
--- a/ocean_dp/aggregation/copyDataset.py
+++ b/ocean_dp/aggregation/copyDataset.py
@@ -15,7 +15,6 @@
# similar more general tool project https://ncagg.readthedocs.io/en/latest/ (does not work on python3 2019-10-01)
# has configurable way of dealing with attributes
-
# file sets to test against
# http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/NRS/NRSKAI/Temperature/catalog.html
# http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/NRS/NRSKAI/Biogeochem_profiles/catalog.html
@@ -344,7 +343,7 @@ def aggregate(files, varNames):
dMin = maVariableAll.max(0)
ncOut.setncattr("geospatial_vertical_max", dMax)
ncOut.setncattr("geospatial_vertical_min", dMin)
-
+
dsIn.close() # we're done with the varList now
ncOut.close()
From a9c85a9d413dccd7ddfb651efab7ce1a68656d1f Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 5 Feb 2020 14:46:02 +1100
Subject: [PATCH 09/59] Create flatline_test.py
---
ocean_dp/qc/flatline_test.py | 153 +++++++++++++++++++++++++++++++++++
1 file changed, 153 insertions(+)
create mode 100755 ocean_dp/qc/flatline_test.py
diff --git a/ocean_dp/qc/flatline_test.py b/ocean_dp/qc/flatline_test.py
new file mode 100755
index 0000000..a4e4ec5
--- /dev/null
+++ b/ocean_dp/qc/flatline_test.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Feb 3 14:10:41 2020
+
+@author: tru050
+"""
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+#!/usr/bin/python3
+
+# add_qc_flags
+# Copyright (C) 2020 Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+# add QC variables to file
+
+
+def add_qc(netCDFfile):
+
+ new_name = [] # list of new file names
+
+ # loop over all file names given
+ for fn in netCDFfile[1:]:
+ ds = Dataset(fn, 'a')
+
+ # read the variable names from the netCDF dataset
+ vars = ds.variables
+
+ # create a list of variables, don't include the 'TIME' variable
+ # TODO: detect 'TIME' variable using the standard name 'time'
+ to_add = []
+ for v in vars:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+
+ # for each variable, add a new ancillary variable _quality_control to each which has 'TIME' as a dimension
+ for v in to_add:
+ if "TIME" in vars[v].dimensions:
+ # print("time dim ", v)
+
+ ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True) # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
+ ncVarOut[:] = np.zeros(vars[v].shape)
+ ncVarOut.long_name = "quality_code for " + v
+
+ vars[v].ancillary_variables = v + "_quality_control"
+
+ # update the file version attribute
+ ds.file_version = "Level 1 - Quality Controlled Data"
+
+ ds.close()
+
+ # rename the file FV00 to FV01 (imos specific)
+ fn_new = fn.replace("FV00", "FV01")
+ new_name.append(fn_new)
+
+ if fn_new != fn:
+ # copy file
+ os.copy(fn, fn_new)
+
+ print(fn_new)
+
+ return new_name
+
+
+if __name__ == "__main__":
+ add_qc(sys.argv)
+
+##############################################################################
+
+def flatline_test(*target_files,target_vars=[],window=3):
+
+ # If files aren't specified, take all the .nc files in the current folder
+ if not target_files:
+
+ target_files = glob.glob('*.nc')
+
+ # Loop through each files in target_files
+ for current_file in target_files:
+
+
+ # Print each filename
+ print("input file %s" % current_file)
+
+ # Extract netcdf data into nc
+ nc = Dataset(current_file, mode="r")
+
+ # Extract time
+ ncTime = nc.get_variables_by_attributes(standard_name='time')
+
+ # If target_vars aren't user specified, set it to all the variables of
+ # the current_file, removing TIME
+ if target_vars == []:
+
+ target_vars = list(nc.variables.keys())
+
+ target_vars.remove('TIME')
+
+ # Check if file contains quality control variables, and if not create
+
+ if not any("_quality_control" in i for i in target_vars:
+
+ # insert _quality_control variables into file?
+ # should this be done now, or should we assume it
+ # will have already been done?
+
+
+ # For each variable, extract the data
+ for current_var in target_vars:
+
+ var_data = np.array(nc.variables[current_var])
+
+ for i in 0:(len(var_data)-window+1):
+
+ # This is true if 'window' elements in a row are equal
+ if len(set(var_data[i:(i+window)])) == 1
+
+ # set corresponding QC value to...
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
From 1193875bda18d662cef5ca5bccf42f48f65032f1 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 12 Feb 2020 16:03:17 +1100
Subject: [PATCH 10/59] Selects deployed data
---
ocean_dp/processing/pressure_interpolator.py | 75 +++++++++++-
ocean_dp/qc/add_qc_flags.py | 61 +++++++---
ocean_dp/qc/select_in_water.py | 114 +++++++++++++++++++
3 files changed, 227 insertions(+), 23 deletions(-)
create mode 100755 ocean_dp/qc/select_in_water.py
diff --git a/ocean_dp/processing/pressure_interpolator.py b/ocean_dp/processing/pressure_interpolator.py
index 311d03d..cb929af 100755
--- a/ocean_dp/processing/pressure_interpolator.py
+++ b/ocean_dp/processing/pressure_interpolator.py
@@ -19,6 +19,8 @@
import glob
import pandas as pd
import scipy
+import os
+import shutil
def pressure_interpolator:
@@ -44,10 +46,6 @@ def pressure_interpolator:
# Set the first row as zeros to set 0m as 0dbar
interp_agg_pres[0,:] = 0
- # Set the last row to 5000 to set 5000m as 5000dbar (~seafloor),
- # only for interpolation in cases where the deepest sensor has failed
- #interp_agg_pres[-1,:] = 5000
-
# Create a new array representing the nominal depths of the agg file,
# including the 0m values
agg_nominal_depths = np.insert(np.array(agg.variables["NOMINAL_DEPTH"][:]),0,0)
@@ -103,9 +101,76 @@ def pressure_interpolator:
for j in range(len(fv00_contents.variables["TIME"])):
interp_fv00_pres[j] = np.interp(fv00_contents.variables["NOMINAL_DEPTH"][0],agg_nominal_depths,interp_agg_pres[:,j])
-
+
+ # Use methods from add_qc_flags to make a new netcdf?
+
+ # Deal with files that already contain pressure, but may contain NaNs
+ else:
+ # Create a NaN array to fill with pressure values
+ interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv00_contents.variables["TIME"])),np.nan)
+
+ # Set the first row as zeros to set 0m as 0dbar
+ interp_agg_pres[0,:] = 0
+
+ # Set the last row to 5000 to set 5000m as 5000dbar (~seafloor),
+ # only for interpolation in cases where the deepest sensor has failed
+ #interp_agg_pres[-1,:] = 5000
+
+ # Create a new array representing the nominal depths of the agg file,
+ # including the 0m values
+ agg_nominal_depths = np.insert(np.array(agg.variables["NOMINAL_DEPTH"][:]),0,0)
+
+ # For each nominal depth, interpolate the agg data at the FV00 times
+ for j in range(1,len(agg_nominal_depths)):
+
+ time_selection = agg.variables["TIME"][agg.variables["instrument_index"][:]==(j-1)]
+
+ pres_selection = agg.variables["PRES"][agg.variables["instrument_index"][:]==(j-1)]
+
+ interp_agg_pres[j,:] = np.interp(fv00_contents.variables["TIME"][:],time_selection,pres_selection)
+
+ # Sort the nominal depths and pressures according to nominal depth
+ interp_agg_pres = interp_agg_pres[np.argsort(agg_nominal_depths),:]
+
+ agg_nominal_depths.sort()
+ # If there are any NaN values, linearly interpolate profilewise
+ if np.isnan(np.sum(interp_agg_pres)):
+
+ # Make a dataframe of the interpolated pressure to handle NaNs easily
+ interp_agg_pres_df = pd.DataFrame(data=interp_agg_pres,index=agg_nominal_depths)
+
+ # Find all the columns where the lowest element is NaN
+ nan_cols = interp_agg_pres_df[interp_agg_pres_df[-1:].isna()].tolist()
+
+ # Select each column containing an NaN as the deepest value
+ for j in nan_cols:
+
+ # Find the shallowest nominal depth that isn't NaN
+ shallowest_val = pd.Series.last_valid_index(interp_agg_pres_df.iloc[:,j])
+
+ # Find the index of that nominal depth
+ shallowest_idx = interp_agg_pres_df.index.tolist().index(shallowest_val)
+ # Starting at the shallowest NaN in a continous block of NaNs to the bottom
+ for k in range(shallowest_idx+1,len(interp_agg_pres_df)):
+
+ # Linearly interpolate from shallow to deep, based on a nominal depth difference of 1m equating to 1dbar
+ interp_agg_pres_df.iloc[k,j] = interp_agg_pres_df.iloc[k-1,j]+np.diff(interp_agg_pres_df.index)[k-1]
+
+ # Linearly interpolate any remaining NaNs
+ interp_agg_pres_df = interp_agg_pres_df.interpolate(method="index")
+
+ # Convert the DataFrame back to an array
+ interp_agg_pres = interp_agg_pres_df.to_numpy()
+
+ # Create a NaN array to receive the FV00 interpolated pressures
+ interp_fv00_pres = np.full((np.shape(fv00_contents.variables["TIME"][:])),np.nan)
+
+ # Extract the interpolated pressures (NaNs removed) to store in netCDF4
+ interp_fv00_pres = interp_agg_pres_df[interp_agg_pres_df.index==fv00_contents.variables["NOMINAL_DEPTH"][:]]
+
+ #
diff --git a/ocean_dp/qc/add_qc_flags.py b/ocean_dp/qc/add_qc_flags.py
index 7d00c63..bd68234 100644
--- a/ocean_dp/qc/add_qc_flags.py
+++ b/ocean_dp/qc/add_qc_flags.py
@@ -18,11 +18,12 @@
from netCDF4 import Dataset, num2date
import sys
-
+from datetime import datetime
import numpy as np
from dateutil import parser
import pytz
import os
+import shutil
# add QC variables to file
@@ -32,8 +33,31 @@ def add_qc(netCDFfile):
new_name = [] # list of new file names
# loop over all file names given
- for fn in netCDFfile[1:]:
- ds = Dataset(fn, 'a')
+ for fn in netCDFfile:
+
+ # rename the file FV00 to FV01 (imos specific)
+ fn_new = fn.replace("FV00", "FV01")
+
+ # Change the creation date in the filename to today
+ now=datetime.utcnow()
+
+
+
+ fn_new = "".join((fn_new[0:-11],now.strftime("%Y%m%d"),fn_new[-3::]))
+
+ # Add the new file name to the list of new file names
+ new_name.append(fn_new)
+
+ # If a new (different) filename has been successfully generated, make
+ # a copy of the old file with the new name
+ if fn_new != fn:
+ # copy file
+ shutil.copy(fn, fn_new)
+
+
+ print(fn_new)
+
+ ds = Dataset(fn_new, 'a')
# read the variable names from the netCDF dataset
vars = ds.variables
@@ -51,29 +75,30 @@ def add_qc(netCDFfile):
if "TIME" in vars[v].dimensions:
# print("time dim ", v)
- ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True) # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
- ncVarOut[:] = np.zeros(vars[v].shape)
- ncVarOut.long_name = "quality_code for " + v
+ if v+"_quality_control" not in ds.variables:
+ ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True) # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
+ ncVarOut[:] = np.zeros(vars[v].shape)
+ ncVarOut.long_name = "quality_code for " + v
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9])
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
- vars[v].ancillary_variables = v + "_quality_control"
+ vars[v].ancillary_variables = v + "_quality_control"
- # update the file version attribute
+ # update the global attributes
ds.file_version = "Level 1 - Quality Controlled Data"
+
+ ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+ ds.history += ' ' + now.strftime("%Y%m%d:") + ' converted to FV01 file, quality_control variables added.'
- ds.close()
-
- # rename the file FV00 to FV01 (imos specific)
- fn_new = fn.replace("FV00", "FV01")
- new_name.append(fn_new)
+ # ADD quality control attributes!!
- if fn_new != fn:
- # copy file
- os.copy(fn, fn_new)
+ ds.close()
- print(fn_new)
return new_name
if __name__ == "__main__":
- add_qc(sys.argv)
+ add_qc(sys.argv[1:])
\ No newline at end of file
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
new file mode 100755
index 0000000..b600695
--- /dev/null
+++ b/ocean_dp/qc/select_in_water.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 12 09:49:26 2020
+
+@author: tru050
+"""
+from dateutil.parser import parse
+from netCDF4 import Dataset, num2date, date2num
+from datetime import datetime, timedelta
+import sys
+from datetime import datetime
+import numpy as np
+from dateutil import parser
+import pytz
+import os
+import shutil
+
+def select_in_water(netCDFfile):
+
+ new_name = [] # list of new file names
+
+ # loop over all file names given
+ for fn in netCDFfile:
+
+ # Change the creation date in the filename to today
+ now=datetime.utcnow()
+
+ fn_new = fn.replace("FV00", "FV01")
+
+ fn_new = "".join((fn_new[0:-11],now.strftime("%Y%m%d"),fn_new[-3::]))
+
+ # Add the new file name to the list of new file names
+ new_name.append(fn_new)
+
+ # Load the original netcdf file
+ ods = Dataset(fn,'a')
+
+ # Extract the time dimension, and the deployment start and end
+ time = np.array(ods.variables['TIME'][:])
+
+ inw = parse(ods.time_deployment_start)
+
+ outw = parse(ods.time_deployment_end)
+
+ # Convert the start and end to the number format used in TIME
+ inw_num = date2num(inw.replace(tzinfo=None),units = ods.variables['TIME'].units)
+
+ outw_num = date2num(outw.replace(tzinfo=None),units = ods.variables['TIME'].units)
+
+ # Create logical index of deployed times
+
+ deployed = np.logical_and(time>=inw_num,time<=outw_num)
+
+ # Determine the length of the new time dimension
+
+ time_dim = len(time[deployed])
+
+ # Create the new netcdf file
+ ds = Dataset(fn_new, "w", format="NETCDF4")
+
+ TIME = ds.createDimension("TIME",time_dim)
+
+ # Copy global attributes
+
+ for att in ods.ncattrs():
+
+ ds.setncattr(att,ods.getncattr(att))
+
+ # Copy variables
+
+ for v_name, varin in ods.variables.items():
+
+ varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
+
+ # Copy variable attributes
+ varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
+
+ # Fill variables with deployed data
+
+ if np.array(varin[:]).size == 1:
+
+ varout[:] = varin[:]
+
+ else:
+
+ varout[:] = np.array(varin[:])[deployed]
+
+ ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+ ds.history += ' ' + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
+
+ ds.close()
+
+ ods.close()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
From 774f7dabd15ba1cbdff4e26fe72bb4b3a02fc18a Mon Sep 17 00:00:00 2001
From: Peter Jansen
Date: Wed, 12 Feb 2020 16:30:46 +1100
Subject: [PATCH 11/59] Update select_in_water.py
minor clean ups, should look at using split for file name manipulation.
---
ocean_dp/qc/select_in_water.py | 85 ++++++++++++++++------------------
1 file changed, 39 insertions(+), 46 deletions(-)
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
index b600695..f23df57 100755
--- a/ocean_dp/qc/select_in_water.py
+++ b/ocean_dp/qc/select_in_water.py
@@ -1,10 +1,21 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 12 09:49:26 2020
+#!/usr/bin/python3
+
+# ocean_dp
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
-@author: tru050
-"""
from dateutil.parser import parse
from netCDF4 import Dataset, num2date, date2num
from datetime import datetime, timedelta
@@ -16,19 +27,19 @@
import os
import shutil
-def select_in_water(netCDFfile):
+
+def select_in_water(netCDFfiles):
new_name = [] # list of new file names
# loop over all file names given
- for fn in netCDFfile:
+ for fn in netCDFfiles:
# Change the creation date in the filename to today
now=datetime.utcnow()
- fn_new = fn.replace("FV00", "FV01")
-
- fn_new = "".join((fn_new[0:-11],now.strftime("%Y%m%d"),fn_new[-3::]))
+ fn_new = fn.replace("FV00", "FV01")
+ fn_new = "".join((fn_new[0:-11],now.strftime("%Y%m%d"),fn_new[-3::])) # might be better to use split("-") here, and manybe even a check for IMOS file name
# Add the new file name to the list of new file names
new_name.append(fn_new)
@@ -40,20 +51,16 @@ def select_in_water(netCDFfile):
time = np.array(ods.variables['TIME'][:])
inw = parse(ods.time_deployment_start)
-
outw = parse(ods.time_deployment_end)
# Convert the start and end to the number format used in TIME
inw_num = date2num(inw.replace(tzinfo=None),units = ods.variables['TIME'].units)
-
outw_num = date2num(outw.replace(tzinfo=None),units = ods.variables['TIME'].units)
- # Create logical index of deployed times
-
+ # Create logical index of deployed times
deployed = np.logical_and(time>=inw_num,time<=outw_num)
- # Determine the length of the new time dimension
-
+ # Determine the length of the new time dimension
time_dim = len(time[deployed])
# Create the new netcdf file
@@ -61,14 +68,12 @@ def select_in_water(netCDFfile):
TIME = ds.createDimension("TIME",time_dim)
- # Copy global attributes
-
+ # Copy global attributes
for att in ods.ncattrs():
ds.setncattr(att,ods.getncattr(att))
- # Copy variables
-
+ # Copy variables
for v_name, varin in ods.variables.items():
varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
@@ -76,8 +81,7 @@ def select_in_water(netCDFfile):
# Copy variable attributes
varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
- # Fill variables with deployed data
-
+ # Fill variables with deployed data
if np.array(varin[:]).size == 1:
varout[:] = varin[:]
@@ -86,29 +90,18 @@ def select_in_water(netCDFfile):
varout[:] = np.array(varin[:])[deployed]
- ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+ ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
- ds.history += ' ' + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
-
- ds.close()
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
+ ds.history += hist + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
+ ds.close()
ods.close()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
+
+
+if __name__ == "__main__":
+ select_in_water(sys.argv[1:])
From 40e89b2f800d0fd7c5d2241b78140316fdf9133f Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 13 Feb 2020 15:39:04 +1100
Subject: [PATCH 12/59] Update select_in_water.py
---
ocean_dp/qc/select_in_water.py | 122 ++++++++++++++++++---------------
1 file changed, 65 insertions(+), 57 deletions(-)
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
index f23df57..6d5b7fb 100755
--- a/ocean_dp/qc/select_in_water.py
+++ b/ocean_dp/qc/select_in_water.py
@@ -35,72 +35,80 @@ def select_in_water(netCDFfiles):
# loop over all file names given
for fn in netCDFfiles:
- # Change the creation date in the filename to today
- now=datetime.utcnow()
+ # Check the file is an IMOS formatted file
+ if fn.split('_')[0]=='IMOS'
- fn_new = fn.replace("FV00", "FV01")
- fn_new = "".join((fn_new[0:-11],now.strftime("%Y%m%d"),fn_new[-3::])) # might be better to use split("-") here, and manybe even a check for IMOS file name
-
- # Add the new file name to the list of new file names
- new_name.append(fn_new)
+ # Change the creation date in the filename to today
+ now=datetime.utcnow()
- # Load the original netcdf file
- ods = Dataset(fn,'a')
-
- # Extract the time dimension, and the deployment start and end
- time = np.array(ods.variables['TIME'][:])
-
- inw = parse(ods.time_deployment_start)
- outw = parse(ods.time_deployment_end)
-
- # Convert the start and end to the number format used in TIME
- inw_num = date2num(inw.replace(tzinfo=None),units = ods.variables['TIME'].units)
- outw_num = date2num(outw.replace(tzinfo=None),units = ods.variables['TIME'].units)
-
- # Create logical index of deployed times
- deployed = np.logical_and(time>=inw_num,time<=outw_num)
-
- # Determine the length of the new time dimension
- time_dim = len(time[deployed])
-
- # Create the new netcdf file
- ds = Dataset(fn_new, "w", format="NETCDF4")
-
- TIME = ds.createDimension("TIME",time_dim)
-
- # Copy global attributes
- for att in ods.ncattrs():
+ fn_new = fn.replace("FV00", "FV01")
- ds.setncattr(att,ods.getncattr(att))
-
- # Copy variables
- for v_name, varin in ods.variables.items():
+ fn_new_split = fn_new.split('_')
- varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
-
- # Copy variable attributes
- varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
-
- # Fill variables with deployed data
- if np.array(varin[:]).size == 1:
-
- varout[:] = varin[:]
+ fn_new_split[-1] = "C-" + now.strftime("%Y%m%d")
+
+ fn_new = '_'.join(fn_new_split)
+
+ # Add the new file name to the list of new file names
+ new_name.append(fn_new)
- else:
+ # Load the original netcdf file
+ ods = Dataset(fn,'a')
+
+ # Extract the time dimension, and the deployment start and end
+ time = np.array(ods.variables['TIME'][:])
+
+ inw = parse(ods.time_deployment_start)
+ outw = parse(ods.time_deployment_end)
+
+ # Convert the start and end to the number format used in TIME
+ inw_num = date2num(inw.replace(tzinfo=None),units = ods.variables['TIME'].units)
+ outw_num = date2num(outw.replace(tzinfo=None),units = ods.variables['TIME'].units)
+
+ # Create logical index of deployed times
+ deployed = np.logical_and(time>=inw_num,time<=outw_num)
+
+ # Determine the length of the new time dimension
+ time_dim = len(time[deployed])
+
+ # Create the new netcdf file
+ ds = Dataset(fn_new, "w", format="NETCDF4")
+
+ TIME = ds.createDimension("TIME",time_dim)
+
+ # Copy global attributes
+ for att in ods.ncattrs():
- varout[:] = np.array(varin[:])[deployed]
+ ds.setncattr(att,ods.getncattr(att))
+
+ # Copy variables
+ for v_name, varin in ods.variables.items():
- ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+ varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
- # update the history attribute
- try:
- hist = ds.history + "\n"
- except AttributeError:
- hist = ""
- ds.history += hist + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
+ # Copy variable attributes
+ varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
- ds.close()
- ods.close()
+ # Fill variables with deployed data
+ if np.array(varin[:]).size == 1:
+
+ varout[:] = varin[:]
+
+ else:
+
+ varout[:] = np.array(varin[:])[deployed]
+
+ ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
+ ds.history += hist + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
+
+ ds.close()
+ ods.close()
if __name__ == "__main__":
From 1e2f36b5818d4f6c97e4c15af5c28baa667a452d Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 13 Feb 2020 15:39:28 +1100
Subject: [PATCH 13/59] Update in_water with split and IMOS test
---
ocean_dp/qc/select_in_water.py | 171 ++++++++++++++++-----------------
ocean_dp/qc/spike_test | 8 ++
2 files changed, 93 insertions(+), 86 deletions(-)
create mode 100755 ocean_dp/qc/spike_test
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
index b600695..6925d12 100755
--- a/ocean_dp/qc/select_in_water.py
+++ b/ocean_dp/qc/select_in_water.py
@@ -1,10 +1,19 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 12 09:49:26 2020
+# ocean_dp
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
-@author: tru050
-"""
from dateutil.parser import parse
from netCDF4 import Dataset, num2date, date2num
from datetime import datetime, timedelta
@@ -16,99 +25,89 @@
import os
import shutil
-def select_in_water(netCDFfile):
+
+def select_in_water(netCDFfiles):
new_name = [] # list of new file names
# loop over all file names given
- for fn in netCDFfile:
-
- # Change the creation date in the filename to today
- now=datetime.utcnow()
+ for fn in netCDFfiles:
- fn_new = fn.replace("FV00", "FV01")
+ # Check the file is an IMOS formatted file
+ if fn.split('_')[0]=='IMOS'
- fn_new = "".join((fn_new[0:-11],now.strftime("%Y%m%d"),fn_new[-3::]))
-
- # Add the new file name to the list of new file names
- new_name.append(fn_new)
+ # Change the creation date in the filename to today
+ now=datetime.utcnow()
- # Load the original netcdf file
- ods = Dataset(fn,'a')
-
- # Extract the time dimension, and the deployment start and end
- time = np.array(ods.variables['TIME'][:])
-
- inw = parse(ods.time_deployment_start)
-
- outw = parse(ods.time_deployment_end)
-
- # Convert the start and end to the number format used in TIME
- inw_num = date2num(inw.replace(tzinfo=None),units = ods.variables['TIME'].units)
-
- outw_num = date2num(outw.replace(tzinfo=None),units = ods.variables['TIME'].units)
-
- # Create logical index of deployed times
-
- deployed = np.logical_and(time>=inw_num,time<=outw_num)
-
- # Determine the length of the new time dimension
-
- time_dim = len(time[deployed])
-
- # Create the new netcdf file
- ds = Dataset(fn_new, "w", format="NETCDF4")
-
- TIME = ds.createDimension("TIME",time_dim)
-
- # Copy global attributes
-
- for att in ods.ncattrs():
+ fn_new = fn.replace("FV00", "FV01")
- ds.setncattr(att,ods.getncattr(att))
-
- # Copy variables
+ fn_new_split = fn_new.split('_')
- for v_name, varin in ods.variables.items():
+ fn_new_split[-1] = "C-" + now.strftime("%Y%m%d")
- varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
-
- # Copy variable attributes
- varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
-
- # Fill variables with deployed data
+ fn_new = '_'.join(fn_new_split)
- if np.array(varin[:]).size == 1:
+ # Add the new file name to the list of new file names
+ new_name.append(fn_new)
- varout[:] = varin[:]
-
- else:
-
- varout[:] = np.array(varin[:])[deployed]
+ # Load the original netcdf file
+ ods = Dataset(fn,'a')
+
+ # Extract the time dimension, and the deployment start and end
+ time = np.array(ods.variables['TIME'][:])
+
+ inw = parse(ods.time_deployment_start)
+ outw = parse(ods.time_deployment_end)
+
+ # Convert the start and end to the number format used in TIME
+ inw_num = date2num(inw.replace(tzinfo=None),units = ods.variables['TIME'].units)
+ outw_num = date2num(outw.replace(tzinfo=None),units = ods.variables['TIME'].units)
+
+ # Create logical index of deployed times
+ deployed = np.logical_and(time>=inw_num,time<=outw_num)
+
+ # Determine the length of the new time dimension
+ time_dim = len(time[deployed])
+
+ # Create the new netcdf file
+ ds = Dataset(fn_new, "w", format="NETCDF4")
+
+ TIME = ds.createDimension("TIME",time_dim)
+
+ # Copy global attributes
+ for att in ods.ncattrs():
- ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
-
- ds.history += ' ' + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
-
- ds.close()
-
- ods.close()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ ds.setncattr(att,ods.getncattr(att))
+ # Copy variables
+ for v_name, varin in ods.variables.items():
+
+ varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
+
+ # Copy variable attributes
+ varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
+
+ # Fill variables with deployed data
+ if np.array(varin[:]).size == 1:
+
+ varout[:] = varin[:]
+
+ else:
+
+ varout[:] = np.array(varin[:])[deployed]
+
+ ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
+ ds.history += hist + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
-
\ No newline at end of file
+ ds.close()
+ ods.close()
+
+
+if __name__ == "__main__":
+ select_in_water(sys.argv[1:])
\ No newline at end of file
diff --git a/ocean_dp/qc/spike_test b/ocean_dp/qc/spike_test
new file mode 100755
index 0000000..0bb13ea
--- /dev/null
+++ b/ocean_dp/qc/spike_test
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 12 16:28:38 2020
+
+@author: tru050
+"""
+
From af33590364f1b1d6a98e90e75e43f2cae9b79378 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Fri, 14 Feb 2020 14:41:11 +1100
Subject: [PATCH 14/59] Create netcdf_gen.py
---
ocean_dp/qc/netcdf_gen.py | 118 ++++++++++++++++++++++++++++++++++++++
1 file changed, 118 insertions(+)
create mode 100755 ocean_dp/qc/netcdf_gen.py
diff --git a/ocean_dp/qc/netcdf_gen.py b/ocean_dp/qc/netcdf_gen.py
new file mode 100755
index 0000000..f687f83
--- /dev/null
+++ b/ocean_dp/qc/netcdf_gen.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+from dateutil.parser import parse
+from netCDF4 import Dataset, num2date, date2num
+from datetime import datetime, timedelta
+import sys
+from datetime import datetime
+import numpy as np
+from dateutil import parser
+import pytz
+import os
+import shutil
+
+# Provide the function with a filename (don't include .nc), a nominal depth,
+# and pairs of names and arrays containing the data to be included as variables.
+# A time dimension/variable is created by default, starting at 01/01/2020 using
+# 1 hour timestamps
+
+# For example, netcdf_gen('test',30,'PRES',pres_data,'TEMP',temp_data)
+
+def netcdf_gen(file_name,nominal_depth,*args):
+
+ # Convert the args tuple to a list
+ args = list(args)
+
+ # Check the args are paired
+ if len(args) % 2 == 0:
+
+ # Assign the names and data to lists
+ var_names = args[0::2]
+
+ var_data = args[1::2]
+
+ # Check if first of each pair is a string
+ if all(isinstance(x, str) for x in var_names):
+
+ # Check if second of each pair are all equal in shape
+ if all(np.shape(var_data[1]) == np.shape(x) for x in var_data):
+
+ # Create the netcdf with IMOS tag
+ ds = Dataset("IMOS_" + file_name + ".nc","w", format="NETCDF4")
+
+ # Create time dimension with length to match data
+ time_dim = ds.createDimension("TIME", len(var_data[0]))
+
+ time_var = ds.createVariable("TIME","f8",("TIME"))
+
+ ds.variables['TIME'][:] = np.arange(25567,25567+(1/24)*len(var_data[1]),1/24)
+
+ time_atts = ['long_name','time','units','days since 1950-01-01 00:00:00 UTC',
+ 'calendar','gregorian','axis','T','standard_name','time','valid_max',
+ 90000,'valid_min',0]
+
+ for att_name,att_value in zip(time_atts[0::2],time_atts[1::2]):
+
+ time_var.setncattr(att_name,att_value)
+
+ # Create the nominal depth variable
+ nom_depth_var = ds.createVariable("NOMINAL_DEPTH","f8")
+
+ ds.variables["NOMINAL_DEPTH"] = nominal_depth
+
+ nom_dep_atts = ['long_name','nominal depth','units','m',
+ 'positive','down','axis','Z','standard_name','depth','valid_max',
+ 12000,'valid_min',-5,'reference_datum','sea surface']
+
+ for att_name,att_value in zip(nom_dep_atts[0::2],nom_dep_atts[1::2]):
+
+ nom_depth_var.setncattr(att_name,att_value)
+
+ # Create variables from input data
+ for name_in,data_in in zip(var_names,var_data):
+
+ ds.createVariable(name_in,"f8",("TIME"))
+
+ ds.variables[name_in][:] = data_in
+
+ ds.close()
+
+ else:
+ print('Data arrays not of equal length')
+
+
+ else:
+ print('Labels not in string format')
+
+ else:
+ print('Data not passed in pairs')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From c61b8ba5ddd223ef3d32db43683a327e92a4b57a Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Mon, 17 Feb 2020 16:19:55 +1100
Subject: [PATCH 15/59] Update select_in_water.py
---
ocean_dp/qc/select_in_water.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
index 02ccb49..68f57be 100755
--- a/ocean_dp/qc/select_in_water.py
+++ b/ocean_dp/qc/select_in_water.py
@@ -27,6 +27,7 @@
import os
import shutil
+# Submit argument as a list
def select_in_water(netCDFfiles):
@@ -36,7 +37,7 @@ def select_in_water(netCDFfiles):
for fn in netCDFfiles:
# Check the file is an IMOS formatted file
- if fn.split('_')[0]=='IMOS'
+ if fn.split('_')[0]=='IMOS':
# Change the creation date in the filename to today
now=datetime.utcnow()
@@ -45,7 +46,7 @@ def select_in_water(netCDFfiles):
fn_new_split = fn_new.split('_')
- fn_new_split[-1] = "C-" + now.strftime("%Y%m%d")
+ fn_new_split[-1] = "C-" + now.strftime("%Y%m%d") + ".nc"
fn_new = '_'.join(fn_new_split)
@@ -100,6 +101,12 @@ def select_in_water(netCDFfiles):
ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+ # update the time coverage attributes
+
+ ds.time_coverage_start = ods.time_deployment_start
+
+ ds.time_coverage_end = ods.time_deployment_end
+
# update the history attribute
try:
hist = ds.history + "\n"
From 04badbfce75d0df9f21055b11854ae33516d81cd Mon Sep 17 00:00:00 2001
From: Peter Jansen
Date: Tue, 18 Feb 2020 09:31:04 +1100
Subject: [PATCH 16/59] Update select_in_water.py
Remove some white space,
able to operate on non IMOS file names,
TODO: find TIME dimension (or the dimension of the TIME variable)
---
ocean_dp/qc/select_in_water.py | 148 ++++++++++++++++-----------------
1 file changed, 72 insertions(+), 76 deletions(-)
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
index 68f57be..5118ae8 100755
--- a/ocean_dp/qc/select_in_water.py
+++ b/ocean_dp/qc/select_in_water.py
@@ -32,91 +32,87 @@
def select_in_water(netCDFfiles):
new_name = [] # list of new file names
+ now = datetime.utcnow()
# loop over all file names given
for fn in netCDFfiles:
# Check the file is an IMOS formatted file
- if fn.split('_')[0]=='IMOS':
-
+ if fn.split('_')[0]=='IMOS':
+ fn_new = fn_new.replace("FV00", "FV01")
+ fn_new_split = fn_new.split('_')
# Change the creation date in the filename to today
- now=datetime.utcnow()
-
- fn_new = fn.replace("FV00", "FV01")
-
- fn_new_split = fn_new.split('_')
-
- fn_new_split[-1] = "C-" + now.strftime("%Y%m%d") + ".nc"
-
+ fn_new_split[-1] = "C-" + now.strftime("%Y%m%d") + ".nc"
fn_new = '_'.join(fn_new_split)
+ else:
+ fn_new = fn.replace(".nc", "-trim.nc")
- # Add the new file name to the list of new file names
- new_name.append(fn_new)
-
- # Load the original netcdf file
- ods = Dataset(fn,'a')
-
- # Extract the time dimension, and the deployment start and end
- time = np.array(ods.variables['TIME'][:])
-
- inw = parse(ods.time_deployment_start)
- outw = parse(ods.time_deployment_end)
-
- # Convert the start and end to the number format used in TIME
- inw_num = date2num(inw.replace(tzinfo=None),units = ods.variables['TIME'].units)
- outw_num = date2num(outw.replace(tzinfo=None),units = ods.variables['TIME'].units)
-
- # Create logical index of deployed times
- deployed = np.logical_and(time>=inw_num,time<=outw_num)
-
- # Determine the length of the new time dimension
- time_dim = len(time[deployed])
-
- # Create the new netcdf file
- ds = Dataset(fn_new, "w", format="NETCDF4")
-
- TIME = ds.createDimension("TIME",time_dim)
-
- # Copy global attributes
- for att in ods.ncattrs():
-
- ds.setncattr(att,ods.getncattr(att))
-
- # Copy variables
- for v_name, varin in ods.variables.items():
-
- varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
-
- # Copy variable attributes
- varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
+ # Add the new file name to the list of new file names
+ new_name.append(fn_new)
+
+ # Load the original netcdf file
+ ods = Dataset(fn,'a')
+
+ # Extract the time dimension, and the deployment start and end
+ # TODO: check this works
+ v = nc.get_variables_by_attributes(standard_name='time')
+ time = np.array(v[0][:])
+
+ inw = parse(ods.time_deployment_start)
+ outw = parse(ods.time_deployment_end)
+
+ # Convert the start and end to the number format used in TIME
+ inw_num = date2num(inw.replace(tzinfo=None), units=ods.variables['TIME'].units)
+ outw_num = date2num(outw.replace(tzinfo=None), units=ods.variables['TIME'].units)
+
+ # Create logical index of deployed times
+ deployed = np.logical_and(time>=inw_num, time<=outw_num)
+
+ # Determine the length of the new time dimension
+ time_dim_len = len(time[deployed])
+
+ # Create the new netcdf file
+ ds = Dataset(fn_new, "w", format="NETCDF4")
+
+ new_time_dim = ds.createDimension("TIME", time_dim_len)
+
+ # Copy global attributes
+ for att in ods.ncattrs():
+ ds.setncattr(att, ods.getncattr(att))
+
+ # Copy variables
+ for v_name, varin in ods.variables.items():
+
+ varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
+
+ # Copy variable attributes
+ varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
+
+ # Fill variables with deployed data
+ # TODO: should check if the dimensions for the variable include TIME, and truncate that dimension
+ if np.array(varin[:]).size == 1:
+ varout[:] = varin[:]
+ else:
+ varout[:] = np.array(varin[:])[deployed]
+
+ ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+ # update the time coverage attributes
+ ds.time_coverage_start = ods.time_deployment_start
+ ds.time_coverage_end = ods.time_deployment_end
+
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
+ ds.history += hist + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
+
+ ds.close()
+ ods.close()
- # Fill variables with deployed data
- if np.array(varin[:]).size == 1:
-
- varout[:] = varin[:]
-
- else:
-
- varout[:] = np.array(varin[:])[deployed]
-
- ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
-
- # update the time coverage attributes
-
- ds.time_coverage_start = ods.time_deployment_start
-
- ds.time_coverage_end = ods.time_deployment_end
-
- # update the history attribute
- try:
- hist = ds.history + "\n"
- except AttributeError:
- hist = ""
- ds.history += hist + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'
-
- ds.close()
- ods.close()
+ return new_name
if __name__ == "__main__":
- select_in_water(sys.argv[1:])
\ No newline at end of file
+ select_in_water(sys.argv[1:])
From e5559ca5e31d0dd90edcd2b1748c6523eac34976 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 19 Feb 2020 11:25:52 +1100
Subject: [PATCH 17/59] Update pressure_interpolator.py
---
ocean_dp/processing/pressure_interpolator.py | 184 ++++++++++++-------
1 file changed, 122 insertions(+), 62 deletions(-)
diff --git a/ocean_dp/processing/pressure_interpolator.py b/ocean_dp/processing/pressure_interpolator.py
index cb929af..ed764c3 100755
--- a/ocean_dp/processing/pressure_interpolator.py
+++ b/ocean_dp/processing/pressure_interpolator.py
@@ -22,26 +22,54 @@
import os
import shutil
-def pressure_interpolator:
-
- # Load the filenames of the FV00 files in the current folder
- fv00_files = glob.glob('*FV00*.nc');
+# Supply netCDFfiles as a ['list'] of files, agg as a 'string'
- # Extract the aggregate file data
- agg = Dataset(glob.glob('*Aggregate*.nc')[0], mode="r")
+def pressure_interpolator(netCDFfiles = None,agg = None):
+
+ if netCDFfiles==None:
- # Loop through each of the FV00 files
- for i in fv00_files:
+ # Load the filenames of the fv01 files in the current folder
+ netCDFfiles = glob.glob('*FV01*.nc')
+
+ if agg ==None:
- # Extract the contents of the current file
- fv00_contents = Dataset(i, mode="r")
+ # Extract the aggregate file data
+ agg = Dataset(glob.glob('*Aggregate*.nc')[0], mode="r")
+
+ else:
+
+ agg = Dataset(glob.glob(agg)[0], mode="r")
+
+ # Loop through each of the fv01 files
+ for fn in netCDFfiles:
+
+ # Change the creation date in the filename to today
+ now=datetime.utcnow()
+
+ fn_new_split = fn.split('_')
+
+ fn_new_split[-1] = "C-" + now.strftime("%Y%m%d") + ".nc"
+
+ fn_new_split[2] += 'IP'
+
+ fn_new = '_'.join(fn_new_split)
+
+
+ # If a new (different) filename has been successfully generated, make
+ # a copy of the old file with the new name
+ if fn_new != fn:
+ # copy file
+ shutil.copy(fn, fn_new)
+
+ # Open and work in the new copy
+ fv01_contents = Dataset(fn_new,mode='a')
# Check the current file doesn't contain pressure to run the following
# interpolator
- if not 'PRES' in fv00_contents.variables:
+ if not 'PRES' in fv01_contents.variables:
# Create a NaN array to fill with pressure values
- interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv00_contents.variables["TIME"])),np.nan)
+ interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv01_contents.variables["TIME"])),np.nan)
# Set the first row as zeros to set 0m as 0dbar
interp_agg_pres[0,:] = 0
@@ -50,14 +78,14 @@ def pressure_interpolator:
# including the 0m values
agg_nominal_depths = np.insert(np.array(agg.variables["NOMINAL_DEPTH"][:]),0,0)
- # For each nominal depth, interpolate the agg data at the FV00 times
+ # For each nominal depth, interpolate the agg data at the fv01 times
for j in range(1,len(agg_nominal_depths)):
time_selection = agg.variables["TIME"][agg.variables["instrument_index"][:]==(j-1)]
pres_selection = agg.variables["PRES"][agg.variables["instrument_index"][:]==(j-1)]
- interp_agg_pres[j,:] = np.interp(fv00_contents.variables["TIME"][:],time_selection,pres_selection)
+ interp_agg_pres[j,:] = np.interp(fv01_contents.variables["TIME"][:],time_selection,pres_selection)
# Sort the nominal depths and pressures according to nominal depth
interp_agg_pres = interp_agg_pres[np.argsort(agg_nominal_depths),:]
@@ -71,7 +99,7 @@ def pressure_interpolator:
interp_agg_pres_df = pd.DataFrame(data=interp_agg_pres,index=agg_nominal_depths)
# Find all the columns where the lowest element is NaN
- nan_cols = interp_agg_pres_df[interp_agg_pres_df[-1:].isna()].tolist()
+ nan_cols = np.where(interp_agg_pres_df.iloc[-1].isna())
# Select each column containing an NaN as the deepest value
for j in nan_cols:
@@ -94,85 +122,117 @@ def pressure_interpolator:
# Convert the DataFrame back to an array
interp_agg_pres = interp_agg_pres_df.to_numpy()
- # Create a NaN array to receive the FV00 interpolated pressures
- interp_fv00_pres = np.full((np.shape(fv00_contents.variables["TIME"][:])),np.nan)
+ # Create a NaN array to receive the fv01 interpolated pressures
+ interp_fv01_pres = np.full((np.shape(fv01_contents.variables["TIME"][:])),np.nan)
- # At each timestamp, interpolate pressure for the FV00 data
- for j in range(len(fv00_contents.variables["TIME"])):
+ # At each timestamp, interpolate pressure for the fv01 data
+ for j in range(len(fv01_contents.variables["TIME"])):
- interp_fv00_pres[j] = np.interp(fv00_contents.variables["NOMINAL_DEPTH"][0],agg_nominal_depths,interp_agg_pres[:,j])
+ interp_fv01_pres[j] = np.interp(fv01_contents.variables["NOMINAL_DEPTH"][0],agg_nominal_depths,interp_agg_pres[:,j])
- # Use methods from add_qc_flags to make a new netcdf?
+ # Create the PRES and PRES_quality_control variables, and their attributes
+
+ pres_var = fv01_contents.createVariable('PRES','f8',fv01_contents.variables['TIME'].dimensions,fill_value=99, zlib=True)
+
+ pres_atts = ['long_name','sea_water_pressure_due_to_sea_water','units','dbar',
+ 'standard_name','coordinates','TIME LATITUDE LONGITUDE NOMINAL_DEPTH','sea_water_pressure_due_to_sea_water','valid_max',
+ 12000,'valid_min',-15]
+
+ for att_name,att_value in zip(pres_atts[0::2],pres_atts[1::2]):
+
+ pres_var.setncattr(att_name,att_value)
+
+ pres_var[:] = interp_fv01_pres
+
+
+ pres_qc_var = fv01_contents.createVariable('PRES_quality_control','i1',fv01_contents.variables['TIME'].dimensions,fill_value=99, zlib=True)
+
+ pres_qc_var.long_name = "quality_code for PRES"
+
+ pres_qc_var.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9])
+
+ pres_qc_var.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ pres_qc_var[:] = 7
+
+ pres_var.ancillary_variables = "PRES_quality_control"
+
+ # Close the netcdf files
+
+ fv01_contents.close()
+
+ agg.close()
- # Deal with files that already contain pressure, but may contain NaNs
- else:
- # Create a NaN array to fill with pressure values
- interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv00_contents.variables["TIME"])),np.nan)
+ # Deal with files that already contain pressure, but contain NaNs
+ elif any(np.isnan(agg.variables['PRES'][:])):
+
+ # Create a NaN array to fill with pressure values
+ interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv01_contents.variables["TIME"])),np.nan)
# Set the first row as zeros to set 0m as 0dbar
interp_agg_pres[0,:] = 0
- # Set the last row to 5000 to set 5000m as 5000dbar (~seafloor),
- # only for interpolation in cases where the deepest sensor has failed
- #interp_agg_pres[-1,:] = 5000
-
# Create a new array representing the nominal depths of the agg file,
# including the 0m values
agg_nominal_depths = np.insert(np.array(agg.variables["NOMINAL_DEPTH"][:]),0,0)
- # For each nominal depth, interpolate the agg data at the FV00 times
+ # For each nominal depth, interpolate the agg data at the fv01 times
for j in range(1,len(agg_nominal_depths)):
time_selection = agg.variables["TIME"][agg.variables["instrument_index"][:]==(j-1)]
pres_selection = agg.variables["PRES"][agg.variables["instrument_index"][:]==(j-1)]
- interp_agg_pres[j,:] = np.interp(fv00_contents.variables["TIME"][:],time_selection,pres_selection)
+ interp_agg_pres[j,:] = np.interp(fv01_contents.variables["TIME"][:],time_selection,pres_selection)
# Sort the nominal depths and pressures according to nominal depth
interp_agg_pres = interp_agg_pres[np.argsort(agg_nominal_depths),:]
agg_nominal_depths.sort()
+
+ # Make a dataframe of the interpolated pressure to handle NaNs easily
+ interp_agg_pres_df = pd.DataFrame(data=interp_agg_pres,index=agg_nominal_depths)
- # If there are any NaN values, linearly interpolate profilewise
- if np.isnan(np.sum(interp_agg_pres)):
+ # Find all the columns where the lowest element is NaN
+ nan_cols = np.where(interp_agg_pres_df.iloc[-1].isna())
+
+ # Select each column containing an NaN as the deepest value
+ for j in nan_cols:
- # Make a dataframe of the interpolated pressure to handle NaNs easily
- interp_agg_pres_df = pd.DataFrame(data=interp_agg_pres,index=agg_nominal_depths)
+ # Find the shallowest nominal depth that isn't NaN
+ shallowest_val = pd.Series.last_valid_index(interp_agg_pres_df.iloc[:,j])
- # Find all the columns where the lowest element is NaN
- nan_cols = interp_agg_pres_df[interp_agg_pres_df[-1:].isna()].tolist()
+ # Find the index of that nominal depth
+ shallowest_idx = interp_agg_pres_df.index.tolist().index(shallowest_val)
- # Select each column containing an NaN as the deepest value
- for j in nan_cols:
+ # Starting at the shallowest NaN in a continous block of NaNs to the bottom
+ for k in range(shallowest_idx+1,len(interp_agg_pres_df)):
- # Find the shallowest nominal depth that isn't NaN
- shallowest_val = pd.Series.last_valid_index(interp_agg_pres_df.iloc[:,j])
-
- # Find the index of that nominal depth
- shallowest_idx = interp_agg_pres_df.index.tolist().index(shallowest_val)
+ # Linearly interpolate from shallow to deep, based on a nominal depth difference of 1m equating to 1dbar
+ interp_agg_pres_df.iloc[k,j] = interp_agg_pres_df.iloc[k-1,j]+np.diff(interp_agg_pres_df.index)[k-1]
- # Starting at the shallowest NaN in a continous block of NaNs to the bottom
- for k in range(shallowest_idx+1,len(interp_agg_pres_df)):
-
- # Linearly interpolate from shallow to deep, based on a nominal depth difference of 1m equating to 1dbar
- interp_agg_pres_df.iloc[k,j] = interp_agg_pres_df.iloc[k-1,j]+np.diff(interp_agg_pres_df.index)[k-1]
-
- # Linearly interpolate any remaining NaNs
- interp_agg_pres_df = interp_agg_pres_df.interpolate(method="index")
-
- # Convert the DataFrame back to an array
- interp_agg_pres = interp_agg_pres_df.to_numpy()
-
- # Create a NaN array to receive the FV00 interpolated pressures
- interp_fv00_pres = np.full((np.shape(fv00_contents.variables["TIME"][:])),np.nan)
+ # Linearly interpolate any remaining NaNs
+ interp_agg_pres_df = interp_agg_pres_df.interpolate(method="index")
+ # Convert the DataFrame back to an array
+ interp_agg_pres = interp_agg_pres_df.to_numpy()
+
+ # Create a NaN array to receive the fv01 interpolated pressures
+ interp_fv01_pres = np.full((np.shape(fv01_contents.variables["TIME"][:])),np.nan)
+
# Extract the interpolated pressures (NaNs removed) to store in netCDF4
- interp_fv00_pres = interp_agg_pres_df[interp_agg_pres_df.index==fv00_contents.variables["NOMINAL_DEPTH"][:]]
+ interp_fv01_pres = interp_agg_pres_df[interp_agg_pres_df.index==fv01_contents.variables["NOMINAL_DEPTH"][:]]
+
+ # Find indices where the netcdf data and interpolated data don't match (where the NaNs are in the netcdf)
+ nan_rep_idx = np.where(interp_fv01_pres!=fv01_contents.variables['PRES'][:])[1]
+
+ fv01_contents.variables['PRES_quality_control'][nan_rep_idx] = 7
+
+ # Insert pressure value with NaNs interpolated back into netcdf
+ fv01_contents.variables['PRES'][:] = interp_fv01_pres
+
+ fv01_contents.close()
- #
-
-
From 500830bb2173269f0e6bc5d27d39317f3d4584a9 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 19 Feb 2020 15:40:05 +1100
Subject: [PATCH 18/59] Update pressure_interpolator.py
---
ocean_dp/processing/pressure_interpolator.py | 34 ++++++++++++++++----
1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/ocean_dp/processing/pressure_interpolator.py b/ocean_dp/processing/pressure_interpolator.py
index ed764c3..af248c2 100755
--- a/ocean_dp/processing/pressure_interpolator.py
+++ b/ocean_dp/processing/pressure_interpolator.py
@@ -24,14 +24,18 @@
# Supply netCDFfiles as a ['list'] of files, agg as a 'string'
-def pressure_interpolator(netCDFfiles = None,agg = None):
+def pressure_interpolator(netCDFfiles = [],agg = []):
- if netCDFfiles==None:
+ if netCDFfiles==[]:
+
+ print('netcdffiles = none')
# Load the filenames of the fv01 files in the current folder
netCDFfiles = glob.glob('*FV01*.nc')
- if agg ==None:
+ if agg == []:
+
+ print('agg = none')
# Extract the aggregate file data
agg = Dataset(glob.glob('*Aggregate*.nc')[0], mode="r")
@@ -43,6 +47,8 @@ def pressure_interpolator(netCDFfiles = None,agg = None):
# Loop through each of the fv01 files
for fn in netCDFfiles:
+ print('File selected is '+fn)
+
# Change the creation date in the filename to today
now=datetime.utcnow()
@@ -58,16 +64,25 @@ def pressure_interpolator(netCDFfiles = None,agg = None):
# If a new (different) filename has been successfully generated, make
# a copy of the old file with the new name
if fn_new != fn:
+
+ print('copying file')
# copy file
shutil.copy(fn, fn_new)
# Open and work in the new copy
fv01_contents = Dataset(fn_new,mode='a')
+ print('copied file opened')
+
# Check the current file doesn't contain pressure to run the following
# interpolator
if not 'PRES' in fv01_contents.variables:
+ print("file doesn't contain pressure")
+
+ print(fv01_contents.variables.keys())
+ print(agg.variables.keys())
+
# Create a NaN array to fill with pressure values
interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv01_contents.variables["TIME"])),np.nan)
@@ -160,11 +175,11 @@ def pressure_interpolator(netCDFfiles = None,agg = None):
# Close the netcdf files
fv01_contents.close()
-
- agg.close()
# Deal with files that already contain pressure, but contain NaNs
- elif any(np.isnan(agg.variables['PRES'][:])):
+ elif any(np.isnan(np.array(fv01_contents.variables['PRES'][:]))):
+
+ print("file contains pressure and agg contains NaNs")
# Create a NaN array to fill with pressure values
interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv01_contents.variables["TIME"])),np.nan)
@@ -226,13 +241,20 @@ def pressure_interpolator(netCDFfiles = None,agg = None):
# Find indices where the netcdf data and interpolated data don't match (where the NaNs are in the netcdf)
nan_rep_idx = np.where(interp_fv01_pres!=fv01_contents.variables['PRES'][:])[1]
+ #
fv01_contents.variables['PRES_quality_control'][nan_rep_idx] = 7
+ print('QC altered in original press')
+
# Insert pressure value with NaNs interpolated back into netcdf
fv01_contents.variables['PRES'][:] = interp_fv01_pres
+ print('press altered in orginal press')
+
fv01_contents.close()
+ agg.close()
+
From 959426b9c483ac157c42f4eaf70c61ad437f30d1 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 19 Feb 2020 16:13:13 +1100
Subject: [PATCH 19/59] Update select_in_water.py
---
ocean_dp/qc/select_in_water.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
index 5118ae8..16cf613 100755
--- a/ocean_dp/qc/select_in_water.py
+++ b/ocean_dp/qc/select_in_water.py
@@ -39,7 +39,7 @@ def select_in_water(netCDFfiles):
# Check the file is an IMOS formatted file
if fn.split('_')[0]=='IMOS':
- fn_new = fn_new.replace("FV00", "FV01")
+ fn_new = fn.replace("FV00", "FV01")
fn_new_split = fn_new.split('_')
# Change the creation date in the filename to today
fn_new_split[-1] = "C-" + now.strftime("%Y%m%d") + ".nc"
@@ -55,7 +55,7 @@ def select_in_water(netCDFfiles):
# Extract the time dimension, and the deployment start and end
# TODO: check this works
- v = nc.get_variables_by_attributes(standard_name='time')
+ v = ods.get_variables_by_attributes(standard_name='time')
time = np.array(v[0][:])
inw = parse(ods.time_deployment_start)
From d8cbcd493728d642d3ba4e07b2bd003a703791e0 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 20 Feb 2020 08:22:39 +1100
Subject: [PATCH 20/59] Update copyDataset.py
---
ocean_dp/aggregation/copyDataset.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ocean_dp/aggregation/copyDataset.py b/ocean_dp/aggregation/copyDataset.py
index d23fe7d..95c5998 100644
--- a/ocean_dp/aggregation/copyDataset.py
+++ b/ocean_dp/aggregation/copyDataset.py
@@ -21,7 +21,7 @@
# http://thredds.aodn.org.au/thredds/catalog/IMOS/ABOS/DA/EAC2000/catalog.html
from dateutil.parser import parse
-
+
def aggregate(files, varNames):
# split this into createCatalog - copy needed information into structure
@@ -244,7 +244,7 @@ def aggregate(files, varNames):
filen = 0
# variables we want regardless
- varNames = [varNames]+['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH']
+ varNames.extend(['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH'])
# remove any duplicates
varNamesOut = set(varNames)
From 20fe907d30a01acbb916ddb8190e70e9778916b3 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 20 Feb 2020 08:48:17 +1100
Subject: [PATCH 21/59] Update netcdf_gen.py
---
ocean_dp/qc/netcdf_gen.py | 229 +++++++++++++++++++-------------------
1 file changed, 112 insertions(+), 117 deletions(-)
diff --git a/ocean_dp/qc/netcdf_gen.py b/ocean_dp/qc/netcdf_gen.py
index f687f83..d20f998 100755
--- a/ocean_dp/qc/netcdf_gen.py
+++ b/ocean_dp/qc/netcdf_gen.py
@@ -1,118 +1,113 @@
-# Copyright (C) 2020 Ben Weeding
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-from dateutil.parser import parse
-from netCDF4 import Dataset, num2date, date2num
-from datetime import datetime, timedelta
-import sys
-from datetime import datetime
-import numpy as np
-from dateutil import parser
-import pytz
-import os
-import shutil
-
-# Provide the function with a filename (don't include .nc), a nominal depth,
-# and pairs of names and arrays containing the data to be included as variables.
-# A time dimension/variable is created by default, starting at 01/01/2020 using
-# 1 hour timestamps
-
-# For example, netcdf_gen('test',30,'PRES',pres_data,'TEMP',temp_data)
-
-def netcdf_gen(file_name,nominal_depth,*args):
-
- # Convert the args tuple to a list
- args = list(args)
-
- # Check the args are paired
- if len(args) % 2 == 0:
-
- # Assign the names and data to lists
- var_names = args[0::2]
-
- var_data = args[1::2]
-
- # Check if first of each pair is a string
- if all(isinstance(x, str) for x in var_names):
-
- # Check if second of each pair are all equal in shape
- if all(np.shape(var_data[1]) == np.shape(x) for x in var_data):
-
- # Create the netcdf with IMOS tag
- ds = Dataset("IMOS_" + file_name + ".nc","w", format="NETCDF4")
-
- # Create time dimension with length to match data
- time_dim = ds.createDimension("TIME", len(var_data[0]))
-
- time_var = ds.createVariable("TIME","f8",("TIME"))
-
- ds.variables['TIME'][:] = np.arange(25567,25567+(1/24)*len(var_data[1]),1/24)
-
- time_atts = ['long_name','time','units','days since 1950-01-01 00:00:00 UTC',
- 'calendar','gregorian','axis','T','standard_name','time','valid_max',
- 90000,'valid_min',0]
-
- for att_name,att_value in zip(time_atts[0::2],time_atts[1::2]):
-
- time_var.setncattr(att_name,att_value)
-
- # Create the nominal depth variable
- nom_depth_var = ds.createVariable("NOMINAL_DEPTH","f8")
-
- ds.variables["NOMINAL_DEPTH"] = nominal_depth
-
- nom_dep_atts = ['long_name','nominal depth','units','m',
- 'positive','down','axis','Z','standard_name','depth','valid_max',
- 12000,'valid_min',-5,'reference_datum','sea surface']
-
- for att_name,att_value in zip(nom_dep_atts[0::2],nom_dep_atts[1::2]):
-
- nom_depth_var.setncattr(att_name,att_value)
-
- # Create variables from input data
- for name_in,data_in in zip(var_names,var_data):
-
- ds.createVariable(name_in,"f8",("TIME"))
-
- ds.variables[name_in][:] = data_in
-
- ds.close()
-
- else:
- print('Data arrays not of equal length')
-
-
- else:
- print('Labels not in string format')
-
- else:
- print('Data not passed in pairs')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ # Copyright (C) 2020 Ben Weeding
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see .
+ from netCDF4 import Dataset, date2num
+ import sys
+ from datetime import datetime
+ import numpy as np
+
+ # Provide the function with a filename (don't include .nc), a nominal depth,
+ # and pairs of names and arrays containing the data to be included as variables.
+ # A time dimension/variable is created by default, starting at 01/01/2020 using
+ # 1 hour timestamps
+
+ # For example, netcdf_gen('test',30,'PRES',pres_data,'TEMP',temp_data)
+ # from the command line gen_test_data.py test 30 PRES 10,20,30 TEMP 11,12,NaN
+
+ def netcdf_gen(file_name, nominal_depth, *args):
+ # Convert the args tuple to a list
+ args = list(args)
+ #print(args, type(args[1]))
+
+ file_name = "IMOS_" + file_name + ".nc" # if we insist on not wanting to pass these
+
+ # deal with passing nominal depth as a string
+ if isinstance(nominal_depth, str):
+ nominal_depth = float(nominal_depth)
+ print('nominal depth :', nominal_depth)
+
+ # Check the args are paired
+ if len(args) % 2 == 0:
+
+ # Assign the names and data to lists
+ var_names = args[0::2]
+
+ # deal with passing data as a string list of values
+ if isinstance(args[1], str):
+ var_data = [[float(b) for b in a.split(',')] for a in args[1::2]]
+ #print('var_data split', var_data)
+ else:
+ var_data = args[1::2]
+
+ # Check if first of each pair is a string
+ if all(isinstance(x, str) for x in var_names):
+
+ # Check if second of each pair are all equal in shape
+ if all(np.shape(var_data[1]) == np.shape(x) for x in var_data):
+
+ # Create the netcdf with IMOS tag
+ ds = Dataset(file_name, "w", format="NETCDF4")
+
+ # Create time dimension with length to match data
+ time_dim = ds.createDimension("TIME", len(var_data[0]))
+
+ time_var = ds.createVariable("TIME", "f8", ("TIME"))
+
+ time_var.setncattr('long_name', 'time')
+ time_var.setncattr('standard_name', 'time')
+ time_var.setncattr('units', 'days since 1950-01-01 00:00:00 UTC')
+ time_var.setncattr('calendar', 'gregorian')
+ time_var.setncattr('axis', 'T')
+ time_var.setncattr('valid_max', 90000)
+ time_var.setncattr('valid_min', 0)
+
+ t0 = date2num(datetime(2020, 1, 1), units=time_var.units)
+ ds.variables['TIME'][:] = np.arange(t0, t0 + (1 / 24) * len(var_data[1]), 1 / 24)
+
+ # Create the nominal depth variable
+ nom_depth_var = ds.createVariable("NOMINAL_DEPTH", "f8")
+ nom_depth_var.setncattr('long_name', 'nominal depth')
+ nom_depth_var.setncattr('units', 'dbar')
+ nom_depth_var.setncattr('positive', 'down')
+ nom_depth_var.setncattr('axis', 'Z')
+ nom_depth_var.setncattr('valid_max', 12000)
+ nom_depth_var.setncattr('valid_min', -5)
+ nom_depth_var.setncattr('reference_datum', 'sea surface')
+
+ ds.variables["NOMINAL_DEPTH"][:] = nominal_depth
+
+ # Create variables from input data
+ for name_in, data_in in zip(var_names, var_data):
+ ds.createVariable(name_in, "f8", ("TIME"))
+ ds.variables[name_in][:] = data_in
+
+ ds.close()
+ print("generated ", file_name)
+
+ return (file_name)
+
+ else:
+ print('Data arrays not of equal length')
+
+
+ else:
+ print('Labels not in string format')
+
+ else:
+ print('Data not passed in pairs')
+
+
+ if __name__ == "__main__":
+ netcdf_gen(sys.argv[1], sys.argv[2], *sys.argv[3:])
\ No newline at end of file
From cd6fc8ad08473ebb44a58db05a000f845222521f Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 20 Feb 2020 14:25:14 +1100
Subject: [PATCH 22/59] Update netcdf_gen.py
---
ocean_dp/qc/netcdf_gen.py | 155 ++++++++++++++++++++------------------
1 file changed, 81 insertions(+), 74 deletions(-)
diff --git a/ocean_dp/qc/netcdf_gen.py b/ocean_dp/qc/netcdf_gen.py
index d20f998..7eae159 100755
--- a/ocean_dp/qc/netcdf_gen.py
+++ b/ocean_dp/qc/netcdf_gen.py
@@ -13,101 +13,108 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
- from netCDF4 import Dataset, date2num
- import sys
- from datetime import datetime
- import numpy as np
+from netCDF4 import Dataset, date2num
+import sys
+from datetime import datetime
+import numpy as np
- # Provide the function with a filename (don't include .nc), a nominal depth,
- # and pairs of names and arrays containing the data to be included as variables.
- # A time dimension/variable is created by default, starting at 01/01/2020 using
- # 1 hour timestamps
+ # Provide the function with a filename (don't include .nc), a nominal depth,
+# and pairs of names and arrays containing the data to be included as variables.
+# A time dimension/variable is created by default, starting at 01/01/2020 using
+# 1 hour timestamps
- # For example, netcdf_gen('test',30,'PRES',pres_data,'TEMP',temp_data)
- # from the command line gen_test_data.py test 30 PRES 10,20,30 TEMP 11,12,NaN
+ # For example, netcdf_gen('test',30,'PRES',pres_data,'TEMP',temp_data)
+# from the command line gen_test_data.py test 30 PRES 10,20,30 TEMP 11,12,NaN
- def netcdf_gen(file_name, nominal_depth, *args):
- # Convert the args tuple to a list
- args = list(args)
- #print(args, type(args[1]))
+def netcdf_gen(file_name, nominal_depth, *args):
+ # Convert the args tuple to a list
+ args = list(args)
+ #print(args, type(args[1]))
- file_name = "IMOS_" + file_name + ".nc" # if we insist on not wanting to pass these
+ file_name = "IMOS_" + file_name + ".nc" # if we insist on not wanting to pass these
- # deal with passing nominal depth as a string
- if isinstance(nominal_depth, str):
- nominal_depth = float(nominal_depth)
- print('nominal depth :', nominal_depth)
+ # deal with passing nominal depth as a string
+ if isinstance(nominal_depth, str):
+ nominal_depth = float(nominal_depth)
+ print('nominal depth :', nominal_depth)
- # Check the args are paired
- if len(args) % 2 == 0:
+ # Check the args are paired
+ if len(args) % 2 == 0:
- # Assign the names and data to lists
- var_names = args[0::2]
+ # Assign the names and data to lists
+ var_names = args[0::2]
- # deal with passing data as a string list of values
- if isinstance(args[1], str):
- var_data = [[float(b) for b in a.split(',')] for a in args[1::2]]
- #print('var_data split', var_data)
- else:
- var_data = args[1::2]
+ # deal with passing data as a string list of values
+ if isinstance(args[1], str):
+ var_data = [[float(b) for b in a.split(',')] for a in args[1::2]]
+ #print('var_data split', var_data)
+ else:
+ var_data = args[1::2]
- # Check if first of each pair is a string
- if all(isinstance(x, str) for x in var_names):
+ # Check if first of each pair is a string
+ if all(isinstance(x, str) for x in var_names):
- # Check if second of each pair are all equal in shape
- if all(np.shape(var_data[1]) == np.shape(x) for x in var_data):
+ # Check if second of each pair are all equal in shape
+ if all(np.shape(var_data[0]) == np.shape(x) for x in var_data):
- # Create the netcdf with IMOS tag
- ds = Dataset(file_name, "w", format="NETCDF4")
+ # Create the netcdf with IMOS tag
+ ds = Dataset(file_name, "w", format="NETCDF4")
- # Create time dimension with length to match data
- time_dim = ds.createDimension("TIME", len(var_data[0]))
+ # Create time dimension with length to match data
+ time_dim = ds.createDimension("TIME", len(var_data[0]))
- time_var = ds.createVariable("TIME", "f8", ("TIME"))
+ time_var = ds.createVariable("TIME", "f8", ("TIME"))
- time_var.setncattr('long_name', 'time')
- time_var.setncattr('standard_name', 'time')
- time_var.setncattr('units', 'days since 1950-01-01 00:00:00 UTC')
- time_var.setncattr('calendar', 'gregorian')
- time_var.setncattr('axis', 'T')
- time_var.setncattr('valid_max', 90000)
- time_var.setncattr('valid_min', 0)
+ time_var.setncattr('long_name', 'time')
+ time_var.setncattr('standard_name', 'time')
+ time_var.setncattr('units', 'days since 1950-01-01 00:00:00 UTC')
+ time_var.setncattr('calendar', 'gregorian')
+ time_var.setncattr('axis', 'T')
+ time_var.setncattr('valid_max', 90000)
+ time_var.setncattr('valid_min', 0)
- t0 = date2num(datetime(2020, 1, 1), units=time_var.units)
- ds.variables['TIME'][:] = np.arange(t0, t0 + (1 / 24) * len(var_data[1]), 1 / 24)
+ t0 = date2num(datetime(2020, 1, 1), units=time_var.units)
+ ds.variables['TIME'][:] = np.arange(t0, t0 + (1 / 24) * len(var_data[1]), 1 / 24)
- # Create the nominal depth variable
- nom_depth_var = ds.createVariable("NOMINAL_DEPTH", "f8")
- nom_depth_var.setncattr('long_name', 'nominal depth')
- nom_depth_var.setncattr('units', 'dbar')
- nom_depth_var.setncattr('positive', 'down')
- nom_depth_var.setncattr('axis', 'Z')
- nom_depth_var.setncattr('valid_max', 12000)
- nom_depth_var.setncattr('valid_min', -5)
- nom_depth_var.setncattr('reference_datum', 'sea surface')
+ # Create the nominal depth variable
+ nom_depth_var = ds.createVariable("NOMINAL_DEPTH", "f8")
+ nom_depth_var.setncattr('long_name', 'nominal depth')
+ nom_depth_var.setncattr('units', 'dbar')
+ nom_depth_var.setncattr('positive', 'down')
+ nom_depth_var.setncattr('axis', 'Z')
+ nom_depth_var.setncattr('valid_max', 12000)
+ nom_depth_var.setncattr('valid_min', -5)
+ nom_depth_var.setncattr('reference_datum', 'sea surface')
- ds.variables["NOMINAL_DEPTH"][:] = nominal_depth
+ ds.variables["NOMINAL_DEPTH"][:] = nominal_depth
- # Create variables from input data
- for name_in, data_in in zip(var_names, var_data):
- ds.createVariable(name_in, "f8", ("TIME"))
- ds.variables[name_in][:] = data_in
+ # Create variables from input data
+ for name_in, data_in in zip(var_names, var_data):
+ ds.createVariable(name_in, "f8", ("TIME"))
+ ds.variables[name_in][:] = data_in
- ds.close()
- print("generated ", file_name)
+ ds.close()
+ print("generated ", file_name)
- return (file_name)
+ return (file_name)
- else:
- print('Data arrays not of equal length')
+ else:
+ print('Data arrays not of equal length')
-
- else:
- print('Labels not in string format')
- else:
- print('Data not passed in pairs')
+ else:
+ print('Labels not in string format')
-
- if __name__ == "__main__":
- netcdf_gen(sys.argv[1], sys.argv[2], *sys.argv[3:])
\ No newline at end of file
+ else:
+ print('Data not passed in pairs')
+
+
+if __name__ == "__main__":
+ netcdf_gen(sys.argv[1], sys.argv[2], *sys.argv[3:])
+
+
+
+
+
+
+
\ No newline at end of file
From b6d6168086b3a6d71c7c944d41f5c461d0af3d5a Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 20 Feb 2020 15:07:14 +1100
Subject: [PATCH 23/59] Update netcdf_gen.py
---
ocean_dp/qc/netcdf_gen.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ocean_dp/qc/netcdf_gen.py b/ocean_dp/qc/netcdf_gen.py
index 7eae159..ef403cf 100755
--- a/ocean_dp/qc/netcdf_gen.py
+++ b/ocean_dp/qc/netcdf_gen.py
@@ -74,7 +74,7 @@ def netcdf_gen(file_name, nominal_depth, *args):
time_var.setncattr('valid_min', 0)
t0 = date2num(datetime(2020, 1, 1), units=time_var.units)
- ds.variables['TIME'][:] = np.arange(t0, t0 + (1 / 24) * len(var_data[1]), 1 / 24)
+ ds.variables['TIME'][:] = np.linspace(t0,t0 + (1 / 24) * (len(var_data[0]-1)),num=len(var_data[0]))
# Create the nominal depth variable
nom_depth_var = ds.createVariable("NOMINAL_DEPTH", "f8")
From 2d5bc85a946355b8ee2e96bad1e5093acbac6470 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 20 Feb 2020 16:19:53 +1100
Subject: [PATCH 24/59] adds qc variables
---
ocean_dp/qc/add_qc_flags.py | 5 +++-
ocean_dp/qc/netcdf_gen.py | 42 ++++++++++++++++++++++++++++++++++
ocean_dp/qc/select_in_water.py | 2 ++
3 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/ocean_dp/qc/add_qc_flags.py b/ocean_dp/qc/add_qc_flags.py
index bd68234..1d1fa0e 100644
--- a/ocean_dp/qc/add_qc_flags.py
+++ b/ocean_dp/qc/add_qc_flags.py
@@ -101,4 +101,7 @@ def add_qc(netCDFfile):
if __name__ == "__main__":
- add_qc(sys.argv[1:])
\ No newline at end of file
+ add_qc(sys.argv[1:])
+
+
+
\ No newline at end of file
diff --git a/ocean_dp/qc/netcdf_gen.py b/ocean_dp/qc/netcdf_gen.py
index ef403cf..32cb37c 100755
--- a/ocean_dp/qc/netcdf_gen.py
+++ b/ocean_dp/qc/netcdf_gen.py
@@ -17,6 +17,7 @@
import sys
from datetime import datetime
import numpy as np
+import shutil
# Provide the function with a filename (don't include .nc), a nominal depth,
# and pairs of names and arrays containing the data to be included as variables.
@@ -26,6 +27,8 @@
# For example, netcdf_gen('test',30,'PRES',pres_data,'TEMP',temp_data)
# from the command line gen_test_data.py test 30 PRES 10,20,30 TEMP 11,12,NaN
+
+
def netcdf_gen(file_name, nominal_depth, *args):
# Convert the args tuple to a list
args = list(args)
@@ -93,10 +96,49 @@ def netcdf_gen(file_name, nominal_depth, *args):
ds.createVariable(name_in, "f8", ("TIME"))
ds.variables[name_in][:] = data_in
+ # read the variable names from the netCDF dataset
+ vars = ds.variables
+
+ # create a list of variables, don't include the 'TIME' variable
+ # TODO: detect 'TIME' variable using the standard name 'time'
+ to_add = []
+
+ for v in vars:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+
+ # for each variable, add a new ancillary variable _quality_control to each which has 'TIME' as a dimension
+ for v in to_add:
+ if "TIME" in vars[v].dimensions:
+ # print("time dim ", v)
+
+ if v+"_quality_control" not in ds.variables:
+ ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True) # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
+ ncVarOut[:] = np.zeros(vars[v].shape)
+ ncVarOut.long_name = "quality_code for " + v
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9])
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+
+ vars[v].ancillary_variables = v + "_quality_control"
+
+ # update the global attributes
+ ds.file_version = "Level 1 - Quality Controlled Data"
+
+ ds.history = datetime.utcnow().strftime("%Y%m%d:") + ' converted to FV01 file, quality_control variables added.'
+
+ # ADD quality control attributes!!
+
ds.close()
+
+ #add_qc(file_name)
+
print("generated ", file_name)
return (file_name)
+
+
else:
print('Data arrays not of equal length')
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
index 16cf613..8c296c1 100755
--- a/ocean_dp/qc/select_in_water.py
+++ b/ocean_dp/qc/select_in_water.py
@@ -29,6 +29,8 @@
# Submit argument as a list
+
+
def select_in_water(netCDFfiles):
new_name = [] # list of new file names
From a5bdf2fef52227be1829dbd1e279e5847a6b8126 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Fri, 21 Feb 2020 12:00:00 +1100
Subject: [PATCH 25/59] Update flatline_test.py
Removed the add_qc part, modified so it's possibl to set files, target variables, window length, and the qc flag assigned. Tested with files made using netcdf_gen.
---
ocean_dp/qc/flatline_test.py | 142 +++++++++++------------------------
1 file changed, 44 insertions(+), 98 deletions(-)
diff --git a/ocean_dp/qc/flatline_test.py b/ocean_dp/qc/flatline_test.py
index a4e4ec5..074db32 100755
--- a/ocean_dp/qc/flatline_test.py
+++ b/ocean_dp/qc/flatline_test.py
@@ -1,28 +1,4 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 3 14:10:41 2020
-
-@author: tru050
-"""
-
-import re
-from datetime import datetime, timedelta
-from netCDF4 import num2date, date2num
-from netCDF4 import stringtochar
-import numpy.ma as ma
-import sys
-from netCDF4 import Dataset
-import numpy as np
-import argparse
-import glob
-import pytz
-import os
-
-#!/usr/bin/python3
-
-# add_qc_flags
-# Copyright (C) 2020 Peter Jansen
+# Copyright (C) 2020 Ben Weeding
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -37,111 +13,81 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
-# add QC variables to file
-
-
-def add_qc(netCDFfile):
-
- new_name = [] # list of new file names
-
- # loop over all file names given
- for fn in netCDFfile[1:]:
- ds = Dataset(fn, 'a')
-
- # read the variable names from the netCDF dataset
- vars = ds.variables
-
- # create a list of variables, don't include the 'TIME' variable
- # TODO: detect 'TIME' variable using the standard name 'time'
- to_add = []
- for v in vars:
- #print (vars[v].dimensions)
- if v != 'TIME':
- to_add.append(v)
-
- # for each variable, add a new ancillary variable _quality_control to each which has 'TIME' as a dimension
- for v in to_add:
- if "TIME" in vars[v].dimensions:
- # print("time dim ", v)
-
- ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True) # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
- ncVarOut[:] = np.zeros(vars[v].shape)
- ncVarOut.long_name = "quality_code for " + v
-
- vars[v].ancillary_variables = v + "_quality_control"
-
- # update the file version attribute
- ds.file_version = "Level 1 - Quality Controlled Data"
-
- ds.close()
-
- # rename the file FV00 to FV01 (imos specific)
- fn_new = fn.replace("FV00", "FV01")
- new_name.append(fn_new)
-
- if fn_new != fn:
- # copy file
- os.copy(fn, fn_new)
-
- print(fn_new)
-
- return new_name
-
-
-if __name__ == "__main__":
- add_qc(sys.argv)
-
-##############################################################################
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
-def flatline_test(*target_files,target_vars=[],window=3):
+def flatline_test(target_files,target_vars_in=[],window=3,flag=3):
- # If files aren't specified, take all the .nc files in the current folder
+ # If files aren't specified, take all the IMOS.nc files in the current folder
if not target_files:
- target_files = glob.glob('*.nc')
+ target_files = glob.glob('IMOS*.nc')
# Loop through each files in target_files
for current_file in target_files:
-
# Print each filename
print("input file %s" % current_file)
# Extract netcdf data into nc
- nc = Dataset(current_file, mode="r")
+ nc = Dataset(current_file, mode="a")
# Extract time
ncTime = nc.get_variables_by_attributes(standard_name='time')
# If target_vars aren't user specified, set it to all the variables of
- # the current_file, removing TIME
- if target_vars == []:
+ # the current_file, removing unwanted variables
+ if target_vars_in == []:
target_vars = list(nc.variables.keys())
+ # Remove TIME
target_vars.remove('TIME')
- # Check if file contains quality control variables, and if not create
-
- if not any("_quality_control" in i for i in target_vars:
-
- # insert _quality_control variables into file?
- # should this be done now, or should we assume it
- # will have already been done?
-
+ # Remove any quality_control variables
+ qc_vars = [s for s in target_vars if 'quality_control' in s]
+
+ target_vars = [s for s in target_vars if s not in qc_vars]
+
+ # Remove any variables of single length
+ single_vars = [s for s in target_vars if nc.variables[s].size==1]
+
+ target_vars = [s for s in target_vars if s not in single_vars]
+
+ print('target_vars are '+' '.join(target_vars))
+
+ else:
+ target_vars = target_vars_in
# For each variable, extract the data
for current_var in target_vars:
var_data = np.array(nc.variables[current_var])
- for i in 0:(len(var_data)-window+1):
+ print('checking '+current_var)
+
+ # Step through the data, one element at a time, using the window
+ for i in range(0,(len(var_data)-window+1)):
- # This is true if 'window' elements in a row are equal
- if len(set(var_data[i:(i+window)])) == 1
+ # This is true if 'window' elements in a row are equal
+ if len(set(var_data[i:(i+window)])) == 1:
# set corresponding QC value to...
+ nc.variables[current_var+'_quality_control'][i:(i+window)] = flag
+
+ nc.history += ' ' + datetime.utcnow().strftime("%Y%m%d:") + 'flatline_test performed, flatlines of '+str(window)+' consecutive values or more were flagged with '+str(flag)
+ nc.close()
From 05abdb91e2365967043dcba3517f6cfa63467d80 Mon Sep 17 00:00:00 2001
From: Peter Jansen
Date: Mon, 24 Feb 2020 10:07:35 +1100
Subject: [PATCH 26/59] Update flatline_test.py
separate global fine name gobblin
---
ocean_dp/qc/flatline_test.py | 53 ++++++++++++++++++++----------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/ocean_dp/qc/flatline_test.py b/ocean_dp/qc/flatline_test.py
index 074db32..db8fec7 100755
--- a/ocean_dp/qc/flatline_test.py
+++ b/ocean_dp/qc/flatline_test.py
@@ -26,23 +26,31 @@
import pytz
import os
-def flatline_test(target_files,target_vars_in=[],window=3,flag=3):
-
- # If files aren't specified, take all the IMOS.nc files in the current folder
- if not target_files:
-
- target_files = glob.glob('IMOS*.nc')
+
+# If files aren't specified, take all the IMOS*.nc files in the current folder
+def flatline_test_all_files(target_vars_in=[], window=3, flag=3):
+ target_files = glob.glob('IMOS*.nc')
+
+ flatline_test_files(target_files, target_vars_in=target_vars_in, window=window, flag=flag)
+
+
+def flatline_test_files(target_files, target_vars_in=[], window=3, flag=3):
# Loop through each files in target_files
for current_file in target_files:
-
# Print each filename
print("input file %s" % current_file)
-
+
# Extract netcdf data into nc
nc = Dataset(current_file, mode="a")
-
- # Extract time
+
+ # run the flat line test
+ flatline_test(nc=nc, target_vars_in=target_vars_in, window=window, flag=flag)
+
+
+def flatline_test(nc, target_vars_in=[], window=3, flag=3):
+
+ # Extract time, TODO: This is not used, should we set the window based on time, not samples?
ncTime = nc.get_variables_by_attributes(standard_name='time')
# If target_vars aren't user specified, set it to all the variables of
@@ -56,12 +64,10 @@ def flatline_test(target_files,target_vars_in=[],window=3,flag=3):
# Remove any quality_control variables
qc_vars = [s for s in target_vars if 'quality_control' in s]
-
target_vars = [s for s in target_vars if s not in qc_vars]
# Remove any variables of single length
single_vars = [s for s in target_vars if nc.variables[s].size==1]
-
target_vars = [s for s in target_vars if s not in single_vars]
print('target_vars are '+' '.join(target_vars))
@@ -84,16 +90,17 @@ def flatline_test(target_files,target_vars_in=[],window=3,flag=3):
# set corresponding QC value to...
nc.variables[current_var+'_quality_control'][i:(i+window)] = flag
-
- nc.history += ' ' + datetime.utcnow().strftime("%Y%m%d:") + 'flatline_test performed, flatlines of '+str(window)+' consecutive values or more were flagged with '+str(flag)
-
+
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+ except AttributeError:
+ hist = ""
+
+ nc.setncattr('history', hist + 'flatline_test performed on [' + str(target_vars) + '], window '+str(window)+' consecutive values or more were flagged with '+str(flag) )
+
nc.close()
-
-
-
-
-
-
-
-
\ No newline at end of file
+if __name__ == "__main__":
+ # usage is
+ flatline_test(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], window=float(sys.argv[3]), flag=float(sys.argv[4]))
From 42ef01b2480a95c3e96ffed5778c4a2e09d19313 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Mon, 24 Feb 2020 12:16:56 +1100
Subject: [PATCH 27/59] test changes
1st write of spike test, alter name=main and history for flatline
---
ocean_dp/qc/flatline_test.py | 7 +-
ocean_dp/qc/spike_test | 8 ---
ocean_dp/qc/spike_test.py | 120 +++++++++++++++++++++++++++++++++++
3 files changed, 122 insertions(+), 13 deletions(-)
delete mode 100755 ocean_dp/qc/spike_test
create mode 100755 ocean_dp/qc/spike_test.py
diff --git a/ocean_dp/qc/flatline_test.py b/ocean_dp/qc/flatline_test.py
index db8fec7..e211cf4 100755
--- a/ocean_dp/qc/flatline_test.py
+++ b/ocean_dp/qc/flatline_test.py
@@ -49,9 +49,6 @@ def flatline_test_files(target_files, target_vars_in=[], window=3, flag=3):
def flatline_test(nc, target_vars_in=[], window=3, flag=3):
-
- # Extract time, TODO: This is not used, should we set the window based on time, not samples?
- ncTime = nc.get_variables_by_attributes(standard_name='time')
# If target_vars aren't user specified, set it to all the variables of
# the current_file, removing unwanted variables
@@ -97,10 +94,10 @@ def flatline_test(nc, target_vars_in=[], window=3, flag=3):
except AttributeError:
hist = ""
- nc.setncattr('history', hist + 'flatline_test performed on [' + str(target_vars) + '], window '+str(window)+' consecutive values or more were flagged with '+str(flag) )
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + 'flatline_test performed on [' + str(target_vars) + '], window '+str(window)+' consecutive values or more were flagged with '+str(flag) )
nc.close()
if __name__ == "__main__":
# usage is
- flatline_test(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], window=float(sys.argv[3]), flag=float(sys.argv[4]))
+ flatline_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], window=float(sys.argv[3]), flag=float(sys.argv[4]))
diff --git a/ocean_dp/qc/spike_test b/ocean_dp/qc/spike_test
deleted file mode 100755
index 0bb13ea..0000000
--- a/ocean_dp/qc/spike_test
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 12 16:28:38 2020
-
-@author: tru050
-"""
-
diff --git a/ocean_dp/qc/spike_test.py b/ocean_dp/qc/spike_test.py
new file mode 100755
index 0000000..c18c177
--- /dev/null
+++ b/ocean_dp/qc/spike_test.py
@@ -0,0 +1,120 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+
+# If files aren't specified, take all the IMOS*.nc files in the current folder
+def spike_test_all_files(target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+ target_files = glob.glob('IMOS*.nc')
+
+ spike_test_files(target_files, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
+
+
+def spike_test_files(target_files, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+
+ # Loop through each files in target_files
+ for current_file in target_files:
+ # Print each filename
+ print("input file %s" % current_file)
+
+ # Extract netcdf data into nc
+ nc = Dataset(current_file, mode="a")
+
+ # run the spike test
+ spike_test(nc=nc, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
+
+
+def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+
+ # If target_vars aren't user specified, set it to all the variables of
+ # the current_file, removing unwanted variables
+ if target_vars_in == []:
+
+ target_vars = list(nc.variables.keys())
+
+ # Remove TIME
+ target_vars.remove('TIME')
+
+ # Remove any quality_control variables
+ qc_vars = [s for s in target_vars if 'quality_control' in s]
+ target_vars = [s for s in target_vars if s not in qc_vars]
+
+ # Remove any variables of single length
+ single_vars = [s for s in target_vars if nc.variables[s].size==1]
+ target_vars = [s for s in target_vars if s not in single_vars]
+
+ print('target_vars are '+' '.join(target_vars))
+
+ else:
+ target_vars = target_vars_in
+
+ # For each variable, extract the data
+ for current_var in target_vars:
+
+ var_data = np.array(nc.variables[current_var])
+
+ print('checking '+current_var)
+
+ # Step through the data, one element at a time, starting from the 2nd element
+ for i in range(1,(len(var_data)-1)):
+
+ # Calculate the mean of the i-1 and i+1 elements
+ shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+
+ # Check for spike exceeding high threshold
+ if abs(var_data[i]-shoulder_mean) > thresh_high:
+
+ #set corresponding QC value to...
+ nc.variables[current_var+'_quality_control'][i] = flag_high
+
+
+ # Check for spike exceeding low threshold
+ elif abs(var_data[i]-shoulder_mean) > thresh_low:
+
+ # set corresponding QC value to...
+ nc.variables[current_var+'_quality_control'][i] = flag_low
+
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+ except AttributeError:
+ hist = ""
+
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' :spike_test performed on [' + str(target_vars) + '], with spikes greater than '+str(thresh_high)+' flagged as '+str(flag_high)+' and spikes greater than '+str(thresh_low)+' flagged as '+str(flag_low))
+
+ nc.close()
+
+if __name__ == "__main__":
+ # usage is
+ spike_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], thresh_low=float(sys.argv[3]), thresh_high=float(sys.argv[4]), flag_low= float(sys.argv[5]), flag_high= float(sys.argv[6]))
+
+
+
+
+
+
+
From 925101f749a8700c993023a821088918a4522478 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Mon, 2 Mar 2020 15:56:30 +1100
Subject: [PATCH 28/59] Update spike_test.py
---
ocean_dp/qc/spike_test.py | 134 ++++++++++++++++++++++++++------------
1 file changed, 92 insertions(+), 42 deletions(-)
diff --git a/ocean_dp/qc/spike_test.py b/ocean_dp/qc/spike_test.py
index c18c177..154f439 100755
--- a/ocean_dp/qc/spike_test.py
+++ b/ocean_dp/qc/spike_test.py
@@ -50,63 +50,113 @@ def spike_test_files(target_files, target_vars_in=[], thresh_low=2, thresh_high=
def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
- # If target_vars aren't user specified, set it to all the variables of
- # the current_file, removing unwanted variables
- if target_vars_in == []:
+ # If target_vars aren't user specified, set it to all the variables of
+ # the current_file, removing unwanted variables
+ if target_vars_in == []:
+
+ target_vars = list(nc.variables.keys())
+
+ # Remove TIME
+ target_vars.remove('TIME')
+
+ # Remove any quality_control variables
+ qc_vars = [s for s in target_vars if 'quality_control' in s]
+ target_vars = [s for s in target_vars if s not in qc_vars]
+
+ # Remove any variables of single length
+ single_vars = [s for s in target_vars if nc.variables[s].size==1]
+ target_vars = [s for s in target_vars if s not in single_vars]
+
+ print('target_vars are '+' '.join(target_vars))
+
+ else:
+ target_vars = target_vars_in
+
+ # For each variable, extract the data
+ for current_var in target_vars:
+
+ var_data = np.array(nc.variables[current_var])
+
+ print('checking '+current_var+' for high spikes')
+
+ # Step through the data, one element at a time, starting from the 2nd element
+ for i in range(1,(len(var_data)-1)):
- target_vars = list(nc.variables.keys())
+ # Calculate the mean of the i-1 and i+1 elements
+ shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
- # Remove TIME
- target_vars.remove('TIME')
+ # Check for spike exceeding high threshold
+ if abs(var_data[i]-shoulder_mean) > thresh_high:
+
+ print('High spike found')
+
+ #set corresponding QC value to...
+ nc.variables[current_var+'_quality_control'][i] = flag_high
+
+ # # Extract the qc data
+ # current_qc = np.array(nc.variables[current_var+'_quality_control'][:])
+
+ # # Find all the instances of consecutive 4s, and reset them to 0
+ # for i in np.where(current_qc==4)[0][0:-1][np.diff(np.where(current_qc==4)[0])==1]:
+
+ # nc.variables[current_var+'_quality_control'][i:i+2] = 0
+
+ # Find the indices where qc isn't set to 4 (high spike), removing the final element as it can't be check for a spike
+ low_spike_chk_idx = np.where(nc.variables[current_var+'_quality_control'][:]!=4)[0][0:-1]
+
+ #print(low_spike_chk_idx)
+
+ # Remove from the indices those that are either side of a high spike
+ for i in np.where(nc.variables[current_var+'_quality_control'][:]==4)[0]:
+
+ low_spike_chk_idx=low_spike_chk_idx[low_spike_chk_idx!=[i-1]]
- # Remove any quality_control variables
- qc_vars = [s for s in target_vars if 'quality_control' in s]
- target_vars = [s for s in target_vars if s not in qc_vars]
-
- # Remove any variables of single length
- single_vars = [s for s in target_vars if nc.variables[s].size==1]
- target_vars = [s for s in target_vars if s not in single_vars]
+ low_spike_chk_idx=low_spike_chk_idx[low_spike_chk_idx!=[i+1]]
+
+ #print(low_spike_chk_idx)
+
+ print('checking '+current_var+' for low spikes')
+
+ # For each of the remaining indices
+ for i in low_spike_chk_idx:
- print('target_vars are '+' '.join(target_vars))
+ #print('i is '+str(i))
- else:
- target_vars = target_vars_in
+ # Calculate the mean of the i-1 and i+1 elements
+ shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
- # For each variable, extract the data
- for current_var in target_vars:
+ #print('shoulder mean is '+str(shoulder_mean))
- var_data = np.array(nc.variables[current_var])
+ abs_diff = abs(var_data[i]-shoulder_mean)
- print('checking '+current_var)
+ #print('absolute difference is '+str(abs_diff))
- # Step through the data, one element at a time, starting from the 2nd element
- for i in range(1,(len(var_data)-1)):
+ # Check for spike exceeding low threshold
+ if abs(var_data[i]-shoulder_mean) > thresh_low:
- # Calculate the mean of the i-1 and i+1 elements
- shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+ print('Low spike found')
- # Check for spike exceeding high threshold
- if abs(var_data[i]-shoulder_mean) > thresh_high:
-
- #set corresponding QC value to...
- nc.variables[current_var+'_quality_control'][i] = flag_high
+ #set corresponding QC value to...
+ nc.variables[current_var+'_quality_control'][i] = flag_low
+
+ # # Extract the qc data
+ # current_qc = np.array(nc.variables[current_var+'_quality_control'][:])
+
+ # # Find all the instances of consecutive 3s, and reset them to 0
+ # for i in np.where(current_qc==3)[0][0:-1][np.diff(np.where(current_qc==3)[0])==1]:
+
+ # nc.variables[current_var+'_quality_control'][i:i+2] = 0
-
- # Check for spike exceeding low threshold
- elif abs(var_data[i]-shoulder_mean) > thresh_low:
-
- # set corresponding QC value to...
- nc.variables[current_var+'_quality_control'][i] = flag_low
- # update the history attribute
- try:
- hist = nc.history + "\n"
- except AttributeError:
- hist = ""
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+ except AttributeError:
+ hist = ""
- nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' :spike_test performed on [' + str(target_vars) + '], with spikes greater than '+str(thresh_high)+' flagged as '+str(flag_high)+' and spikes greater than '+str(thresh_low)+' flagged as '+str(flag_low))
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' :spike_test performed on [' + str(target_vars) + '], with spikes greater than '+str(thresh_high)+' flagged as '+str(flag_high)+' and spikes greater than '+str(thresh_low)+' flagged as '+str(flag_low))
- nc.close()
+ nc.close()
if __name__ == "__main__":
# usage is
From 62b0419a1fcbb368a152728680bf3586c060d77c Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 3 Mar 2020 12:42:42 +1100
Subject: [PATCH 29/59] Update netcdf_gen.py
Brought -1 outside brackets, noticed that netcdfgen was not producing hourly data when using for rate of change test.
---
ocean_dp/qc/netcdf_gen.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ocean_dp/qc/netcdf_gen.py b/ocean_dp/qc/netcdf_gen.py
index 32cb37c..1b56de8 100755
--- a/ocean_dp/qc/netcdf_gen.py
+++ b/ocean_dp/qc/netcdf_gen.py
@@ -77,7 +77,7 @@ def netcdf_gen(file_name, nominal_depth, *args):
time_var.setncattr('valid_min', 0)
t0 = date2num(datetime(2020, 1, 1), units=time_var.units)
- ds.variables['TIME'][:] = np.linspace(t0,t0 + (1 / 24) * (len(var_data[0]-1)),num=len(var_data[0]))
+ ds.variables['TIME'][:] = np.linspace(t0,t0 + (1 / 24) * (len(var_data[0])-1),num=len(var_data[0]))
# Create the nominal depth variable
nom_depth_var = ds.createVariable("NOMINAL_DEPTH", "f8")
@@ -96,7 +96,8 @@ def netcdf_gen(file_name, nominal_depth, *args):
ds.createVariable(name_in, "f8", ("TIME"))
ds.variables[name_in][:] = data_in
- # read the variable names from the netCDF dataset
+
+ # read the variable names from the netCDF dataset
vars = ds.variables
# create a list of variables, don't include the 'TIME' variable
From cc0589762a76caea2a6334a918883ff77bfeb02b Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 4 Mar 2020 16:32:16 +1100
Subject: [PATCH 30/59] Create rate_of_change_test.py
Haven't yet worked out how to use sys.argv[] with both *args and a keyword argument - line 178
---
ocean_dp/qc/rate_of_change_test.py | 184 +++++++++++++++++++++++++++++
1 file changed, 184 insertions(+)
create mode 100755 ocean_dp/qc/rate_of_change_test.py
diff --git a/ocean_dp/qc/rate_of_change_test.py b/ocean_dp/qc/rate_of_change_test.py
new file mode 100755
index 0000000..c3e1d9b
--- /dev/null
+++ b/ocean_dp/qc/rate_of_change_test.py
@@ -0,0 +1,184 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+# how to specific rate of change? Will the function take rate of change as an
+# argument?
+
+# Need a way of linking the rate of change to each different variable type
+
+# Could max rate of change be something we upload into the netcdf file atts? Or just in history?
+
+# Tell the function how much change you tolerate, and over what period of time - in sec?
+
+# Then convert to match the file timesteps
+
+# If files aren't specified, take all the IMOS*.nc files in the current folder
+def roc_test_all_files(*args,target_vars_in=[]):
+ target_files = glob.glob('IMOS*.nc')
+
+ roc_test_files(target_files, target_vars_in=target_vars_in,*args)
+
+def roc_test_files(target_files,*args,target_vars_in=[]):
+
+ # Loop through each files in target_files
+ for current_file in target_files:
+ # Print each filename
+ print("input file %s" % current_file)
+
+ print(args)
+
+ # Extract netcdf data into nc
+ nc = Dataset(current_file, mode="a")
+
+ # run the spike test - specifying *args here makes python unpack args to be passed again successfully as separate items
+ roc_test(nc,*args, target_vars_in=target_vars_in)
+
+
+# Enter args as variable name and rate of change limit, ie. 'TEMP',4
+def roc_test(nc,*args,target_vars_in=[]):
+
+ # Check the time format
+ if nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ # Convert the args tuple to a list
+ args = list(args)
+
+ # If a single rate of change limit is supplied
+ if len(args) == 1:
+
+ change_per_hr = args[0]
+
+ print('One rate of change limit will be applied to all variables')
+
+ # If target_vars aren't user specified, set it to all the variables of
+ # the current_file, removing unwanted variables
+ if target_vars_in == []:
+
+ target_vars = list(nc.variables.keys())
+
+ # Remove TIME
+ target_vars.remove('TIME')
+
+ # Remove any quality_control variables
+ qc_vars = [s for s in target_vars if 'quality_control' in s]
+ target_vars = [s for s in target_vars if s not in qc_vars]
+
+ # Remove any variables of single length
+ single_vars = [s for s in target_vars if nc.variables[s].size==1]
+ target_vars = [s for s in target_vars if s not in single_vars]
+
+ print('target_vars are '+' '.join(target_vars))
+
+ else:
+ target_vars = target_vars_in
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][:])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # For each variable
+ for current_var in target_vars:
+
+ # Extract the data
+ var_data = np.array(nc.variables[current_var])
+
+ # Calculate dvar/dtime
+ var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
+
+ # For any change greater than change_per_hr, assign a qc value of 4
+ nc.variables[current_var+'_quality_control'][[x for x in abs(np.insert(var_roc,0,0)) > change_per_hr]] = 4
+
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+ except AttributeError:
+ hist = ""
+
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + 'rate of change test performed, with all changes above '+str(change_per_hr)+' flagged as 4')
+
+
+ # If multiple rate of change limits are supplied, with variable names
+ elif len(args) % 2 == 0 and all(isinstance(x,str) for x in args[0::2]) and all(isinstance(y,(float,int)) for y in args[1::2]):
+
+ # Take target variables from args
+ target_vars = args[0::2]
+
+ print('target_vars are '+' '.join(target_vars))
+
+ # Convert arguments to dict
+ rate_spec = dict(zip(args[0::2],args[1::2]))
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][:])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # For each variable
+ for current_var in target_vars:
+
+ # Extract the data
+ var_data = np.array(nc.variables[current_var])
+
+ # Calculate dvar/dtime
+ var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
+
+ # For any change greater than change_per_hr, assign a qc value of 4
+ nc.variables[current_var+'_quality_control'][[x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]]] = 4
+
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+ except AttributeError:
+ hist = ""
+
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ': rate of change test performed, with all changes above those specified in the following list flagged as 4: '+str(args))
+
+
+ else:
+ print('Arguments passed do not match the required format. No roc test performed.')
+
+
+ # If the time format doesn't match IMOS requirements
+ else:
+ print('Time format does not match the required IMOS form of: days since 1950-01-01 00:00:00 UTC')
+
+
+ nc.close()
+
+
+# Not sure how to sys.argv[] with both *args and a keyword argument
+if __name__ == "__main__":
+ # usage is <*args>
+ roc_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], *sys.argv[3:])
+
+
+
+
From 24e5efd24cb3c87d7d5273b437ab6ea7e8a1771b Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 11 Mar 2020 19:56:10 +1100
Subject: [PATCH 31/59] Create temp_diff_histograms.py
---
ocean_dp/qc/temp_diff_histograms.py | 109 ++++++++++++++++++++++++++++
1 file changed, 109 insertions(+)
create mode 100755 ocean_dp/qc/temp_diff_histograms.py
diff --git a/ocean_dp/qc/temp_diff_histograms.py b/ocean_dp/qc/temp_diff_histograms.py
new file mode 100755
index 0000000..2a78a92
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_histograms.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+
+netcdf_files = []
+
+temp_diffs = np.array([])
+
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+ for file,dirx in files,dirs:
+ if file.endswith('.nc'):
+ netcdf_files.append(file)
+ nc = Dataset(os.path.join(dirs, file),mode='r')
+
+ temp_diffs = np.concatenate((temp_diffs,np.diff(np.array(nc.variables['TEMP'][:]))))
+
+ nc.close()
+
+
+print (list_of_files)
+
+
+
+
+
+files = glob.glob('*.nc')
+
+temp_diffs = np.array([])
+
+for current_file in files:
+
+ nc = Dataset(current_file,mode='r')
+
+ temp_diffs = np.concatenate((temp_diffs,np.diff(np.array(nc.variables['TEMP'][:]))))
+
+fig, ax = plt.subplots()
+
+ax.hist(temp_diffs,100,log=True)
+
+
+# use os.walk??? to run in each netcdf folder?? os.scandir()?
+
+
+
+
+
+# sofs75_60m = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4051-60m_END-20190331_C-20200204.nc',mode='r')
+# sofs75_70m = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4052-70m_END-20190331_C-20200204.nc',mode='r')
+# sofs75_75m = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4053-75m_END-20190331_C-20200204.nc',mode='r')
+
+
+# temp_60 = np.array(sofs75_60m.variables['TEMP'][:])
+# temp_70 = np.array(sofs75_70m.variables['TEMP'][:])
+# temp_75 = np.array(sofs75_75m.variables['TEMP'][:])
+
+# label_coords = (0.01, 0.85)
+# label_method = 'axes fraction'
+
+# fig, axs = plt.subplots(3, 1, sharey=True)
+# axs[0].set_title('Temp sensor comparison SOFS7.5')
+
+# axs[0].hist(np.diff(temp_60),bins=100,log=True, histtype='bar', stacked=True)
+# axs[0].set_ylim(bottom=0.1,top=10E5)
+# axs[0].set_xlim(left=-40, right=40)
+# axs[0].annotate('60m',xy=label_coords, xycoords=label_method)
+# axs[0].tick_params(labelbottom=False)
+
+# axs[1].hist(np.diff(temp_70),bins=100,log=True)
+# axs[1].set_ylim(bottom=0.1,top=10E5)
+# axs[1].set_xlim(left=-40, right=40)
+# axs[1].annotate('70m',xy=label_coords, xycoords=label_method)
+# axs[1].tick_params(labelbottom=False)
+
+# axs[2].hist(np.diff(temp_75),bins=100,log=True)
+# axs[2].set_ylim(bottom=0.1,top=10E5)
+# axs[2].set_xlim(left=-40, right=40)
+# axs[2].annotate('75m',xy=label_coords, xycoords=label_method)
+
+
+
+# fig.savefig('test.pdf')
+
+
+
+
+
+
From fd81c7504b9af279448d544fb4b1da1229904d2c Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Mon, 16 Mar 2020 18:05:27 +1100
Subject: [PATCH 32/59] plots for qc testing
---
ocean_dp/qc/agg_temp_plot.py | 104 ++++++++++++
ocean_dp/qc/in_out_water.py | 10 ++
ocean_dp/qc/temp_diff_hist_extra.py | 67 ++++++++
ocean_dp/qc/temp_diff_hist_glob | 121 ++++++++++++++
ocean_dp/qc/temp_diff_hist_glob.py | 119 ++++++++++++++
ocean_dp/qc/temp_diff_histograms.py | 244 +++++++++++++++++++++++++++-
ocean_dp/qc/temp_time_diff_plots.py | 117 +++++++++++++
7 files changed, 773 insertions(+), 9 deletions(-)
create mode 100755 ocean_dp/qc/agg_temp_plot.py
create mode 100755 ocean_dp/qc/temp_diff_hist_extra.py
create mode 100755 ocean_dp/qc/temp_diff_hist_glob
create mode 100755 ocean_dp/qc/temp_diff_hist_glob.py
create mode 100755 ocean_dp/qc/temp_time_diff_plots.py
diff --git a/ocean_dp/qc/agg_temp_plot.py b/ocean_dp/qc/agg_temp_plot.py
new file mode 100755
index 0000000..cb92107
--- /dev/null
+++ b/ocean_dp/qc/agg_temp_plot.py
@@ -0,0 +1,104 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+
+
+x=Dataset('IMOS_ABOS-SOTS_COPSTIP_20180822_SOFS_FV02_SOFS-Aggregate-TEMP_END-20190322_C-20200311.nc',mode='r')
+
+temp = np.array(x.variables['TEMP'][:])
+
+time = np.array(x.variables['TIME'][:])
+
+ins_idx = np.array(x.variables['instrument_index'][:])
+
+fig, ax = plt.subplots(6,5)
+
+ax=ax.flatten()
+
+label_coords = (0.1, 0.8)
+label_method = 'axes fraction'
+
+for i in set(np.array(ins_idx)):
+
+ ax[i].plot(time[ins_idx==i],temp[ins_idx==i])
+
+ ax[i].annotate('S:'+str(i),xy=label_coords, xycoords=label_method,fontsize=8)
+
+i=1
+fig, ax = plt.subplots()
+ax.plot(time[ins_idx==i],temp[ins_idx==i])
+
+
+# Remove bad instruments
+good_vals = [a!=14 and a!=15 for a in ins_idx]
+
+fig, ax = plt.subplots()
+ax.hist(temp[good_vals],21)
+
+sofs75_temp_diffs = np.array([])
+
+good_ins = set(np.array(ins_idx))
+
+good_ins -= {14,15}
+
+for i in good_ins:
+
+ cur_temp = temp[ins_idx==i]
+
+ cur_time = time[ins_idx==i]
+
+ cur_time_hr = cur_time*24
+
+ # Calculate time changes
+ cur_time_hr_diffs = np.diff(cur_time_hr)
+
+ cur_temp_diffs = np.diff(cur_temp)
+
+ # Calculate the rate of change of temperature wrt time
+ cur_dtemp_dtime = np.divide(cur_temp_diffs,cur_time_hr_diffs)
+
+ print('ins '+str(i)+':'+str(np.max(cur_dtemp_dtime)))
+
+ sofs75_temp_diffs = np.concatenate((sofs75_temp_diffs,cur_dtemp_dtime))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/in_out_water.py b/ocean_dp/qc/in_out_water.py
index ac14840..2543c23 100644
--- a/ocean_dp/qc/in_out_water.py
+++ b/ocean_dp/qc/in_out_water.py
@@ -60,6 +60,16 @@ def in_out_water(netCDFfile):
ds.file_version = "Level 1 - Quality Controlled Data"
+
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+
+ except AttributeError:
+ hist = ""
+
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ': in water test performed, with out of water data flagged at QC=7')
+
ds.close()
diff --git a/ocean_dp/qc/temp_diff_hist_extra.py b/ocean_dp/qc/temp_diff_hist_extra.py
new file mode 100755
index 0000000..d8b6019
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_hist_extra.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ if fname.endswith('.nc') and 'FV01' in fname:
+
+ print(fname) #Here, the wanted file name is printed
+
+ ds = Dataset(os.path.join(root,fname), 'a')
+
+ vars = ds.variables
+
+ to_add = []
+ for v in vars:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+
+ time_var = vars["TIME"]
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
+ time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
+
+ print(time_deploy)
+
+ print(to_add)
+ for v in to_add:
+ if "TIME" in vars[v].dimensions:
+
+ if v.endswith("_quality_control"):
+
+ print("QC time dim ", v)
+
+ ncVarOut = vars[v]
+ mask = (time <= time_deploy) | (time >= time_recovery)
+ ncVarOut[mask] = np.ones(vars[v].shape)[mask] * 7
+
+
+ ds.file_version = "Level 1 - Quality Controlled Data"
+
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+
+ except AttributeError:
+ hist = ""
+
+ ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ': in water test performed, with out of water data flagged at QC=7')
+
+
+ ds.close()
\ No newline at end of file
diff --git a/ocean_dp/qc/temp_diff_hist_glob b/ocean_dp/qc/temp_diff_hist_glob
new file mode 100755
index 0000000..a87ca72
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_hist_glob
@@ -0,0 +1,121 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+import glob
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ if ('Pulse' in x) or ('SOFS' in x):
+
+ deployments.append(x)
+
+
+fv01_files = glob.glob("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data/*/*/*FV01*.nc")
+
+
+fig, ax = plt.subplots(4,4,sharex='all', sharey='all')
+
+ax=ax.flatten()
+
+
+for current_deployment, plt_idx in zip(deployments, range(0,16)):
+
+
+
+ for fname in files:
+
+ if fname.find(current_deployment) and fname.endswith('.nc') and 'FV01' in fname:
+
+ print(fname) #Here, the wanted file name is printed
+
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ if 'TEMP_quality_control' in list(nc.variables) and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ # Add the results for this netcdf to the record for all files
+ #all_dtemp_dtime = np.concatenate((all_dtemp_dtime,nc_dtemp_dtime))
+
+ #all_dtemp_dtime_deps += ([nc.deployment_code] * len(nc_dtemp_dtime))
+
+ #netcdffiles.append(fname)
+
+ #mins.append(np.amin(nc_dtemp_dtime))
+
+ #maxs.append(np.amax(nc_dtemp_dtime))
+
+
+
+ nc.close()
+
+ ax[plt_idx].hist(nc_dtemp_dtime,100,log=True)
+
+ ax[plt_idx].set_ylim(bottom=0.1,top=10E5)
+
+ ax[plt_idx].set_xlim(left=-500, right=500)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/temp_diff_hist_glob.py b/ocean_dp/qc/temp_diff_hist_glob.py
new file mode 100755
index 0000000..1e63d14
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_hist_glob.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+import glob
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ if ('Pulse' in x) or ('SOFS' in x):
+
+ deployments.append(x)
+
+
+deployment_dtemp_dtime = np.array([])
+
+fv01_files = glob.glob("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data/*/*/*FV01*.nc")
+
+
+fig, ax = plt.subplots(4,4,sharex='all', sharey='all')
+
+ax=ax.flatten()
+
+
+for current_deployment, plt_idx in zip(deployments, range(0,len(deployments))):
+
+ print(current_deployment + 'files')
+
+ for fname in fv01_files:
+
+ if current_deployment in fname:
+
+ #print(fname + ' contains ' + current_deployment) #Here, the wanted file name is printed
+
+ nc = Dataset(fname, mode = 'r')
+
+ if 'TEMP_quality_control' in list(nc.variables) and np.array(nc.variables['TEMP'][:]).ndim == 1:
+
+ print(fname)
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ # Add the results for this netcdf to the record for the deployment
+ deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+
+ nc.close()
+
+ print('plotting '+ str(len(deployment_dtemp_dtime)) + ' values')
+
+ ax[plt_idx].hist(deployment_dtemp_dtime,100)
+
+ #ax[plt_idx].set_ylim(bottom=0.1,top=10E5)
+
+ #ax[plt_idx].set_xlim(left=-10, right=10)
+
+ deployment_dtemp_dtime = np.array([])
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/temp_diff_histograms.py b/ocean_dp/qc/temp_diff_histograms.py
index 2a78a92..2003afc 100755
--- a/ocean_dp/qc/temp_diff_histograms.py
+++ b/ocean_dp/qc/temp_diff_histograms.py
@@ -14,7 +14,8 @@
# along with this program. If not, see .
import numpy.ma as ma
import sys
-from netCDF4 import Dataset
+from netCDF4 import Dataset, num2date
+from dateutil import parser
import numpy as np
import argparse
import glob
@@ -23,26 +24,249 @@
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
+from sigfig import round
-netcdf_files = []
+deployments = []
-temp_diffs = np.array([])
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ if ('Pulse' in x) or ('SOFS' in x):
+
+ deployments.append(x)
+
+
+
+# check for in water test in history of netcdf file, if not perform the test
+
+netcdffiles = []
+
+mins=[]
+
+maxs=[]
+
+all_dtemp_dtime = np.array([])
+
+all_dtemp_dtime_deps = []
for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
- for file,dirx in files,dirs:
- if file.endswith('.nc'):
- netcdf_files.append(file)
- nc = Dataset(os.path.join(dirs, file),mode='r')
+
+ for fname in files:
+
+ if fname.endswith('.nc') and 'FV01' in fname:
+
+ print(fname) #Here, the wanted file name is printed
+
+
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ if 'TEMP_quality_control' in list(nc.variables) and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
- temp_diffs = np.concatenate((temp_diffs,np.diff(np.array(nc.variables['TEMP'][:]))))
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ # Add the results for this netcdf to the record for all files
+ all_dtemp_dtime = np.concatenate((all_dtemp_dtime,nc_dtemp_dtime))
+
+ all_dtemp_dtime_deps += ([nc.deployment_code] * len(nc_dtemp_dtime))
+
+ netcdffiles.append(fname)
+
+ mins.append(np.amin(nc_dtemp_dtime))
+
+ maxs.append(np.amax(nc_dtemp_dtime))
nc.close()
+
+
+fig, ax = plt.subplots()
+
+bins = np.linspace(-450,450,901)
+
+line_thick = 0.5
+
+counts,bins,bars = ax.hist(all_dtemp_dtime,bins,log=True)
+
+ax.axvline(x=3*np.std(all_dtemp_dtime),color='r',linewidth=line_thick)
+
+ax.axvline(x=-3*np.std(all_dtemp_dtime),color='r',linewidth=line_thick)
+
+ax.set_title('Hourly temp changes from all FV01 files in SOTS-TEMP-Raw_Data')
+
+label_coords = (0.01, 0.9)
+
+label_method = 'axes fraction'
+
+ax.annotate('~1.84E7 measurements',xy=label_coords, xycoords=label_method)
+
+
+
+def last_four(entry):
+
+ output = entry[-4::]
+
+ return output
+
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ if ('Pulse' in x) or ('SOFS' in x):
+
+ deployments.append(x)
+
+deployments.sort(key=last_four)
+
+
+
+
+
+
+
+all_deployment_dtemp_dtime = [None] * len(deployments)
+
+for current_deployment, plt_idx in zip(deployments, range(0,len(deployments))):
+
+ print('current deployment is '+current_deployment)
+
+ deployment_dtemp_dtime = np.array([])
+
+ for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+
+ #print(fname) #Here, the wanted file name is printed
+
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ time_var = nc.variables["TIME"]
+
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+
+ time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+
+ #print('using '+fname)
+
+ temp_extract = np.array(nc.variables['TEMP'][:][(time >= time_deploy) | (time <= time_recovery)])
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(temp_extract)
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][:][(time >= time_deploy) | (time <= time_recovery)])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ # Add the results for this netcdf to the record for the deployment
+ deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+
+ all_deployment_dtemp_dtime[plt_idx] = deployment_dtemp_dtime
+
+ nc.close()
+
+
+
+
+
+
+
+
+fig, ax = plt.subplots(4,4)
+
+ax=ax.flatten()
+
+line_thick = 1
+
+label_coords = (0.6, 0.6)
+label_method = 'axes fraction'
+
+for plt_idx,dep_name in zip(range(0,len(deployments)),deployments):
+
+ print('plotting '+ str(len(all_deployment_dtemp_dtime[plt_idx])) + ' values')
+ hist_data = ax[plt_idx].hist(all_deployment_dtemp_dtime[plt_idx],21,log=True)
+
+ ax[plt_idx].set_title(dep_name,fontsize=10)
+
+ #ax[plt_idx].axvline(x=3*np.mean(all_deployment_dtemp_dtime[plt_idx]),color='g',linewidth=line_thick)
+
+ ax[plt_idx].axvline(x=np.mean(all_deployment_dtemp_dtime[plt_idx])+3*np.std(all_deployment_dtemp_dtime[plt_idx]),color='r',linewidth=line_thick)
-print (list_of_files)
+ ax[plt_idx].axvline(x=np.mean(all_deployment_dtemp_dtime[plt_idx])-3*np.std(all_deployment_dtemp_dtime[plt_idx]),color='r',linewidth=line_thick)
+ anno = 'mean = '+str(round(float(np.mean(all_deployment_dtemp_dtime[plt_idx])),sigfigs=3))
+ anno += '\n3SD = ' + str(round(float(3*np.std(all_deployment_dtemp_dtime[plt_idx])),sigfigs=3))
+
+ anno += '\nsamples = ' + str(len(all_deployment_dtemp_dtime[plt_idx]))
+
+ ax[plt_idx].annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
+
+ #ax[plt_idx].set_ylim(bottom=0,top=np.max(hist_data[0]))
+
+ #ax[plt_idx].set_xlim(left=-450, right=450) np.linspace(-450,450,901)
+
+#ax[-1].axis('off')
+
+all_data = np.concatenate(all_deployment_dtemp_dtime)
+
+hist_data = ax[15].hist(all_data,21,log=True)
+
+ax[15].set_title('All data',fontsize=10)
+
+#ax[plt_idx].axvline(x=3*np.mean(all_deployment_dtemp_dtime[plt_idx]),color='g',linewidth=line_thick)
+
+ax[15].axvline(x=np.mean(all_data)+3*np.std(all_data),color='r',linewidth=line_thick)
+
+ax[15].axvline(x=np.mean(all_data)-3*np.std(all_data),color='r',linewidth=line_thick)
+
+anno = 'mean = '+str(round(float(np.mean(all_data)),sigfigs=3))
+
+anno += '\n3SD = ' + str(round(float(3*np.std(all_data)),sigfigs=3))
+
+anno += '\nsamples = ' + str(len(all_data))
+
+ax[15].annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
+
+fig.subplots_adjust(left=0.05,right=0.99,bottom=0.1,top=0.9,wspace=0.15,hspace=0.4)
+
+
+
+
+
+
+
+
+
+
+
+
files = glob.glob('*.nc')
@@ -60,6 +284,8 @@
ax.hist(temp_diffs,100,log=True)
+
+
# use os.walk??? to run in each netcdf folder?? os.scandir()?
diff --git a/ocean_dp/qc/temp_time_diff_plots.py b/ocean_dp/qc/temp_time_diff_plots.py
new file mode 100755
index 0000000..cf42a13
--- /dev/null
+++ b/ocean_dp/qc/temp_time_diff_plots.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+
+
+
+def last_four(entry):
+
+ output = entry[-4::]
+
+ return output
+
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ if ('Pulse' in x) or ('SOFS' in x):
+
+ deployments.append(x)
+
+deployments.sort(key=last_four)
+
+
+fig, ax = plt.subplots(4,4)
+
+ax=ax.flatten()
+
+
+all_deployment_dtemp_dtime = [None] * len(deployments)
+
+for current_deployment, plt_idx in zip(deployments, range(0,len(deployments))):
+
+ print('current deployment is '+current_deployment)
+
+ deployment_dtemp_dtime = np.array([])
+
+ for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+
+ #print(fname) #Here, the wanted file name is printed
+
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ time_var = nc.variables["TIME"]
+
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+
+ time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+
+ #print('using '+fname)
+
+ temp_extract = np.array(nc.variables['TEMP'][:][(time >= time_deploy) | (time <= time_recovery)])
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(temp_extract)
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][:][(time >= time_deploy) | (time <= time_recovery)])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ ax[plt_idx].plot(nc_time,temp_extract)
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ # Add the results for this netcdf to the record for the deployment
+ deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+
+ all_deployment_dtemp_dtime[plt_idx] = deployment_dtemp_dtime
+
+ nc.close()
+
+
+
+
+
+
+
+
From 222024a2dad9ce9e57992abc112c43a968908adc Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 17 Mar 2020 16:41:42 +1100
Subject: [PATCH 33/59] new plotting codes
---
ocean_dp/qc/agg_temp_plot.py | 16 +-
ocean_dp/qc/temp_diff_timeseries_from_fv00.py | 156 ++++++++++++++++++
2 files changed, 171 insertions(+), 1 deletion(-)
create mode 100755 ocean_dp/qc/temp_diff_timeseries_from_fv00.py
diff --git a/ocean_dp/qc/agg_temp_plot.py b/ocean_dp/qc/agg_temp_plot.py
index cb92107..9902493 100755
--- a/ocean_dp/qc/agg_temp_plot.py
+++ b/ocean_dp/qc/agg_temp_plot.py
@@ -45,7 +45,21 @@
for i in set(np.array(ins_idx)):
- ax[i].plot(time[ins_idx==i],temp[ins_idx==i])
+ cur_temp = temp[ins_idx==i]
+
+ cur_time = time[ins_idx==i]
+
+ cur_time_hr = cur_time*24
+
+ # Calculate time changes
+ cur_time_hr_diffs = np.diff(cur_time_hr)
+
+ cur_temp_diffs = np.diff(cur_temp)
+
+ # Calculate the rate of change of temperature wrt time
+ cur_dtemp_dtime = np.divide(cur_temp_diffs,cur_time_hr_diffs)
+
+ ax[i].scatter(cur_time,cur_temp,s=1,c=np.concatenate((np.array([0]),cur_dtemp_dtime)),cmap='cool')
ax[i].annotate('S:'+str(i),xy=label_coords, xycoords=label_method,fontsize=8)
diff --git a/ocean_dp/qc/temp_diff_timeseries_from_fv00.py b/ocean_dp/qc/temp_diff_timeseries_from_fv00.py
new file mode 100755
index 0000000..b21a605
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_timeseries_from_fv00.py
@@ -0,0 +1,156 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+
+def last_four(entry):
+
+ output = entry[-4::]
+
+ return output
+
+
+def sp_layout(num_in):
+
+ sp_nums = np.array([1,2,4,6,9,12,16,20,25,30])
+
+ sp_dict={1:[1,1],2:[2,1],4:[2,2],6:[3,2],9:[3,3],12:[4,3],16:[4,4],20:[5,4],25:[5,5],30:[6,5]}
+
+ return sp_dict[sp_nums[np.where(num_in<=sp_nums)[0][0]]]
+
+
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ if ('Pulse' in x) or ('SOFS' in x):
+
+
+ deployments.append(x)
+
+deployments.sort(key=last_four)
+
+
+
+
+
+
+for current_deployment in deployments:
+
+ acceptable_files = []
+
+ print('current deployment is '+current_deployment)
+
+ deployment_dtemp_dtime = np.array([])
+
+ for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ print('checking '+fname)
+
+ if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+
+ print('opening '+fname)
+
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ acceptable_files.append(fname)
+
+ print(fname+' accepted')
+
+ nc.close()
+
+ fig, ax = plt.subplots(sp_layout(len(acceptable_files))[0],sp_layout(len(acceptable_files))[1])
+
+ ax=ax.flatten()
+
+ for fname,f_idx in zip(acceptable_files, range(0,len(acceptable_files))):
+
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ time_var = nc.variables["TIME"]
+
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+
+ time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+
+ #print('using '+fname)
+
+ temp_extract = np.array(nc.variables['TEMP'][:][(time > time_deploy) | (time < time_recovery)])
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(temp_extract)
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][:][(time >= time_deploy) | (time <= time_recovery)])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ ax[f_idx].scatter(nc_time,nc_dtemp_dtime)
+
+ # Add the results for this netcdf to the record for the deployment
+ deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+
+ nc.close()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From d0f4a8e76dd62fd049b4e27061c181365e0d97d2 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 24 Mar 2020 17:31:02 +1100
Subject: [PATCH 34/59] various bits
---
ocean_dp/qc/agg_temp_plot.py | 23 ++-
ocean_dp/qc/arg_tester.py | 38 ++++
ocean_dp/qc/rate_of_change_test.py | 4 +
ocean_dp/qc/spike_test.py | 20 +-
ocean_dp/qc/spike_test_ver_2.py | 149 ++++++++++++++
ocean_dp/qc/temp_diff_timeseries_from_fv00.py | 183 ++++++++++++++----
6 files changed, 373 insertions(+), 44 deletions(-)
create mode 100755 ocean_dp/qc/arg_tester.py
create mode 100755 ocean_dp/qc/spike_test_ver_2.py
diff --git a/ocean_dp/qc/agg_temp_plot.py b/ocean_dp/qc/agg_temp_plot.py
index 9902493..71ab427 100755
--- a/ocean_dp/qc/agg_temp_plot.py
+++ b/ocean_dp/qc/agg_temp_plot.py
@@ -43,6 +43,17 @@
label_coords = (0.1, 0.8)
label_method = 'axes fraction'
+# cmap = colors.ListedColormap(['black','green','blue','red','orange'])
+
+# boundaries = [0,5,10,20,40,80]
+
+cmap = colors.ListedColormap(['blue','orange','red'])
+
+boundaries = [0,20,30,500]
+
+norm = colors.BoundaryNorm(boundaries, cmap.N, clip=True)
+
+
for i in set(np.array(ins_idx)):
cur_temp = temp[ins_idx==i]
@@ -59,9 +70,17 @@
# Calculate the rate of change of temperature wrt time
cur_dtemp_dtime = np.divide(cur_temp_diffs,cur_time_hr_diffs)
- ax[i].scatter(cur_time,cur_temp,s=1,c=np.concatenate((np.array([0]),cur_dtemp_dtime)),cmap='cool')
+ im = ax[i].scatter(cur_time,cur_temp,s=1,c=np.concatenate((np.array([0]),np.abs(cur_dtemp_dtime))),cmap=cmap,norm=norm)
+
+ #ax[i].set_title(,fontsize=10)
+
+ ax[i].annotate('Ins:'+str(i),xy=label_coords, xycoords=label_method,fontsize=8)
+
+ if i==27:
+ fig.colorbar(im)
+
- ax[i].annotate('S:'+str(i),xy=label_coords, xycoords=label_method,fontsize=8)
+fig.colorbar(cmap)
i=1
fig, ax = plt.subplots()
diff --git a/ocean_dp/qc/arg_tester.py b/ocean_dp/qc/arg_tester.py
new file mode 100755
index 0000000..2bfe7b4
--- /dev/null
+++ b/ocean_dp/qc/arg_tester.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+def learn_arg_sys(text_in):
+
+ print(text_in)
+
+# USE runfile('arg_tester.py', args='bing')
+
+
+#if __name__ == "__main__":
+ # usage is <*args>
+learn_arg_sys(text_in=sys.argv[1])
\ No newline at end of file
diff --git a/ocean_dp/qc/rate_of_change_test.py b/ocean_dp/qc/rate_of_change_test.py
index c3e1d9b..f4e187b 100755
--- a/ocean_dp/qc/rate_of_change_test.py
+++ b/ocean_dp/qc/rate_of_change_test.py
@@ -114,6 +114,8 @@ def roc_test(nc,*args,target_vars_in=[]):
# For any change greater than change_per_hr, assign a qc value of 4
nc.variables[current_var+'_quality_control'][[x for x in abs(np.insert(var_roc,0,0)) > change_per_hr]] = 4
+
+ print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > change_per_hr])) + ' changes found above '+str(change_per_hr)+' '+nc.variables[current_var].units+' per hour')
# update the history attribute
try:
@@ -152,6 +154,8 @@ def roc_test(nc,*args,target_vars_in=[]):
# For any change greater than change_per_hr, assign a qc value of 4
nc.variables[current_var+'_quality_control'][[x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]]] = 4
+
+ print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]])) + ' changes found above '+str(rate_spec[current_var])+' '+nc.variables[current_var].units+' per hour')
# update the history attribute
try:
diff --git a/ocean_dp/qc/spike_test.py b/ocean_dp/qc/spike_test.py
index 154f439..a061c32 100755
--- a/ocean_dp/qc/spike_test.py
+++ b/ocean_dp/qc/spike_test.py
@@ -28,13 +28,13 @@
# If files aren't specified, take all the IMOS*.nc files in the current folder
-def spike_test_all_files(target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+def spike_test_all_files(target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3, flag_high=4):
target_files = glob.glob('IMOS*.nc')
spike_test_files(target_files, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
-def spike_test_files(target_files, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+def spike_test_files(target_files, target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3, flag_high=4):
# Loop through each files in target_files
for current_file in target_files:
@@ -48,7 +48,7 @@ def spike_test_files(target_files, target_vars_in=[], thresh_low=2, thresh_high=
spike_test(nc=nc, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
-def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+def spike_test(nc, target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3, flag_high=4):
# If target_vars aren't user specified, set it to all the variables of
# the current_file, removing unwanted variables
@@ -77,6 +77,8 @@ def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, f
var_data = np.array(nc.variables[current_var])
+
+
print('checking '+current_var+' for high spikes')
# Step through the data, one element at a time, starting from the 2nd element
@@ -85,8 +87,11 @@ def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, f
# Calculate the mean of the i-1 and i+1 elements
shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+ # Calculate the step changes
+ shoulder_diff = np.diff(var_data[i-1:i+2])
+
# Check for spike exceeding high threshold
- if abs(var_data[i]-shoulder_mean) > thresh_high:
+ if (abs(var_data[i]-shoulder_mean) > thresh_high) & (True in (shoulder_diff>0)) & (True in (shoulder_diff<0)):# & (1.25*abs(shoulder_diff[0]) >= abs(x[1]) >= 0.75*abs(shoulder_diff[0])):
print('High spike found')
@@ -125,14 +130,17 @@ def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, f
# Calculate the mean of the i-1 and i+1 elements
shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+ # Calculate the step changes
+ shoulder_diff = np.diff(var_data[i-1:i+2])
+
#print('shoulder mean is '+str(shoulder_mean))
- abs_diff = abs(var_data[i]-shoulder_mean)
+ #abs_diff = abs(var_data[i]-shoulder_mean)
#print('absolute difference is '+str(abs_diff))
# Check for spike exceeding low threshold
- if abs(var_data[i]-shoulder_mean) > thresh_low:
+ if (abs(var_data[i]-shoulder_mean) > thresh_low) & (True in (shoulder_diff>0)) & (True in (shoulder_diff<0)): #& (1.25*abs(shoulder_diff[0]) >= abs(x[1]) >= 0.75*abs(shoulder_diff[0])):
print('Low spike found')
diff --git a/ocean_dp/qc/spike_test_ver_2.py b/ocean_dp/qc/spike_test_ver_2.py
new file mode 100755
index 0000000..68fe69e
--- /dev/null
+++ b/ocean_dp/qc/spike_test_ver_2.py
@@ -0,0 +1,149 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+# If files aren't specified, take all the IMOS*.nc files in the current folder
+def spike_test_all_files(target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+ target_files = glob.glob('IMOS*.nc')
+
+ spike_test_files(target_files, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
+
+
+def spike_test_files(target_files, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+
+ # Loop through each files in target_files
+ for current_file in target_files:
+ # Print each filename
+ print("input file %s" % current_file)
+
+ # Extract netcdf data into nc
+ nc = Dataset(current_file, mode="a")
+
+ # run the spike test
+ spike_test(nc=nc, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
+
+
+
+def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
+
+ # If target_vars aren't user specified, set it to all the variables of
+ # the current_file, removing unwanted variables
+ if target_vars_in == []:
+
+ target_vars = list(nc.variables.keys())
+
+ # Remove TIME
+ target_vars.remove('TIME')
+
+ # Remove any quality_control variables
+ qc_vars = [s for s in target_vars if 'quality_control' in s]
+ target_vars = [s for s in target_vars if s not in qc_vars]
+
+ # Remove any variables of single length
+ single_vars = [s for s in target_vars if nc.variables[s].size==1]
+ target_vars = [s for s in target_vars if s not in single_vars]
+
+ print('target_vars are '+' '.join(target_vars))
+
+ else:
+ target_vars = target_vars_in
+
+ # For each variable, extract the data
+ for current_var in target_vars:
+
+ var_data = np.array(nc.variables[current_var])
+
+ print('checking '+current_var+' for high spikes')
+
+ # Step through the data, one element at a time, starting from the 2nd element
+ for i in range(1,(len(var_data)-1)):
+
+ # Calculate the mean of the i-1 and i+1 elements
+ shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+
+ # Calculate the step changes
+ shoulder_diff = np.diff(var_data[i-1:i+2])
+
+ # Check for spike exceeding high threshold
+ if (abs(var_data[i]-shoulder_mean) > thresh_high) & (True in (shoulder_diff>=0)) & (False in (shoulder_diff>=0)):
+
+ print('High spike found')
+
+ #set corresponding QC value to...
+ nc.variables[current_var+'_quality_control'][i] = flag_high
+
+ print('checking '+current_var+' for low spikes')
+
+ # For each of the remaining indices
+ for i in low_spike_chk_idx:
+
+ #print('i is '+str(i))
+
+ # Calculate the mean of the i-1 and i+1 elements
+ shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+
+ # Calculate the step changes
+ shoulder_diff = np.diff(var_data[i-1:i+2])
+
+ #print('shoulder mean is '+str(shoulder_mean))
+
+ abs_diff = abs(var_data[i]-shoulder_mean)
+
+ #print('absolute difference is '+str(abs_diff))
+
+ # Check for spike exceeding low threshold
+ if (abs(var_data[i]-shoulder_mean) > thresh_low) & (True in (shoulder_diff>=0)) & (False in (shoulder_diff>=0)):
+
+ print('Low spike found')
+
+ #set corresponding QC value to...
+ nc.variables[current_var+'_quality_control'][i] = flag_low
+
+ # # Extract the qc data
+ # current_qc = np.array(nc.variables[current_var+'_quality_control'][:])
+
+ # # Find all the instances of consecutive 3s, and reset them to 0
+ # for i in np.where(current_qc==3)[0][0:-1][np.diff(np.where(current_qc==3)[0])==1]:
+
+ # nc.variables[current_var+'_quality_control'][i:i+2] = 0
+
+
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+ except AttributeError:
+ hist = ""
+
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' :spike_test performed on [' + str(target_vars) + '], with spikes greater than '+str(thresh_high)+' flagged as '+str(flag_high)+' and spikes greater than '+str(thresh_low)+' flagged as '+str(flag_low))
+
+ nc.close()
+
+if __name__ == "__main__":
+ # usage is
+ spike_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], thresh_low=float(sys.argv[3]), thresh_high=float(sys.argv[4]), flag_low= float(sys.argv[5]), flag_high= float(sys.argv[6]))
+
+
\ No newline at end of file
diff --git a/ocean_dp/qc/temp_diff_timeseries_from_fv00.py b/ocean_dp/qc/temp_diff_timeseries_from_fv00.py
index b21a605..9a38ccf 100755
--- a/ocean_dp/qc/temp_diff_timeseries_from_fv00.py
+++ b/ocean_dp/qc/temp_diff_timeseries_from_fv00.py
@@ -44,11 +44,19 @@ def sp_layout(num_in):
+cmap = colors.ListedColormap(['blue','orange','red'])
+
+boundaries = [0,20,30,2000]
+
+norm = colors.BoundaryNorm(boundaries, cmap.N, clip=True)
+
+
+
deployments = []
-for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+for x in next(os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"))[1]:
- if ('Pulse' in x) or ('SOFS' in x):
+ if (('Pulse' in x) or ('SOFS' in x)):
deployments.append(x)
@@ -64,6 +72,8 @@ def sp_layout(num_in):
acceptable_files = []
+ acceptable_depths = []
+
print('current deployment is '+current_deployment)
deployment_dtemp_dtime = np.array([])
@@ -82,57 +92,158 @@ def sp_layout(num_in):
if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
- acceptable_files.append(fname)
+ acceptable_files.append(os.path.join(root,fname))
+
+ acceptable_depths.append(nc.instrument_nominal_depth)
print(fname+' accepted')
nc.close()
-
- fig, ax = plt.subplots(sp_layout(len(acceptable_files))[0],sp_layout(len(acceptable_files))[1])
-
- ax=ax.flatten()
- for fname,f_idx in zip(acceptable_files, range(0,len(acceptable_files))):
+
+ acceptable_files = [x for _,x in sorted(zip(acceptable_depths,acceptable_files))]
- nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ fig, ax = plt.subplots(sp_layout(len(acceptable_files))[0],sp_layout(len(acceptable_files))[1],figsize=((12,8)),constrained_layout=True,sharex=True)
+
+ ax=ax.flatten()
+
+ fig.suptitle(current_deployment, fontsize=14)
+
- time_var = nc.variables["TIME"]
-
- time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
-
- time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
-
- time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+ for fpath,f_idx in zip(acceptable_files, range(0,len(acceptable_files))):
- #print('using '+fname)
+ nc = Dataset(fpath, mode = 'r') #mismatching files and folders?? looking for sofs7.5 file in pulse 7 folder!?!?
- temp_extract = np.array(nc.variables['TEMP'][:][(time > time_deploy) | (time < time_recovery)])
-
- # Calculate temperature changes
- nc_temp_diffs = np.diff(temp_extract)
-
- # Extract the time data
- nc_time = np.array(nc.variables['TIME'][:][(time >= time_deploy) | (time <= time_recovery)])
+ time_var = nc.variables["TIME"]
- # Convert from days to hours
- nc_time_hr = nc_time*24
-
- # Calculate time changes
- nc_time_hr_diffs = np.diff(nc_time_hr)
-
- # Calculate the rate of change of temperature wrt time
- nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+
+ time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+
+ #print('using '+fname)
+
+ temp_extract = np.array(nc.variables['TEMP'][:][(time > time_deploy) & (time < time_recovery)])
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(temp_extract)
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][:][(time > time_deploy) & (time < time_recovery)])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ im=ax[f_idx].scatter(nc_time,temp_extract,s=0.2,c=np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime))),cmap=cmap,norm=norm)
+
+ im=ax[f_idx].scatter(nc_time[np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))>20],temp_extract[np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))>20],s=0.5,c=np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))[np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))>20],cmap=cmap,norm=norm)
+
+ ax[f_idx].set_title(str(nc.instrument_nominal_depth)+'m',fontsize=10)
+
+ # Add the results for this netcdf to the record for the deployment
+ deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+
+ nc.close()
+
+ if f_idx==0:
- ax[f_idx].scatter(nc_time,nc_dtemp_dtime)
+ fig.colorbar(im)
- # Add the results for this netcdf to the record for the deployment
- deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+ for f_idx in range(len(acceptable_files),len(ax)):
- nc.close()
+ ax[f_idx].set_axis_off()
+##############################################################################
+fig, ax = plt.subplots(sp_layout(len(deployments))[0],sp_layout(len(deployments))[1],figsize=((12,8)),constrained_layout=True,sharex=False)
+ax=ax.flatten()
+
+for current_deployment,d_idx in zip(deployments,range(0,len(deployments))):
+
+ acceptable_files = []
+
+ acceptable_depths = []
+
+ print('current deployment is '+current_deployment)
+
+ deployment_dtemp_dtime = np.array([])
+
+ for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ print('checking '+fname)
+
+ if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+
+ print('opening '+fname)
+
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ acceptable_files.append(os.path.join(root,fname))
+
+ acceptable_depths.append(nc.instrument_nominal_depth)
+
+ print(fname+' accepted')
+
+ nc.close()
+
+
+ acceptable_files = [x for _,x in sorted(zip(acceptable_depths,acceptable_files))]
+
+
+ for fpath in acceptable_files:
+
+ nc = Dataset(fpath, mode = 'r') #mismatching files and folders?? looking for sofs7.5 file in pulse 7 folder!?!?
+
+ time_var = nc.variables["TIME"]
+
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+
+ time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+
+ #print('using '+fname)
+
+ temp_extract = np.array(nc.variables['TEMP'][:][(time > time_deploy) & (time < time_recovery)])
+
+ # Calculate temperature changes
+ nc_temp_diffs = np.diff(temp_extract)
+
+ # Extract the time data
+ nc_time = np.array(nc.variables['TIME'][:][(time > time_deploy) & (time < time_recovery)])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # Calculate the rate of change of temperature wrt time
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+ # Add the results for this netcdf to the record for the deployment
+ deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+
+ nc.close()
+
+ ax[d_idx].hist(deployment_dtemp_dtime,21,log=True)
+
+ ax[d_idx].set_title(current_deployment,fontsize=10)
+
From 5b390327300e528da818f7b281746b248a63f407 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 21 Apr 2020 17:15:06 +1000
Subject: [PATCH 35/59] QC and preprocessing
---
ocean_dp/aggregation/copyDataset.py | 73 ++++-----
ocean_dp/processing/pressure_interpolator.py | 7 +-
ocean_dp/qc/flatline_test.py | 27 +++-
ocean_dp/qc/global_range.py | 69 +++++++--
ocean_dp/qc/in_out_water.py | 66 ++++----
ocean_dp/qc/rate_of_change_test.py | 43 +++++-
ocean_dp/qc/spike_test.py | 64 ++++----
ocean_dp/qc/spike_test_ver_2.py | 149 -------------------
ocean_dp/sots_processing_runthrough.py | 74 +++++++++
9 files changed, 306 insertions(+), 266 deletions(-)
delete mode 100755 ocean_dp/qc/spike_test_ver_2.py
create mode 100755 ocean_dp/sots_processing_runthrough.py
diff --git a/ocean_dp/aggregation/copyDataset.py b/ocean_dp/aggregation/copyDataset.py
index 95c5998..54eb960 100644
--- a/ocean_dp/aggregation/copyDataset.py
+++ b/ocean_dp/aggregation/copyDataset.py
@@ -38,7 +38,10 @@ def aggregate(files, varNames):
# look over all files, create a time array from all files
# TODO: maybe delete files here without variables we're not interested in
# TODO: Create set of variables in all files
-
+ if not isinstance(varNames, list):
+
+ varNames = [varNames]
+
filen = 0
for path_file in files:
@@ -351,56 +354,56 @@ def aggregate(files, varNames):
return outputName
-def collect_vars_to_agg(files):
+# def collect_vars_to_agg(files):
- var_list = []
+# var_list = []
- nc = Dataset(files[0])
- varList = nc.variables
+# nc = Dataset(files[0])
+# varList = nc.variables
- # default to all variables in first file should no variable be specified
- var_list.extend(varList.keys())
- var_list.remove("TIME")
+# # default to all variables in first file should no variable be specified
+# var_list.extend(varList.keys())
+# var_list.remove("TIME")
- nc.close()
+# nc.close()
- print("collect_vars_to_agg::", var_list)
+# print("collect_vars_to_agg::", var_list)
- return var_list
+# return var_list
-if __name__ == "__main__":
+# if __name__ == "__main__":
- files = []
- varToAgg = None # defaults to all in first file
+# files = []
+# varToAgg = None # defaults to all in first file
- if len(sys.argv) > 1:
- parser = argparse.ArgumentParser()
- parser.add_argument('-v', action='append', dest='var', help='variable to include in output file (defaults to all)')
- parser.add_argument('-f', dest='filelist', help='read file names from file')
- parser.add_argument('file', nargs='*', help='input file name')
- args = parser.parse_args()
+# if len(sys.argv) > 1:
+# parser = argparse.ArgumentParser()
+# parser.add_argument('-v', action='append', dest='var', help='variable to include in output file (defaults to all)')
+# parser.add_argument('-f', dest='filelist', help='read file names from file')
+# parser.add_argument('file', nargs='*', help='input file name')
+# args = parser.parse_args()
- if not isinstance(args.filelist, type(None)):
- with open(args.filelist, "r") as ins:
- for line in ins:
- print(line)
- files.append(line.strip())
+# if not isinstance(args.filelist, type(None)):
+# with open(args.filelist, "r") as ins:
+# for line in ins:
+# print(line)
+# files.append(line.strip())
- if len(args.file):
- # files = args.file
- for fn in args.file:
- files.extend(glob.glob(fn))
+# if len(args.file):
+# # files = args.file
+# for fn in args.file:
+# files.extend(glob.glob(fn))
- varToAgg = args.var
+# varToAgg = args.var
- if isinstance(varToAgg, type(None)):
- varToAgg = collect_vars_to_agg(files)
+# if isinstance(varToAgg, type(None)):
+# varToAgg = collect_vars_to_agg(files)
- print("Aggregating variables ", varToAgg)
+# print("Aggregating variables ", varToAgg)
- outputName = aggregate(files, varToAgg)
+# outputName = aggregate(files, varToAgg)
- print("Output file : %s" % outputName)
+# print("Output file : %s" % outputName)
diff --git a/ocean_dp/processing/pressure_interpolator.py b/ocean_dp/processing/pressure_interpolator.py
index af248c2..0473144 100755
--- a/ocean_dp/processing/pressure_interpolator.py
+++ b/ocean_dp/processing/pressure_interpolator.py
@@ -26,6 +26,8 @@
def pressure_interpolator(netCDFfiles = [],agg = []):
+ files_out = []
+
if netCDFfiles==[]:
print('netcdffiles = none')
@@ -65,6 +67,8 @@ def pressure_interpolator(netCDFfiles = [],agg = []):
# a copy of the old file with the new name
if fn_new != fn:
+ files_out.append(fn_new)
+
print('copying file')
# copy file
shutil.copy(fn, fn_new)
@@ -254,7 +258,8 @@ def pressure_interpolator(netCDFfiles = [],agg = []):
fv01_contents.close()
agg.close()
-
+
+ return files_out
diff --git a/ocean_dp/qc/flatline_test.py b/ocean_dp/qc/flatline_test.py
index e211cf4..a9e7d23 100755
--- a/ocean_dp/qc/flatline_test.py
+++ b/ocean_dp/qc/flatline_test.py
@@ -28,13 +28,13 @@
# If files aren't specified, take all the IMOS*.nc files in the current folder
-def flatline_test_all_files(target_vars_in=[], window=3, flag=3):
+def flatline_test_all_files(target_vars_in=[], window=3, flag=4):
target_files = glob.glob('IMOS*.nc')
flatline_test_files(target_files, target_vars_in=target_vars_in, window=window, flag=flag)
-def flatline_test_files(target_files, target_vars_in=[], window=3, flag=3):
+def flatline_test_files(target_files, target_vars_in=[], window=3, flag=4):
# Loop through each files in target_files
for current_file in target_files:
@@ -48,7 +48,7 @@ def flatline_test_files(target_files, target_vars_in=[], window=3, flag=3):
flatline_test(nc=nc, target_vars_in=target_vars_in, window=window, flag=flag)
-def flatline_test(nc, target_vars_in=[], window=3, flag=3):
+def flatline_test(nc, target_vars_in=[], window=3, flag=4):
# If target_vars aren't user specified, set it to all the variables of
# the current_file, removing unwanted variables
@@ -75,6 +75,21 @@ def flatline_test(nc, target_vars_in=[], window=3, flag=3):
# For each variable, extract the data
for current_var in target_vars:
+ # Extract the variable
+ nc_var = nc.variables[current_var]
+
+ if nc_var.name + "_quality_control_flt" in nc.variables:
+ ncVarOut = nc.variables[nc_var.name + "_quality_control_flt"]
+ else:
+ ncVarOut = nc.createVariable(nc_var.name + "_quality_control_flt", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut.long_name = "quality flag for " + nc_var.name
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ # add new variable to list of aux variables
+ nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_flt"
+
var_data = np.array(nc.variables[current_var])
print('checking '+current_var)
@@ -86,13 +101,17 @@ def flatline_test(nc, target_vars_in=[], window=3, flag=3):
if len(set(var_data[i:(i+window)])) == 1:
# set corresponding QC value to...
- nc.variables[current_var+'_quality_control'][i:(i+window)] = flag
+ nc.variables[current_var+'_quality_control_flt'][i:(i+window)] = flag
+
+ nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_flt"][:],nc.variables[current_var + "_quality_control"][:])
# update the history attribute
try:
hist = nc.history + "\n"
except AttributeError:
hist = ""
+
+
nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + 'flatline_test performed on [' + str(target_vars) + '], window '+str(window)+' consecutive values or more were flagged with '+str(flag) )
diff --git a/ocean_dp/qc/global_range.py b/ocean_dp/qc/global_range.py
index e11bf40..68e2ee0 100644
--- a/ocean_dp/qc/global_range.py
+++ b/ocean_dp/qc/global_range.py
@@ -28,29 +28,71 @@
# flag 4 (bad) when out of global range
-def global_range(netCDFfile, variable, max, min):
+def global_range(netCDFfile, variable, max, min, qc_value=4):
ds = Dataset(netCDFfile, 'a')
- var = ds.variables[variable]
+ nc_var = ds.variables[variable]
+ var_data = nc_var[:]
+ var_data.mask = False
try:
- var_qc = ds.variables[variable + "_quality_control"]
+ # find the existing quality_control variable in the auxillary variables list
+ aux_vars = nc_var.ancillary_variables
+ aux_var = aux_vars.split(" ")
+ qc_vars = [i for i in aux_var if i.endswith("_quality_control")]
+ qc_var = qc_vars[0]
+ print("QC var name ", qc_var)
+ var_qc = ds.variables[qc_var]
except KeyError:
print("no QC variable found")
return None
+ # read existing quality_control flags
+ qc = var_qc[:]
+
# this is where the actual QC test is done
- mask = ((var[:] > max) | (var[:] < min))
+ mask = ((var_data > max) | (var_data < min))
+ print('mask data ', mask)
+
+ # create a qc variable just for this test flags
+ if nc_var.name + "_quality_control_gr" in ds.variables:
+ ncVarOut = ds.variables[nc_var.name + "_quality_control_gr"]
+ else:
+ ncVarOut = ds.createVariable(nc_var.name + "_quality_control_gr", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut.long_name = "quality flag for " + nc_var.name
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ # add new variable to list of aux variables
+ nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_gr"
+
+ # store the qc flags
+ ncVarOut[mask] = qc_value
- mask = mask & (var_qc[:] < 1) # only mark data that has not been QCd already
+ # store qc flags to main quality_control flags variable
+ mask = mask & (qc < 1) # only mark data that has not been QCd already
+ print('mask other qc ', mask)
- var_qc[mask] = 4
- count = sum(mask)
- print('marked records ', count)
+ qc[mask] = qc_value # mark the out of range points with bad_data
+
+ # calculate the number of points marked as bad_data
+ marked = np.zeros_like(qc)
+ marked[mask] = 1
+ count = sum(marked)
+ print('marked records ', count, mask, qc)
+
+ # write flags back to main QC variable
+ var_qc[:] = qc
# update the history attribute
- history = ds.history
- ds.setncattr("history", history + "\n" + datetime.utcnow().strftime("%Y-%m-%d") + " " + variable + " global range min = " + str(min) + " max = " + str(max) + " marked " + str(count))
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
+ ds.setncattr("history", hist + datetime.utcnow().strftime("%Y-%m-%d") + " " + variable + " global range min = " + str(min) + " max = " + str(max) + " marked " + str(count))
+
+ ds.variables[variable + "_quality_control"][:] = np.maximum(ds.variables[variable + "_quality_control_gr"][:],ds.variables[variable + "_quality_control"][:])
ds.close()
@@ -59,5 +101,8 @@ def global_range(netCDFfile, variable, max, min):
if __name__ == "__main__":
- # usage is
- global_range(sys.argv[1], sys.argv[2], float(sys.argv[3]), float(sys.argv[4]))
+ # usage is
+ if len(sys.argv) > 5:
+ global_range(sys.argv[1], sys.argv[2], max=float(sys.argv[3]), min=float(sys.argv[4]), qc_value=int(sys.argv[5]))
+ else:
+ global_range(sys.argv[1], sys.argv[2], max=float(sys.argv[3]), min=float(sys.argv[4]))
\ No newline at end of file
diff --git a/ocean_dp/qc/in_out_water.py b/ocean_dp/qc/in_out_water.py
index 2543c23..e8635ed 100644
--- a/ocean_dp/qc/in_out_water.py
+++ b/ocean_dp/qc/in_out_water.py
@@ -27,49 +27,56 @@
# flag out of water as QC value 7 (not_deployed), with wise leave as 0
-def in_out_water(netCDFfile):
+def in_out_water(netCDFfile, var_name=None):
ds = Dataset(netCDFfile, 'a')
- vars = ds.variables
-
+ nc_vars = ds.variables
to_add = []
- for v in vars:
- #print (vars[v].dimensions)
- if v != 'TIME':
- to_add.append(v)
-
- time_var = vars["TIME"]
+ if var_name:
+ to_add.append(var_name)
+ else:
+ for v in nc_vars:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+
+ time_var = nc_vars["TIME"]
time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
- print(time_deploy)
+ print('deployment time', time_deploy)
print(to_add)
- for v in to_add:
- if "TIME" in vars[v].dimensions:
- if v.endswith("_quality_control"):
+ # create a mask for the time range
+ mask = (time <= time_deploy) | (time >= time_recovery)
+ for v in to_add:
+ if "TIME" in nc_vars[v].dimensions:
+ if v.endswith("_quality_control"):
print("QC time dim ", v)
- ncVarOut = vars[v]
- mask = (time <= time_deploy) | (time >= time_recovery)
- ncVarOut[mask] = np.ones(vars[v].shape)[mask] * 7
-
+ ncVarOut = nc_vars[v]
+ ncVarOut[mask] = 7
+ else:
+ # create a qc variable just for this test flags
+ if v + "_quality_control_io" in ds.variables:
+ ncVarOut = ds.variables[v + "_quality_control_io"]
+ else:
+ ncVarOut = ds.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_vars[v].shape)
+ ncVarOut.long_name = "quality flag for " + v
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+ ncVarOut[mask] = 7
+
+ ds.variables[v + "_quality_control"][:] = np.maximum(ds.variables[v + "_quality_control_io"][:],ds.variables[v + "_quality_control"][:])
ds.file_version = "Level 1 - Quality Controlled Data"
-
- # update the history attribute
- try:
- hist = nc.history + "\n"
-
- except AttributeError:
- hist = ""
-
- nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ': in water test performed, with out of water data flagged at QC=7')
-
ds.close()
@@ -77,4 +84,7 @@ def in_out_water(netCDFfile):
if __name__ == "__main__":
- in_out_water(sys.argv[1])
+ if len(sys.argv) > 2 & sys.argv[1].startswith('-'):
+ in_out_water(sys.argv[2], var_name=sys.argv[1][1:])
+ else:
+ in_out_water(sys.argv[1])
\ No newline at end of file
diff --git a/ocean_dp/qc/rate_of_change_test.py b/ocean_dp/qc/rate_of_change_test.py
index f4e187b..d889b1f 100755
--- a/ocean_dp/qc/rate_of_change_test.py
+++ b/ocean_dp/qc/rate_of_change_test.py
@@ -106,14 +106,31 @@ def roc_test(nc,*args,target_vars_in=[]):
# For each variable
for current_var in target_vars:
- # Extract the data
- var_data = np.array(nc.variables[current_var])
+ # Extract the variable
+ nc_var = nc.variables[current_var]
+
+ if nc_var.name + "_quality_control_roc" in nc.variables:
+ ncVarOut = nc.variables[nc_var.name + "_quality_control_roc"]
+ else:
+ ncVarOut = nc.createVariable(nc_var.name + "_quality_control_roc", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut.long_name = "quality flag for " + nc_var.name
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ # add new variable to list of aux variables
+ nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
+
+ # Extract the variable data
+ var_data = np.array(nc.variables[current_var][:])
# Calculate dvar/dtime
var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
# For any change greater than change_per_hr, assign a qc value of 4
- nc.variables[current_var+'_quality_control'][[x for x in abs(np.insert(var_roc,0,0)) > change_per_hr]] = 4
+ nc.variables[current_var+'_quality_control_roc'][[x for x in abs(np.insert(var_roc,0,0)) > change_per_hr]] = 4
+
+ nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_roc"][:],nc.variables[current_var + "_quality_control"][:])
print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > change_per_hr])) + ' changes found above '+str(change_per_hr)+' '+nc.variables[current_var].units+' per hour')
@@ -146,6 +163,22 @@ def roc_test(nc,*args,target_vars_in=[]):
# For each variable
for current_var in target_vars:
+ # Extract the variable
+ nc_var = nc.variables[current_var]
+
+ if nc_var.name + "_quality_control_roc" in nc.variables:
+ ncVarOut = nc.variables[nc_var.name + "_quality_control_roc"]
+ else:
+ ncVarOut = nc.createVariable(nc_var.name + "_quality_control_roc", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut.long_name = "quality flag for " + nc_var.name
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ # add new variable to list of aux variables
+ nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
+
+
# Extract the data
var_data = np.array(nc.variables[current_var])
@@ -153,7 +186,9 @@ def roc_test(nc,*args,target_vars_in=[]):
var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
# For any change greater than change_per_hr, assign a qc value of 4
- nc.variables[current_var+'_quality_control'][[x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]]] = 4
+ nc.variables[current_var+'_quality_control_roc'][[x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]]] = 4
+
+ nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_roc"][:],nc.variables[current_var + "_quality_control"][:])
print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]])) + ' changes found above '+str(rate_spec[current_var])+' '+nc.variables[current_var].units+' per hour')
diff --git a/ocean_dp/qc/spike_test.py b/ocean_dp/qc/spike_test.py
index a061c32..622d3a3 100755
--- a/ocean_dp/qc/spike_test.py
+++ b/ocean_dp/qc/spike_test.py
@@ -26,15 +26,19 @@
import pytz
import os
+default_high = 100
+
+default_low = 50
+
# If files aren't specified, take all the IMOS*.nc files in the current folder
-def spike_test_all_files(target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3, flag_high=4):
+def spike_test_all_files(target_vars_in=[], thresh_low=default_low, thresh_high=default_high, flag_low=3, flag_high=4):
target_files = glob.glob('IMOS*.nc')
spike_test_files(target_files, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
-def spike_test_files(target_files, target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3, flag_high=4):
+def spike_test_files(target_files, target_vars_in=[], thresh_low=default_low, thresh_high=default_high, flag_low=3, flag_high=4):
# Loop through each files in target_files
for current_file in target_files:
@@ -48,7 +52,7 @@ def spike_test_files(target_files, target_vars_in=[], thresh_low=10, thresh_high
spike_test(nc=nc, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
-def spike_test(nc, target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3, flag_high=4):
+def spike_test(nc, target_vars_in=[], thresh_low=default_low, thresh_high=default_high, flag_low=3, flag_high=4):
# If target_vars aren't user specified, set it to all the variables of
# the current_file, removing unwanted variables
@@ -72,12 +76,27 @@ def spike_test(nc, target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3,
else:
target_vars = target_vars_in
- # For each variable, extract the data
+ # For each variable
for current_var in target_vars:
- var_data = np.array(nc.variables[current_var])
-
-
+ # Extract the variable
+ nc_var = nc.variables[current_var]
+
+ # Create a test specific qc variable if it doesn't already exist
+ if nc_var.name + "_quality_control_spk" in nc.variables:
+ ncVarOut = nc.variables[nc_var.name + "_quality_control_spk"]
+ else:
+ ncVarOut = nc.createVariable(nc_var.name + "_quality_control_spk", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut.long_name = "quality flag for " + nc_var.name
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ # add new variable to list of aux variables
+ nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_spk"
+
+ # Extract the variable data
+ var_data = np.array(nc.variables[current_var][:])
print('checking '+current_var+' for high spikes')
@@ -96,20 +115,12 @@ def spike_test(nc, target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3,
print('High spike found')
#set corresponding QC value to...
- nc.variables[current_var+'_quality_control'][i] = flag_high
+ nc.variables[current_var+'_quality_control_spk'][i] = flag_high
- # # Extract the qc data
- # current_qc = np.array(nc.variables[current_var+'_quality_control'][:])
-
- # # Find all the instances of consecutive 4s, and reset them to 0
- # for i in np.where(current_qc==4)[0][0:-1][np.diff(np.where(current_qc==4)[0])==1]:
-
- # nc.variables[current_var+'_quality_control'][i:i+2] = 0
# Find the indices where qc isn't set to 4 (high spike), removing the final element as it can't be check for a spike
low_spike_chk_idx = np.where(nc.variables[current_var+'_quality_control'][:]!=4)[0][0:-1]
-
- #print(low_spike_chk_idx)
+
# Remove from the indices those that are either side of a high spike
for i in np.where(nc.variables[current_var+'_quality_control'][:]==4)[0]:
@@ -133,28 +144,15 @@ def spike_test(nc, target_vars_in=[], thresh_low=10, thresh_high=20, flag_low=3,
# Calculate the step changes
shoulder_diff = np.diff(var_data[i-1:i+2])
- #print('shoulder mean is '+str(shoulder_mean))
-
- #abs_diff = abs(var_data[i]-shoulder_mean)
-
- #print('absolute difference is '+str(abs_diff))
-
# Check for spike exceeding low threshold
if (abs(var_data[i]-shoulder_mean) > thresh_low) & (True in (shoulder_diff>0)) & (True in (shoulder_diff<0)): #& (1.25*abs(shoulder_diff[0]) >= abs(x[1]) >= 0.75*abs(shoulder_diff[0])):
print('Low spike found')
#set corresponding QC value to...
- nc.variables[current_var+'_quality_control'][i] = flag_low
-
- # # Extract the qc data
- # current_qc = np.array(nc.variables[current_var+'_quality_control'][:])
-
- # # Find all the instances of consecutive 3s, and reset them to 0
- # for i in np.where(current_qc==3)[0][0:-1][np.diff(np.where(current_qc==3)[0])==1]:
-
- # nc.variables[current_var+'_quality_control'][i:i+2] = 0
-
+ nc.variables[current_var+'_quality_control_spk'][i] = flag_low
+
+ nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_spk"][:],nc.variables[current_var + "_quality_control"][:])
# update the history attribute
try:
diff --git a/ocean_dp/qc/spike_test_ver_2.py b/ocean_dp/qc/spike_test_ver_2.py
deleted file mode 100755
index 68fe69e..0000000
--- a/ocean_dp/qc/spike_test_ver_2.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright (C) 2020 Ben Weeding
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-import re
-from datetime import datetime, timedelta
-from netCDF4 import num2date, date2num
-from netCDF4 import stringtochar
-import numpy.ma as ma
-import sys
-from netCDF4 import Dataset
-import numpy as np
-import argparse
-import glob
-import pytz
-import os
-
-# If files aren't specified, take all the IMOS*.nc files in the current folder
-def spike_test_all_files(target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
- target_files = glob.glob('IMOS*.nc')
-
- spike_test_files(target_files, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
-
-
-def spike_test_files(target_files, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
-
- # Loop through each files in target_files
- for current_file in target_files:
- # Print each filename
- print("input file %s" % current_file)
-
- # Extract netcdf data into nc
- nc = Dataset(current_file, mode="a")
-
- # run the spike test
- spike_test(nc=nc, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
-
-
-
-def spike_test(nc, target_vars_in=[], thresh_low=2, thresh_high=4, flag_low=3, flag_high=4):
-
- # If target_vars aren't user specified, set it to all the variables of
- # the current_file, removing unwanted variables
- if target_vars_in == []:
-
- target_vars = list(nc.variables.keys())
-
- # Remove TIME
- target_vars.remove('TIME')
-
- # Remove any quality_control variables
- qc_vars = [s for s in target_vars if 'quality_control' in s]
- target_vars = [s for s in target_vars if s not in qc_vars]
-
- # Remove any variables of single length
- single_vars = [s for s in target_vars if nc.variables[s].size==1]
- target_vars = [s for s in target_vars if s not in single_vars]
-
- print('target_vars are '+' '.join(target_vars))
-
- else:
- target_vars = target_vars_in
-
- # For each variable, extract the data
- for current_var in target_vars:
-
- var_data = np.array(nc.variables[current_var])
-
- print('checking '+current_var+' for high spikes')
-
- # Step through the data, one element at a time, starting from the 2nd element
- for i in range(1,(len(var_data)-1)):
-
- # Calculate the mean of the i-1 and i+1 elements
- shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
-
- # Calculate the step changes
- shoulder_diff = np.diff(var_data[i-1:i+2])
-
- # Check for spike exceeding high threshold
- if (abs(var_data[i]-shoulder_mean) > thresh_high) & (True in (shoulder_diff>=0)) & (False in (shoulder_diff>=0)):
-
- print('High spike found')
-
- #set corresponding QC value to...
- nc.variables[current_var+'_quality_control'][i] = flag_high
-
- print('checking '+current_var+' for low spikes')
-
- # For each of the remaining indices
- for i in low_spike_chk_idx:
-
- #print('i is '+str(i))
-
- # Calculate the mean of the i-1 and i+1 elements
- shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
-
- # Calculate the step changes
- shoulder_diff = np.diff(var_data[i-1:i+2])
-
- #print('shoulder mean is '+str(shoulder_mean))
-
- abs_diff = abs(var_data[i]-shoulder_mean)
-
- #print('absolute difference is '+str(abs_diff))
-
- # Check for spike exceeding low threshold
- if (abs(var_data[i]-shoulder_mean) > thresh_low) & (True in (shoulder_diff>=0)) & (False in (shoulder_diff>=0)):
-
- print('Low spike found')
-
- #set corresponding QC value to...
- nc.variables[current_var+'_quality_control'][i] = flag_low
-
- # # Extract the qc data
- # current_qc = np.array(nc.variables[current_var+'_quality_control'][:])
-
- # # Find all the instances of consecutive 3s, and reset them to 0
- # for i in np.where(current_qc==3)[0][0:-1][np.diff(np.where(current_qc==3)[0])==1]:
-
- # nc.variables[current_var+'_quality_control'][i:i+2] = 0
-
-
- # update the history attribute
- try:
- hist = nc.history + "\n"
- except AttributeError:
- hist = ""
-
- nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' :spike_test performed on [' + str(target_vars) + '], with spikes greater than '+str(thresh_high)+' flagged as '+str(flag_high)+' and spikes greater than '+str(thresh_low)+' flagged as '+str(flag_low))
-
- nc.close()
-
-if __name__ == "__main__":
- # usage is
- spike_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], thresh_low=float(sys.argv[3]), thresh_high=float(sys.argv[4]), flag_low= float(sys.argv[5]), flag_high= float(sys.argv[6]))
-
-
\ No newline at end of file
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
new file mode 100755
index 0000000..491f3a5
--- /dev/null
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+# Initial package import
+import sys
+import glob
+import fnmatch
+import os
+
+# Addition of folder containing user defined packages/modules
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/qc')
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/aggregation')
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/processing')
+
+# Import user defined packages
+import add_qc_flags
+import in_out_water
+import copyDataset
+import pressure_interpolator
+
+import global_range
+import rate_of_change_test
+
+# Set the working directory
+os.chdir('/Users/tru050/Desktop/sofs7.5 test data')
+
+# Make a list of FV00 filenames
+fv00_files = glob.glob('*IMOS_ABOS-SOTS*FV00*.nc')
+
+# Run add_qc_flags.py and collect FV01 filenames
+fv01_files = add_qc_flags.add_qc(fv00_files)
+
+# Run in_out_water.py
+for ncfile in fv01_files:
+
+ in_out_water.in_out_water(ncfile,var_name='TEMP')
+
+# Select pressure files using matching = fnmatch.filter(sofs75filesfv01,'*SOTS*P*_2*.nc')
+pres_files = fnmatch.filter(fv01_files,'*IMOS_ABOS-SOTS*P*_2*FV01*.nc')
+
+# Run copyDataset.py
+copyDataset.aggregate(pres_files,'PRES')
+
+# Run pressure_interpolator.py
+fv01_pres_interp_files = pressure_interpolator.pressure_interpolator(netCDFfiles=fv01_files,agg=glob.glob('*IMOS_ABOS-SOTS*Aggregate*.nc')[0])
+
+# Global range test
+for ncfile in fv01_pres_interp_files:
+
+ print(ncfile)
+
+ global_range.global_range(ncfile,'TEMP',40,-2)
+
+# Rate of change
+rate_of_change_test.roc_test_files(fv01_pres_interp_files,'TEMP',20)
+
+# Spike
+
+
+# Flatline
+
+
From 07b40fdd30982cd558448c4adec61e3cc8f424aa Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 22 Apr 2020 14:03:52 +1000
Subject: [PATCH 36/59] Qc checker and variable selector
---
ocean_dp/qc/qc_checker.py | 116 +++++++++++++++++++++++++
ocean_dp/qc/rate_of_change_test.py | 13 +--
ocean_dp/sots_processing_runthrough.py | 17 +++-
3 files changed, 134 insertions(+), 12 deletions(-)
create mode 100755 ocean_dp/qc/qc_checker.py
diff --git a/ocean_dp/qc/qc_checker.py b/ocean_dp/qc/qc_checker.py
new file mode 100755
index 0000000..5d8720d
--- /dev/null
+++ b/ocean_dp/qc/qc_checker.py
@@ -0,0 +1,116 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+# Code is design to check that netcdf files processed using the SOTS methods
+# conform to the QC labelling designed by Peter Jansen.
+
+def qc_checker_all_files(target_vars_in=[]):
+
+ successful_files=[]
+
+ target_files = glob.glob('IMOS*.nc')
+
+ successful_files = qc_checker_files(target_files, target_vars_in=target_vars_in)
+
+
+def qc_checker_files(target_files,target_vars_in=[]):
+
+ successful_files=[]
+
+ # Loop through each files in target_files
+ for current_file in target_files:
+ # Print each filename
+ print("input file %s" % current_file)
+
+ # Extract netcdf data into nc
+ nc = Dataset(current_file, mode="a")
+
+ # run the spike test - specifying *args here makes python unpack args to be passed again successfully as separate items
+ if qc_checker(nc,target_vars_in=target_vars_in):
+
+ successful_files.append(current_file)
+
+ return successful_files
+
+
+# Enter args as variable name and rate of change limit, ie. 'TEMP',4
+def qc_checker(nc,target_vars_in=[]):
+
+ all_vars = list(nc.variables.keys())
+
+ # If target_vars aren't user specified, set it to all the variables of
+ # the current_file, removing unwanted variables (qc, single length, TIME)
+ if target_vars_in == []:
+
+ target_vars = [s for s in all_vars if 'TIME' not in s and 'quality_control' not in s and nc.variables[s].size!=1]
+
+ print('target_vars are '+' '.join(target_vars))
+
+ else:
+ target_vars = target_vars_in
+
+ qc_behaving = True
+
+ for current_var in target_vars:
+
+ qc_global_data = np.array(nc.variables[current_var+"_quality_control"][:])
+
+ qc_test_specific = [s for s in all_vars if current_var+"_quality_control" in s and not s.endswith('control')]
+
+ for current_qc_test in qc_test_specific:
+
+ qc_test_data = np.array(nc.variables[current_qc_test])
+
+ #print('checking '+current_qc_test)
+
+ # If true, fail process
+ if any(np.less(qc_global_data,qc_test_data)):
+
+ print(current_qc_test + "failed")
+
+ qc_behaving = False
+
+ if qc_behaving:
+
+ return True
+
+
+ # Close the current netcdf file
+ nc.close()
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ocean_dp/qc/rate_of_change_test.py b/ocean_dp/qc/rate_of_change_test.py
index d889b1f..a7f54d1 100755
--- a/ocean_dp/qc/rate_of_change_test.py
+++ b/ocean_dp/qc/rate_of_change_test.py
@@ -79,18 +79,9 @@ def roc_test(nc,*args,target_vars_in=[]):
# the current_file, removing unwanted variables
if target_vars_in == []:
- target_vars = list(nc.variables.keys())
+ all_vars = list(nc.variables.keys())
- # Remove TIME
- target_vars.remove('TIME')
-
- # Remove any quality_control variables
- qc_vars = [s for s in target_vars if 'quality_control' in s]
- target_vars = [s for s in target_vars if s not in qc_vars]
-
- # Remove any variables of single length
- single_vars = [s for s in target_vars if nc.variables[s].size==1]
- target_vars = [s for s in target_vars if s not in single_vars]
+ target_vars = [s for s in all_vars if 'TIME' not in s and 'quality_control' not in s and nc.variables[s].size!=1]
print('target_vars are '+' '.join(target_vars))
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
index 491f3a5..474eb3a 100755
--- a/ocean_dp/sots_processing_runthrough.py
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -18,6 +18,7 @@
import glob
import fnmatch
import os
+import time
# Addition of folder containing user defined packages/modules
sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/qc')
@@ -32,6 +33,11 @@
import global_range
import rate_of_change_test
+import spike_test
+import flatline_test
+import qc_checker
+
+start = time.time()
# Set the working directory
os.chdir('/Users/tru050/Desktop/sofs7.5 test data')
@@ -67,8 +73,17 @@
rate_of_change_test.roc_test_files(fv01_pres_interp_files,'TEMP',20)
# Spike
-
+spike_test.spike_test_files(fv01_pres_interp_files,target_vars_in=['TEMP'])
# Flatline
+flatline_test.flatline_test_files(fv01_pres_interp_files,['TEMP'])
+
+# Check qc process has worked
+fv01_qc_checked = qc_checker.qc_checker_files(fv01_pres_interp_files,['TEMP'])
+
+end = time.time()
+
+print('time elapsed: '+end-start)
+
From a6bb5603206ca99784be304a4e20d676dd64e7fc Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 22 Apr 2020 14:23:55 +1000
Subject: [PATCH 37/59] commented qc checker
---
ocean_dp/qc/qc_checker.py | 28 +++++++++++++++++---------
ocean_dp/sots_processing_runthrough.py | 2 +-
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/ocean_dp/qc/qc_checker.py b/ocean_dp/qc/qc_checker.py
index 5d8720d..e229d72 100755
--- a/ocean_dp/qc/qc_checker.py
+++ b/ocean_dp/qc/qc_checker.py
@@ -36,6 +36,7 @@ def qc_checker_all_files(target_vars_in=[]):
target_files = glob.glob('IMOS*.nc')
+ # Returns the files that conform to the qc labelling
successful_files = qc_checker_files(target_files, target_vars_in=target_vars_in)
@@ -51,17 +52,19 @@ def qc_checker_files(target_files,target_vars_in=[]):
# Extract netcdf data into nc
nc = Dataset(current_file, mode="a")
- # run the spike test - specifying *args here makes python unpack args to be passed again successfully as separate items
+ # If the qc_checker was successfull, add the filename to the list
if qc_checker(nc,target_vars_in=target_vars_in):
successful_files.append(current_file)
-
+
+ # Returns the files that conform to the qc labelling
return successful_files
# Enter args as variable name and rate of change limit, ie. 'TEMP',4
def qc_checker(nc,target_vars_in=[]):
+ # Collect all the variables from the netcdf
all_vars = list(nc.variables.keys())
# If target_vars aren't user specified, set it to all the variables of
@@ -74,31 +77,38 @@ def qc_checker(nc,target_vars_in=[]):
else:
target_vars = target_vars_in
-
- qc_behaving = True
-
+
+ # For each of the variables selected
for current_var in target_vars:
+ # Collect the global qc data
qc_global_data = np.array(nc.variables[current_var+"_quality_control"][:])
+ # Collect the names of all the other test specific qc vectors
qc_test_specific = [s for s in all_vars if current_var+"_quality_control" in s and not s.endswith('control')]
+ # For each of the other test specific qc vectors
for current_qc_test in qc_test_specific:
+ # Extract the data
qc_test_data = np.array(nc.variables[current_qc_test])
#print('checking '+current_qc_test)
- # If true, fail process
+ # If any of the test specific qc vectors ever have a great value than the global qc vector at a timestamp, the qc process has failed
if any(np.less(qc_global_data,qc_test_data)):
print(current_qc_test + "failed")
qc_behaving = False
- if qc_behaving:
-
- return True
+ else:
+
+ # The qc process has succeeded
+ qc_behaving = True
+
+ # Returns true if qc has succeeded, false if not
+ return qc_behaving
# Close the current netcdf file
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
index 474eb3a..6c8bd90 100755
--- a/ocean_dp/sots_processing_runthrough.py
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -83,7 +83,7 @@
end = time.time()
-print('time elapsed: '+end-start)
+print('time elapsed: '+str(end-start))
From f97718bcd8264cf0450c8bd19d8cfe85c5b7af74 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Mon, 27 Apr 2020 17:00:20 +1000
Subject: [PATCH 38/59] Changed tests to update netcdfs correctly when
reprocessing data
---
ocean_dp/plotting/plotNetCDFmultiqc.py | 340 +++++++++++++++++++++++++
ocean_dp/qc/flatline_test.py | 145 ++++++-----
ocean_dp/qc/global_range.py | 3 +-
ocean_dp/qc/qc_checker.py | 6 +-
ocean_dp/qc/rate_of_change_test.py | 57 ++++-
ocean_dp/qc/spike_test.py | 8 +-
ocean_dp/sots_processing_runthrough.py | 6 +-
7 files changed, 479 insertions(+), 86 deletions(-)
create mode 100755 ocean_dp/plotting/plotNetCDFmultiqc.py
diff --git a/ocean_dp/plotting/plotNetCDFmultiqc.py b/ocean_dp/plotting/plotNetCDFmultiqc.py
new file mode 100755
index 0000000..9442a56
--- /dev/null
+++ b/ocean_dp/plotting/plotNetCDFmultiqc.py
@@ -0,0 +1,340 @@
+#!/usr/bin/python3
+
+# raw2netCDF
+# Copyright (C) 2019 Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+from netCDF4 import Dataset
+from netCDF4 import num2date
+import datetime as dt
+import numpy as np
+import matplotlib
+
+matplotlib.use('Agg')
+
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_pdf import PdfPages
+import sys
+import os
+from matplotlib import rc
+
+# rc('text', usetex=True)
+
+for path_file in sys.argv[1:len(sys.argv)]:
+
+ nc = Dataset(path_file)
+
+ # get time variable
+ nctime = nc.variables['TIME'][:]
+ t_unit = nc.variables['TIME'].units # get unit "days since 1950-01-01T00:00:00Z"
+
+ try:
+ t_cal = nc.variables['TIME'].calendar
+ except AttributeError: # Attribute doesn't exist
+ t_cal = u"gregorian" # or standard
+
+ dt_time = [num2date(t, units=t_unit, calendar=t_cal) for t in nctime]
+
+ # work out variables to plot
+ nc_vars_to_plot = [var for var in nc.variables]
+
+ # remove any dimensions from the list to plot
+ nc_dims = [dim for dim in nc.dimensions] # list of nc dimensions
+
+ for i in nc_dims:
+ try:
+ nc_vars_to_plot.remove(i)
+ except ValueError:
+ print('did not remove ', i)
+
+ # remove an auxiliary variables from the list to plot
+ aux_vars = list()
+ for var in nc.variables:
+ try:
+ aux_vars.append(nc.variables[var].getncattr('ancillary_variables'))
+ except AttributeError:
+ pass
+
+ # remove any variables without a TIME dimension from the list to plot
+ to_plot = list()
+
+ for var in nc.variables:
+ # print var
+ if var in nc_dims:
+ continue
+ if var in aux_vars:
+ continue
+ if 'TIME' in nc.variables[var].dimensions:
+ print('to plot ', var)
+ to_plot.append(var)
+
+ # pdffile = path_file[path_file.rfind('/')+1:len(path_file)] + '-' + nc.getncattr('deployment_code') + '-plot.pdf'
+
+ pdffile = path_file + '.pdf'
+
+ pp = PdfPages(pdffile)
+
+ txt = ""
+ lines = 0
+ plt.figure(figsize=(11.69, 8.27))
+
+ txt = 'file name : ' + os.path.basename(path_file) + '\n\n'
+
+ txt += 'Dimensions:\n'
+ for x in nc.dimensions:
+ txt += ' ' + x + ' (' + str(nc.dimensions[x].size) + ')\n'
+
+ txt += '\nVariables:\n'
+ for x in nc.variables:
+ v_atts = nc.variables[x]
+ var_line = ' ' + x + ' ' + str(v_atts.dimensions)
+
+ try:
+ var_line += ' : long_name = ' + v_atts.long_name
+ except AttributeError:
+ pass
+ try:
+ var_line += ' (' + v_atts.units + ')'
+ except AttributeError:
+ pass
+ var_line += ' : type ' + str(v_atts.datatype)
+
+ print(var_line)
+
+ lines = txt.count('\n') + var_line.count('\n')
+ # print("lines ", lines)
+ if lines > 57:
+ #print(txt)
+ print('new page')
+ plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+ plt.axis('off')
+ pp.savefig()
+ plt.close()
+ plt.figure(figsize=(11.69, 8.27))
+
+ txt = ""
+
+ lines = 0
+
+ txt += var_line + '\n'
+
+ plt.figure(figsize=(11.69, 8.27))
+
+ plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+ plt.axis('off')
+ pp.savefig()
+ plt.close()
+
+ txt = ""
+ plt.figure(figsize=(11.69, 8.27))
+
+ lines = 0
+ # print "NetCDF Global Attributes:"
+ for nc_attr in sorted(nc.ncattrs(), key=lambda s: s.lower()):
+ #print('\t%s:' % nc_attr, repr(nc.getncattr(nc_attr)))
+ attrib_txt = nc_attr + ' : ' + str(nc.getncattr(nc_attr)).replace('\n', '\n ') + '\n'
+ lines = txt.count('\n') + attrib_txt.count('\n')
+ # print("lines ", lines)
+ if lines > 57:
+ #print(txt)
+ print('new page')
+ plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+ plt.axis('off')
+ pp.savefig()
+ plt.close()
+ plt.figure(figsize=(11.69, 8.27))
+
+ txt = ""
+
+ lines = 0
+
+ txt += attrib_txt
+
+ lines += 1
+
+ #print(txt)
+ plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+ plt.axis('off')
+ pp.savefig()
+ plt.close()
+
+ # plot each variable in the to_plot list
+ for plot in to_plot:
+
+ plot_var = nc.variables[plot]
+
+ var = plot_var[:]
+ shape_len = len(var.shape)
+
+ # create a page with information about the variable, and any aux variables
+ fig = plt.figure(figsize=(11.69, 8.27))
+
+ text = "Variable : " + plot_var.name + str(plot_var.dimensions) + "\n"
+ nc_attrs = plot_var.ncattrs()
+ # print "NetCDF Variable Attributes:"
+ for nc_attr in nc_attrs:
+ attrVal = plot_var.getncattr(nc_attr)
+ #print('\t%s:' % nc_attr, repr(plot_var.getncattr(nc_attr)), type(attrVal))
+ text += nc_attr + ' : ' + str(attrVal) + '\n'
+
+ if hasattr(plot_var, 'ancillary_variables'):
+ qc_var_name = plot_var.getncattr('ancillary_variables')
+ qc_var = nc.variables[qc_var_name]
+
+ text += "\nAUX : " + qc_var.name + str(qc_var.dimensions) + "\n"
+
+ nc_attrs = qc_var.ncattrs()
+ # print "NetCDF AUX Variable Attributes:"
+ for nc_attr in nc_attrs:
+ # print '\t%s:' % nc_attr, repr(nc.getncattr(nc_attr))
+ text += nc_attr + ' : ' + str(qc_var.getncattr(nc_attr)) + '\n'
+
+ qc = nc.variables[qc_var_name][:]
+
+ if plot_var.dimensions[0] != 'TIME':
+ qc = np.transpose(qc)
+
+ qc = np.squeeze(qc)
+ else:
+ qc = 0
+
+ plt.text(-0.1, 0.0, text, fontsize=8, family='monospace')
+ plt.axis('off')
+ pp.savefig(fig)
+ plt.close(fig)
+
+ print(plot_var.name, " shape ", var.shape, " len ", shape_len)
+
+ # now create a page with the plot
+
+ fig = plt.figure(figsize=(11.69, 8.27))
+ ax = plt.subplot(111)
+
+ if plot_var.dimensions[0] != 'TIME':
+ var = np.transpose(var)
+ var = np.squeeze(var)
+
+ # create range from only good data
+ qc_m = np.ma.masked_where((qc == 9) | (qc == 4) | (qc == 6), var)
+ mx = qc_m.max()
+ mi = qc_m.min()
+
+ marg = (mx - mi) * 0.1
+ print("max ", mx, " min ", mi)
+
+ plt.ylim([mi - marg, mx + marg])
+
+ # create a legend entry made from serial_number and depth
+ if hasattr(plot_var, 'sensor_serial_number'):
+ sn = plot_var.getncattr('sensor_serial_number').split('; ')
+ elif hasattr(plot_var, 'sensor_serial_number'):
+ sn = nc.getncattr('instrument_serial_number').split('; ')
+ else:
+ sn = 'not found'
+
+ if hasattr(plot_var, 'sensor_depth'):
+ dpth = plot_var.getncattr('sensor_depth').split('; ')
+ elif hasattr(plot_var, 'sensor_height'):
+ dpth = plot_var.getncattr('sensor_height').split('; ')
+ elif hasattr(nc, 'instrument_nominal_depth'):
+ dpth = str(nc.getncattr('instrument_nominal_depth')).split('; ')
+ else:
+ dpth = 'unknown'
+
+ print("depth ", dpth)
+
+ leg = [x + ' (' + y + ' m)' for x, y in zip(sn, dpth)]
+
+ # if less than 200 points plot with a dot and line
+ plot_marks = '-'
+ if len(dt_time) < 200:
+ plot_marks = '.-'
+
+ pl = ax.plot(dt_time, qc_m, plot_marks)
+
+ # mark qc>2 with yellow dot, qc>3 with red dot
+ qc_m = np.ma.masked_where((qc <= 2) | (qc == 8), var)
+ ax.plot(dt_time, qc_m, 'yo')
+ qc_m = np.ma.masked_where((qc <= 3) | (qc == 8), var)
+ ax.plot(dt_time, qc_m, 'ro')
+
+ # shrink the plot some
+ box = ax.get_position()
+ ax.set_position([box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9])
+
+ # add legend below plot
+ #plt.legend(iter(pl), leg, loc='lower center', bbox_to_anchor=(0.5, -0.05), ncol=5)
+
+ plt.legend(iter(pl), leg, bbox_to_anchor=(0.0, -0.2, 1.0, -0.15), loc=3, ncol=6, mode="expand", borderaxespad=0.0, fontsize='x-small')
+
+ # invert the yaxis if the units are dbar
+ try:
+ if plot_var.units == 'dbar':
+ plt.gca().invert_yaxis()
+ except AttributeError:
+ pass
+ try:
+ if plot_var.positive == 'down':
+ plt.gca().invert_yaxis()
+ except AttributeError:
+ pass
+
+ #fig.autofmt_xdate()
+ plt.grid()
+
+ # add deployment/instrument/standard name as title
+
+ # plt.title(nc.getncattr('deployment_code') + ' : ' + plot_var.sensor_name + ' ' + \
+ # plot_var.sensor_serial_number + ' : ' + plot_var.name, fontsize=10)
+
+ # plt.title(nc.getncattr('deployment_code') + ' : ' + plot_var.getncattr('name'), fontsize=10)
+ try:
+ plt.title(nc.getncattr('deployment_code'), fontsize=10)
+ except AttributeError:
+ pass
+
+ # add units to Y axis
+ try:
+ plt.ylabel(plot + ' (' + plot_var.units + ')')
+ except AttributeError:
+ pass
+
+ date_time_start = None
+ date_time_end = None
+
+ # plot only the time of deployment
+ try:
+ date_time_start = dt.datetime.strptime(nc.getncattr('time_coverage_start'), '%Y-%m-%dT%H:%M:%SZ')
+ date_time_end = dt.datetime.strptime(nc.getncattr('time_coverage_end'), '%Y-%m-%dT%H:%M:%SZ')
+ except AttributeError:
+ pass
+ try:
+ date_time_start = dt.datetime.strptime(nc.getncattr('time_deployment_start'), '%Y-%m-%dT%H:%M:%SZ')
+ date_time_end = dt.datetime.strptime(nc.getncattr('time_deployment_end'), '%Y-%m-%dT%H:%M:%SZ')
+ except AttributeError:
+ pass
+
+ if date_time_start:
+ plt.xlim(date_time_start, date_time_end)
+
+ # plt.savefig(plot + '.pdf')
+ pp.savefig(fig, papertype='a4')
+ plt.close(fig)
+
+ # plt.show()
+
+ pp.close()
+
+ nc.close()
diff --git a/ocean_dp/qc/flatline_test.py b/ocean_dp/qc/flatline_test.py
index a9e7d23..79ab981 100755
--- a/ocean_dp/qc/flatline_test.py
+++ b/ocean_dp/qc/flatline_test.py
@@ -28,13 +28,13 @@
# If files aren't specified, take all the IMOS*.nc files in the current folder
-def flatline_test_all_files(target_vars_in=[], window=3, flag=4):
+def flatline_test_all_files(target_vars_in=[], window=5, flag=4):
target_files = glob.glob('IMOS*.nc')
flatline_test_files(target_files, target_vars_in=target_vars_in, window=window, flag=flag)
-def flatline_test_files(target_files, target_vars_in=[], window=3, flag=4):
+def flatline_test_files(target_files, target_vars_in=[], window=5, flag=4):
# Loop through each files in target_files
for current_file in target_files:
@@ -48,74 +48,87 @@ def flatline_test_files(target_files, target_vars_in=[], window=3, flag=4):
flatline_test(nc=nc, target_vars_in=target_vars_in, window=window, flag=flag)
-def flatline_test(nc, target_vars_in=[], window=3, flag=4):
+def flatline_test(nc, target_vars_in=[], window=5, flag=4):
- # If target_vars aren't user specified, set it to all the variables of
- # the current_file, removing unwanted variables
- if target_vars_in == []:
-
- target_vars = list(nc.variables.keys())
-
- # Remove TIME
- target_vars.remove('TIME')
-
- # Remove any quality_control variables
- qc_vars = [s for s in target_vars if 'quality_control' in s]
- target_vars = [s for s in target_vars if s not in qc_vars]
-
- # Remove any variables of single length
- single_vars = [s for s in target_vars if nc.variables[s].size==1]
- target_vars = [s for s in target_vars if s not in single_vars]
-
- print('target_vars are '+' '.join(target_vars))
-
- else:
- target_vars = target_vars_in
-
- # For each variable, extract the data
- for current_var in target_vars:
-
- # Extract the variable
- nc_var = nc.variables[current_var]
-
- if nc_var.name + "_quality_control_flt" in nc.variables:
- ncVarOut = nc.variables[nc_var.name + "_quality_control_flt"]
- else:
- ncVarOut = nc.createVariable(nc_var.name + "_quality_control_flt", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
- ncVarOut[:] = np.zeros(nc_var.shape)
- ncVarOut.long_name = "quality flag for " + nc_var.name
- ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
- ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+ print('Window is '+str(window))
+
+ # If target_vars aren't user specified, set it to all the variables of
+ # the current_file, removing unwanted variables
+ if target_vars_in == []:
+
+ target_vars = list(nc.variables.keys())
+
+ # Remove TIME
+ target_vars.remove('TIME')
+
+ # Remove any quality_control variables
+ qc_vars = [s for s in target_vars if 'quality_control' in s]
+ target_vars = [s for s in target_vars if s not in qc_vars]
+
+ # Remove any variables of single length
+ single_vars = [s for s in target_vars if nc.variables[s].size==1]
+ target_vars = [s for s in target_vars if s not in single_vars]
+
+ print('target_vars are '+' '.join(target_vars))
+ else:
+ target_vars = target_vars_in
+
+ # For each variable, extract the data
+ for current_var in target_vars:
+
+ # Extract the variable
+ nc_var = nc.variables[current_var]
+
+ if nc_var.name + "_quality_control_flt" in nc.variables:
+ print('flt qc variable already present')
+ ncVarOut = nc.variables[nc_var.name + "_quality_control_flt"]
+ ncVarOut[:] = 0
+ else:
+ ncVarOut = nc.createVariable(nc_var.name + "_quality_control_flt", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = 0
+ # print(all(nc.variables[nc_var.name + "_quality_control_flt"]==0))
+ ncVarOut.long_name = "quality flag for " + nc_var.name
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
# add new variable to list of aux variables
nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_flt"
-
- var_data = np.array(nc.variables[current_var])
-
- print('checking '+current_var)
-
- # Step through the data, one element at a time, using the window
- for i in range(0,(len(var_data)-window+1)):
-
- # This is true if 'window' elements in a row are equal
- if len(set(var_data[i:(i+window)])) == 1:
-
- # set corresponding QC value to...
- nc.variables[current_var+'_quality_control_flt'][i:(i+window)] = flag
-
- nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_flt"][:],nc.variables[current_var + "_quality_control"][:])
-
- # update the history attribute
- try:
- hist = nc.history + "\n"
- except AttributeError:
- hist = ""
-
-
-
- nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + 'flatline_test performed on [' + str(target_vars) + '], window '+str(window)+' consecutive values or more were flagged with '+str(flag) )
-
- nc.close()
+
+ var_data = np.array(nc.variables[current_var][:])
+
+ if (all(nc.variables[nc_var.name + "_quality_control_flt"][:] == 0)):
+ print('All test specific qc values are zero before filling')
+
+ print('checking ' + current_var)
+
+ print('Window is ' + str(window))
+
+ # Step through the data, one element at a time, using the window
+ for i in range(0, (len(var_data) - window + 1)):
+
+ # This is true if 'window' elements in a row are equal
+ if len(set(var_data[i:(i + window)])) == 1:
+ print(str(i))
+ # set corresponding QC value to...
+ ncVarOut[i:(i + window)] = flag
+
+ points_marked = len([elem for elem in ncVarOut[:] if elem == 4])
+ print('Data points flagged: ', points_marked)
+
+ qc_var = nc.variables[current_var + "_quality_control"]
+ qc_var[:] = np.maximum(ncVarOut[:], qc_var[:])
+ # update the history attribute
+ try:
+ hist = nc.history + "\n"
+ except AttributeError:
+ hist = ""
+
+
+
+ nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + 'flatline_test performed on [' + str(target_vars) + '], window '+str(window)+' consecutive values or more were flagged with '+str(flag) )
+
+ nc.close()
if __name__ == "__main__":
# usage is
diff --git a/ocean_dp/qc/global_range.py b/ocean_dp/qc/global_range.py
index 68e2ee0..db3b80b 100644
--- a/ocean_dp/qc/global_range.py
+++ b/ocean_dp/qc/global_range.py
@@ -57,9 +57,10 @@ def global_range(netCDFfile, variable, max, min, qc_value=4):
# create a qc variable just for this test flags
if nc_var.name + "_quality_control_gr" in ds.variables:
ncVarOut = ds.variables[nc_var.name + "_quality_control_gr"]
+ ncVarOut[:] = 0
else:
ncVarOut = ds.createVariable(nc_var.name + "_quality_control_gr", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
- ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut[:] = 0
ncVarOut.long_name = "quality flag for " + nc_var.name
ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
diff --git a/ocean_dp/qc/qc_checker.py b/ocean_dp/qc/qc_checker.py
index e229d72..90e83e8 100755
--- a/ocean_dp/qc/qc_checker.py
+++ b/ocean_dp/qc/qc_checker.py
@@ -115,7 +115,11 @@ def qc_checker(nc,target_vars_in=[]):
nc.close()
-
+# def qc_check_plot(target_file,target_var)
+
+# nc = Dataset(target_file,'r')
+
+
diff --git a/ocean_dp/qc/rate_of_change_test.py b/ocean_dp/qc/rate_of_change_test.py
index a7f54d1..84f8e4f 100755
--- a/ocean_dp/qc/rate_of_change_test.py
+++ b/ocean_dp/qc/rate_of_change_test.py
@@ -102,26 +102,38 @@ def roc_test(nc,*args,target_vars_in=[]):
if nc_var.name + "_quality_control_roc" in nc.variables:
ncVarOut = nc.variables[nc_var.name + "_quality_control_roc"]
+ ncVarOut[:] = np.zeros(nc.variables[nc_var.name + "_quality_control_roc"].shape)
else:
ncVarOut = nc.createVariable(nc_var.name + "_quality_control_roc", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
- ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut[:] = 0
ncVarOut.long_name = "quality flag for " + nc_var.name
ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
- # add new variable to list of aux variables
- nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
+ # add new variable to list of aux variables
+ nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
# Extract the variable data
var_data = np.array(nc.variables[current_var][:])
+ print('Not equal to zero test type 1')
+ print(str(np.where(ncVarOut[:]!=0)))
+
+ if (all(ncVarOut[:] == 0)):
+ print('All test specific qc values are zero before filling')
+
# Calculate dvar/dtime
var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
# For any change greater than change_per_hr, assign a qc value of 4
- nc.variables[current_var+'_quality_control_roc'][[x for x in abs(np.insert(var_roc,0,0)) > change_per_hr]] = 4
+ ncVarOut[[x for x in abs(np.insert(var_roc,0,0)) > change_per_hr]] = 4
+
+ # Extract global qc
+ qc_var = nc.variables[current_var + "_quality_control"]
+
+ # Overwrite global qc with any higher values from test specific qc
+ qc_var[:] = np.maximum(ncVarOut[:], qc_var[:])
- nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_roc"][:],nc.variables[current_var + "_quality_control"][:])
print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > change_per_hr])) + ' changes found above '+str(change_per_hr)+' '+nc.variables[current_var].units+' per hour')
@@ -159,27 +171,50 @@ def roc_test(nc,*args,target_vars_in=[]):
if nc_var.name + "_quality_control_roc" in nc.variables:
ncVarOut = nc.variables[nc_var.name + "_quality_control_roc"]
+ ncVarOut[:] = np.zeros(nc.variables[nc_var.name + "_quality_control_roc"].shape)
else:
ncVarOut = nc.createVariable(nc_var.name + "_quality_control_roc", "i1", nc_var.dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
- ncVarOut[:] = np.zeros(nc_var.shape)
+ ncVarOut[:] = 0
ncVarOut.long_name = "quality flag for " + nc_var.name
ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
- # add new variable to list of aux variables
- nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
-
+ # add new variable to list of aux variables
+ nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
+
# Extract the data
var_data = np.array(nc.variables[current_var])
+ print('Not equal to zero test type 2')
+ print(str(np.where(ncVarOut[:]!=0)))
+
+ if (all(ncVarOut[:] == 0)):
+ print('All test specific qc values are zero before filling')
+
# Calculate dvar/dtime
var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
# For any change greater than change_per_hr, assign a qc value of 4
- nc.variables[current_var+'_quality_control_roc'][[x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]]] = 4
+ ncVarOut[[x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]]] = 4
+
+ print('ncVarOut 4s')
+ print(str(np.where(ncVarOut[:]==4)))
+
+ print('Netcdf variable 4s')
+ print(str(np.where(nc.variables[nc_var.name + "_quality_control_roc"][:]==4)))
+
+ nc.variables[nc_var.name + "_quality_control_roc"][:] = ncVarOut[:]
+
+ print('Netcdf variable 4s after assignment')
+ print(str(np.where(nc.variables[nc_var.name + "_quality_control_roc"][:]==4)))
+
+
+ # Extract global qc
+ qc_var = nc.variables[current_var + "_quality_control"]
- nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_roc"][:],nc.variables[current_var + "_quality_control"][:])
+ # Overwrite global qc with any higher values from test specific qc
+ qc_var[:] = np.maximum(ncVarOut[:], qc_var[:])
print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]])) + ' changes found above '+str(rate_spec[current_var])+' '+nc.variables[current_var].units+' per hour')
diff --git a/ocean_dp/qc/spike_test.py b/ocean_dp/qc/spike_test.py
index 622d3a3..089cf63 100755
--- a/ocean_dp/qc/spike_test.py
+++ b/ocean_dp/qc/spike_test.py
@@ -115,15 +115,15 @@ def spike_test(nc, target_vars_in=[], thresh_low=default_low, thresh_high=defaul
print('High spike found')
#set corresponding QC value to...
- nc.variables[current_var+'_quality_control_spk'][i] = flag_high
+ ncVarOut[i] = flag_high
# Find the indices where qc isn't set to 4 (high spike), removing the final element as it can't be check for a spike
- low_spike_chk_idx = np.where(nc.variables[current_var+'_quality_control'][:]!=4)[0][0:-1]
+ low_spike_chk_idx = np.where(ncVarOut[:]!=4)[0][0:-1]
# Remove from the indices those that are either side of a high spike
- for i in np.where(nc.variables[current_var+'_quality_control'][:]==4)[0]:
+ for i in ncVarOut[:]==4:
low_spike_chk_idx=low_spike_chk_idx[low_spike_chk_idx!=[i-1]]
@@ -150,7 +150,7 @@ def spike_test(nc, target_vars_in=[], thresh_low=default_low, thresh_high=defaul
print('Low spike found')
#set corresponding QC value to...
- nc.variables[current_var+'_quality_control_spk'][i] = flag_low
+ ncVarOut[i] = flag_low
nc.variables[current_var + "_quality_control"][:] = np.maximum(nc.variables[current_var + "_quality_control_spk"][:],nc.variables[current_var + "_quality_control"][:])
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
index 6c8bd90..75fe6f4 100755
--- a/ocean_dp/sots_processing_runthrough.py
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -40,7 +40,7 @@
start = time.time()
# Set the working directory
-os.chdir('/Users/tru050/Desktop/sofs7.5 test data')
+os.chdir('/Users/tru050/Desktop/sofs6 test data')
# Make a list of FV00 filenames
fv00_files = glob.glob('*IMOS_ABOS-SOTS*FV00*.nc')
@@ -70,13 +70,13 @@
global_range.global_range(ncfile,'TEMP',40,-2)
# Rate of change
-rate_of_change_test.roc_test_files(fv01_pres_interp_files,'TEMP',20)
+rate_of_change_test.roc_test_files(fv01_pres_interp_files,'TEMP',10)
# Spike
spike_test.spike_test_files(fv01_pres_interp_files,target_vars_in=['TEMP'])
# Flatline
-flatline_test.flatline_test_files(fv01_pres_interp_files,['TEMP'])
+flatline_test.flatline_test_files(fv01_pres_interp_files,['TEMP'],window=10)
# Check qc process has worked
fv01_qc_checked = qc_checker.qc_checker_files(fv01_pres_interp_files,['TEMP'])
From 43dd011c0a4b308aecc79c344b47a96c75414153 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 19 May 2020 17:17:28 +1000
Subject: [PATCH 39/59] updating petes plotQC, building temp_stat_plot
---
ocean_dp/plotting/plotQC.py | 88 ++++++++++++++++++++
ocean_dp/qc/temp_diff_hist_extra.py | 16 ++++
ocean_dp/qc/temp_diff_histograms.py | 4 +-
ocean_dp/qc/temp_stat_plot.py | 107 +++++++++++++++++++++++++
ocean_dp/sots_processing_runthrough.py | 2 +-
5 files changed, 214 insertions(+), 3 deletions(-)
create mode 100755 ocean_dp/plotting/plotQC.py
create mode 100755 ocean_dp/qc/temp_stat_plot.py
diff --git a/ocean_dp/plotting/plotQC.py b/ocean_dp/plotting/plotQC.py
new file mode 100755
index 0000000..3060759
--- /dev/null
+++ b/ocean_dp/plotting/plotQC.py
@@ -0,0 +1,88 @@
+#!/usr/bin/python3
+
+# raw2netCDF
+# Copyright (C) 2019 Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import xarray as xr
+
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_pdf import PdfPages
+import seaborn as sns
+
+from pandas.plotting import register_matplotlib_converters
+
+import sys
+import os
+
+
+def plot_all(files):
+ register_matplotlib_converters()
+ plt.style.use('seaborn-darkgrid')
+ sns.set_context("paper")
+
+ pp = PdfPages(os.path.join(os.path.dirname(files[0]), "batch-qc.pdf"))
+
+ for f in files:
+ print("file ", f)
+ do_plot(f)
+
+ pp.savefig()
+ plt.close()
+
+ pp.close()
+
+
+def plot(fn):
+ register_matplotlib_converters()
+ plt.style.use('seaborn-darkgrid')
+ sns.set_context("paper")
+
+ pp = PdfPages(fn + "-qc.pdf")
+
+ do_plot(fn)
+
+ pp.savefig()
+ pp.close()
+ plt.close()
+
+
+def do_plot(fn):
+
+ #fn = sys.argv[1]
+ #fn = '/Users/pete/cloudstor/SOTS-Temp-Raw-Data/SOFS-7.5-2018/netCDF/IMOS_ABOS-SOTS_TIP_20180801_SOFS_FV01_SOFS-7.5-2018-Starmon-mini-4047-40m_END-20190331_C-20200429.nc'
+
+ DS = xr.open_dataset(fn)
+
+ ax1 = plt.subplot(2, 1, 1)
+ plt.plot(DS.TIME, DS.PAR)
+ plt.title(DS.deployment_code + " - " + DS.instrument_model + ":" + DS.instrument_serial_number + " @ " + str(DS.instrument_nominal_depth), {'fontsize': 8})
+
+ ax2 = plt.subplot(2, 1, 2, sharex=ax1)
+ aux = DS.PAR.ancillary_variables
+ a_vars = aux.split(" ")
+ for f in sorted(set(a_vars)):
+ print('aux var', f)
+ varn = f.split("_")
+ plt.plot(DS.TIME, DS.variables[f], label=varn[-1])
+ plt.ylim(0, 9)
+
+ plt.legend(prop={'size': 6})
+
+ DS.close()
+
+
+if __name__ == "__main__":
+ plot_all(sys.argv[1:])
\ No newline at end of file
diff --git a/ocean_dp/qc/temp_diff_hist_extra.py b/ocean_dp/qc/temp_diff_hist_extra.py
index d8b6019..14dc6cf 100755
--- a/ocean_dp/qc/temp_diff_hist_extra.py
+++ b/ocean_dp/qc/temp_diff_hist_extra.py
@@ -13,6 +13,22 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+import glob
+from netCDF4 import num2date
+from dateutil import parser
+import datetime
+
for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
for fname in files:
diff --git a/ocean_dp/qc/temp_diff_histograms.py b/ocean_dp/qc/temp_diff_histograms.py
index 2003afc..a3f8420 100755
--- a/ocean_dp/qc/temp_diff_histograms.py
+++ b/ocean_dp/qc/temp_diff_histograms.py
@@ -130,7 +130,7 @@ def last_four(entry):
deployments.sort(key=last_four)
-
+#######
@@ -207,7 +207,7 @@ def last_four(entry):
for plt_idx,dep_name in zip(range(0,len(deployments)),deployments):
- print('plotting '+ str(len(all_deployment_dtemp_dtime[plt_idx])) + ' values')
+ print('plotting '+ str((all_deployment_dtemp_dtime[plt_idx])) + ' values')
hist_data = ax[plt_idx].hist(all_deployment_dtemp_dtime[plt_idx],21,log=True)
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
new file mode 100755
index 0000000..752ebe5
--- /dev/null
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+import pandas as pd
+
+# creates an empty array to store the names of the SOTS deployments
+deployments = []
+
+# loops through all the folders and files contained in the folder
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ # if the folder/file name contains 'Pulse' or 'SOFS' and doesn't contain '.', append it to deployments
+ if (('Pulse' in x) or ('SOFS' in x)) and ('.' not in x):
+
+ deployments.append(x)
+
+# create a dataframe to store extract information
+temp_ensemble = pd.DataFrame(columns = ["Temp rate of change","QC","Nominal depth","Deployment"])
+
+# loops through all files in the directory
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ # for each netcdf file labelled as FV01
+ if fname.endswith('.nc') and 'FV01' in fname:
+
+ # print the filename
+ print(fname)
+
+ # open the file
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ # check that the in_out_water test has been run on the file
+ if 'TEMP_quality_control_io' in list(nc.variables):
+
+ # check that the file has a single dimension temperature vector, and that the time format is correct
+ if np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ # calculate temperature changes for in water data
+ nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+
+ # extract the time data
+ nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes in hours
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # calculate the rate of change of temperature wrt time (degrees °C per hour)
+ nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+
+
+
+ # extract temp_qc data
+ nc_temp_qc = np.array(nc.variables['TEMP_quality_control'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+
+ # calculate qc values for each nc_dtemp_dtime by taking the maximum of the qc values of the two contributing temps
+ nc_dtemp_dtime_qc = pd.Series(nc_temp_qc).rolling(2).max().dropna().to_numpy()
+
+
+ # extract sensor nominal depth
+ nc_nom_depth = np.array(nc.variables['NOMINAL_DEPTH'])
+
+
+ # extract deployment name
+ nc_deployment = nc.deployment_code
+
+ # Next step: append all this information to temp ensemble!
+
+ nc.close()
+
+
+
+ # pd.Series(lst).rolling(5).max().dropna().to_numpy()
+
+
+
+
+
\ No newline at end of file
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
index 75fe6f4..c939838 100755
--- a/ocean_dp/sots_processing_runthrough.py
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -40,7 +40,7 @@
start = time.time()
# Set the working directory
-os.chdir('/Users/tru050/Desktop/sofs6 test data')
+#os.chdir('/Users/tru050/Desktop/sofs6 test data')
# Make a list of FV00 filenames
fv00_files = glob.glob('*IMOS_ABOS-SOTS*FV00*.nc')
From 4a9590f5c8dfd95ccd2ad9d67baccdea4a097556 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 20 May 2020 10:15:26 +1000
Subject: [PATCH 40/59] Update temp_stat_plot.py
Code loads and processes SOTS data for std calc
---
ocean_dp/qc/temp_stat_plot.py | 189 ++++++++++++++++++++++++++++++++--
1 file changed, 178 insertions(+), 11 deletions(-)
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index 752ebe5..ab93f2c 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -28,36 +28,100 @@
from sigfig import round
import pandas as pd
+############################# Data extraction ################################
+
# creates an empty array to store the names of the SOTS deployments
deployments = []
+checked_files = []
+
+processed_files = []
+
# loops through all the folders and files contained in the folder
for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
# if the folder/file name contains 'Pulse' or 'SOFS' and doesn't contain '.', append it to deployments
- if (('Pulse' in x) or ('SOFS' in x)) and ('.' not in x):
+ if (('Pulse' in x) or ('SOFS' in x)) and ('.p' not in x):
deployments.append(x)
# create a dataframe to store extract information
-temp_ensemble = pd.DataFrame(columns = ["Temp rate of change","QC","Nominal depth","Deployment"])
+sots_temp_ensemble = pd.DataFrame(columns = ["Temp rate of change","QC","Nominal depth","Deployment"])
# loops through all files in the directory
for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
for fname in files:
- # for each netcdf file labelled as FV01
- if fname.endswith('.nc') and 'FV01' in fname:
+ # append the filename to the list of checked files
+ checked_files.append(fname)
+
+ # for each netcdf file labelled as FV01 and containing a deployment in its name
+ if fname.endswith('.nc') and 'FV01' in fname and any(ele in fname for ele in deployments):
# print the filename
print(fname)
# open the file
- nc = Dataset(os.path.join(root,fname), mode = 'r')
+ nc = Dataset(os.path.join(root,fname), mode = 'a')
+
+ # check file contains temperature data
+ if 'TEMP' in list(nc.variables):
- # check that the in_out_water test has been run on the file
- if 'TEMP_quality_control_io' in list(nc.variables):
+ # check that the in_out_water test has been run on the file, if not run in_out_water code
+ if not 'TEMP_quality_control_io' in list(nc.variables):
+
+ # run in_out_water script - uncommented at this point as just copied and pasted
+ var_name = 'TEMP'
+ nc_vars = nc.variables
+ to_add = []
+ if var_name:
+ to_add.append(var_name)
+ else:
+ for v in nc_vars:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+
+ time_var = nc_vars["TIME"]
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+ time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+
+ print('deployment time', time_deploy)
+
+ print(to_add)
+
+ # create a mask for the time range
+ mask = (time <= time_deploy) | (time >= time_recovery)
+
+ for v in to_add:
+ if "TIME" in nc_vars[v].dimensions:
+ if v.endswith("_quality_control"):
+ print("QC time dim ", v)
+
+ ncVarOut = nc_vars[v]
+ ncVarOut[mask] = 7
+ else:
+ # create a qc variable just for this test flags
+ if v + "_quality_control_io" in nc.variables:
+ ncVarOut = nc.variables[v + "_quality_control_io"]
+ else:
+ ncVarOut = nc.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_vars[v].shape)
+ ncVarOut.long_name = "quality flag for " + v
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+ ncVarOut[mask] = 7
+
+ nc.variables[v + "_quality_control"][:] = np.maximum(nc.variables[v + "_quality_control_io"][:],nc.variables[v + "_quality_control"][:])
+
+ nc.file_version = "Level 1 - Quality Controlled Data"
+
+
# check that the file has a single dimension temperature vector, and that the time format is correct
if np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
@@ -86,22 +150,125 @@
nc_dtemp_dtime_qc = pd.Series(nc_temp_qc).rolling(2).max().dropna().to_numpy()
+
# extract sensor nominal depth
nc_nom_depth = np.array(nc.variables['NOMINAL_DEPTH'])
+ # create a vector the same size as nc_dtemp_dtime with the nominal depth
+ nc_nom_depth_vector = np.repeat(nc_nom_depth,len(nc_dtemp_dtime))
+
+
# extract deployment name
nc_deployment = nc.deployment_code
- # Next step: append all this information to temp ensemble!
+ # create a list the same size as nc_dtemp_dtime with the deployment name
+ nc_deployment_list = [nc_deployment] * len(nc_dtemp_dtime)
+
+
+
+ # combine information into an length x 4 dataframe
+ nc_temp_ensemble = pd.DataFrame({"Temp rate of change":nc_dtemp_dtime,"QC":nc_dtemp_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
+
+ # append the current netcdf's dataframe to the sots_temp_ensemble
+ sots_temp_ensemble = sots_temp_ensemble.append(nc_temp_ensemble)
+
+ # append the filename to the list of processed files
+ processed_files.append(fname)
+
nc.close()
+############################# Data processing ################################
- # pd.Series(lst).rolling(5).max().dropna().to_numpy()
-
-
+# creates a new dataframe containing only data with QC < 3
+sots_temp_ensemble_qc210 = sots_temp_ensemble[sots_temp_ensemble["QC"]<3]
+
+# calculates overall standard deviation
+std_total = np.std(sots_temp_ensemble_qc210["Temp rate of change"])
+
+
+
+
+# creates an emply list to store data deployment by deployment
+std_by_deployment_data = []
+
+# creates a dict of deployment names and standard deviations
+for i in sots_temp_ensemble_qc210.Deployment.unique():
+ std_by_deployment_data.append(
+ {
+ 'Deployment': i,
+ 'STD': np.std(sots_temp_ensemble_qc210["Temp rate of change"][sots_temp_ensemble_qc210["Deployment"]==i]),
+ }
+ )
+
+# creates a Dataframe from the dict
+std_by_deployment = pd.DataFrame(std_by_deployment_data)
+
+
+
+
+
+# =============================================================================
+# std_by_depth: this function takes two compulsary arguments (top: the shallowest
+# depth(m)), bottom: the deepest depth(m)) and one option argument (deployment_in:
+# the deployment from which data will be taken). The function will return the standard
+# deviation of the d(Temp)/d(Time) data from sensors with nominal depths at and
+# between the two depths, and from only the deployment_in if specified.
+#
+# sample call: std_by_depth(500,10000,'SOFS-7.5-2018')
+#
+# this will give the std of all d(Temp)/d(Time) data from SOFS-7.5-2018 from
+# sensors with 500m <= nominal depth <= 10000m
+# =============================================================================
+
+def std_by_depth(top,bottom,deployment_in=None):
+
+ if deployment_in == None:
+
+ # subsamples sots_temp_ensemble_qc210 based on depth
+ target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom)]
+
+ # calculates the standard deviation of the subsample
+ target_std = np.std(target_ensemble["Temp rate of change"])
+
+ # returns the standard deviation of the subsample
+ return target_std
+
+ else:
+
+ # subsamples sots_temp_ensemble_qc210 based on depth
+ target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom) & (sots_temp_ensemble_qc210["Deployment"]==deployment_in)]
+
+ # calculates the standard deviation of the subsample
+ target_std = np.std(target_ensemble["Temp rate of change"])
+
+ # returns the standard deviation of the subsample
+ return target_std
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
From f1d5618bc204a64bb15c19a4d525832eee08bd58 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 20 May 2020 11:53:13 +1000
Subject: [PATCH 41/59] Update temp_stat_plot.py
adding plotting to function and improved if else layout
---
ocean_dp/qc/temp_stat_plot.py | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index ab93f2c..bc73767 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -230,22 +230,32 @@ def std_by_depth(top,bottom,deployment_in=None):
# subsamples sots_temp_ensemble_qc210 based on depth
target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom)]
- # calculates the standard deviation of the subsample
- target_std = np.std(target_ensemble["Temp rate of change"])
-
- # returns the standard deviation of the subsample
- return target_std
-
else:
# subsamples sots_temp_ensemble_qc210 based on depth
target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom) & (sots_temp_ensemble_qc210["Deployment"]==deployment_in)]
- # calculates the standard deviation of the subsample
- target_std = np.std(target_ensemble["Temp rate of change"])
+ # calculates the mean of the subsample
+ target_mean = np.mean(target_ensemble["Temp rate of change"])
+
+ # calculates the standard deviation of the subsample
+ target_std = np.std(target_ensemble["Temp rate of change"])
- # returns the standard deviation of the subsample
- return target_std
+ line_thick = 1
+
+ ax_hist=plt.axes()
+
+ target_ensemble.hist(column="Temp rate of change",bins=100,log=True,ax=ax_hist)
+
+ ax_hist.axvline(x=target_mean+3*target_std,color='r',linewidth=line_thick)
+
+ ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick)
+
+
+ # returns the standard deviation of the subsample
+ return target_std
+
+
From 528c5cf2de4062d76e59edf0524ee3405848a144 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 20 May 2020 11:54:25 +1000
Subject: [PATCH 42/59] Update temp_stat_plot.py
---
ocean_dp/qc/temp_stat_plot.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index bc73767..63673ba 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -241,12 +241,16 @@ def std_by_depth(top,bottom,deployment_in=None):
# calculates the standard deviation of the subsample
target_std = np.std(target_ensemble["Temp rate of change"])
+ # sets line thickness for plot
line_thick = 1
+ # creates axes for histogram
ax_hist=plt.axes()
+ # plots a histogram of the data selected
target_ensemble.hist(column="Temp rate of change",bins=100,log=True,ax=ax_hist)
+ # draws lines at the mean +- 3 STD on the histogram
ax_hist.axvline(x=target_mean+3*target_std,color='r',linewidth=line_thick)
ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick)
From 6f3748ecb6dcd061acae485cc099e5ad22fc5ded Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 20 May 2020 14:45:28 +1000
Subject: [PATCH 43/59] Update temp_stat_plot.py
---
ocean_dp/qc/temp_stat_plot.py | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index 63673ba..fd0b86a 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -255,6 +255,30 @@ def std_by_depth(top,bottom,deployment_in=None):
ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick)
+ # sets the x label
+ ax_hist.set_xlabel('°C/hr')
+
+
+ label_coords = (0.65, 0.8)
+ label_method = 'axes fraction'
+
+ anno = 'mean = '+str(round(float(target_mean),sigfigs=3))
+
+ anno += '\n3 STD = ' + str(round(float(3*target_std),sigfigs=3))
+
+ anno += '\nno. samples = ' + str(len(target_ensemble))
+
+ anno += '\n'+str(top)+'m <= depth <= '+str(bottom)+'m'
+
+ if deployment_in == None:
+
+ anno += '\nall available data'
+
+ else:
+
+ anno += '\n'+deployment_in
+
+ ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
# returns the standard deviation of the subsample
return target_std
From 7d28a8c13ceb46e64b14e82e9565502a4fed916c Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 20 May 2020 16:19:29 +1000
Subject: [PATCH 44/59] creation of psal stat plot
---
ocean_dp/qc/psal_stat_plot.py | 310 ++++++++++++++++++++++++++++++++++
ocean_dp/qc/temp_stat_plot.py | 2 +-
2 files changed, 311 insertions(+), 1 deletion(-)
create mode 100755 ocean_dp/qc/psal_stat_plot.py
diff --git a/ocean_dp/qc/psal_stat_plot.py b/ocean_dp/qc/psal_stat_plot.py
new file mode 100755
index 0000000..8449b2a
--- /dev/null
+++ b/ocean_dp/qc/psal_stat_plot.py
@@ -0,0 +1,310 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+import pandas as pd
+
+############################# Data extraction ################################
+
+# creates an empty array to store the names of the SOTS deployments
+deployments = []
+
+checked_files = []
+
+processed_files = []
+
+# loops through all the folders and files contained in the folder
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ # if the folder/file name contains 'Pulse' or 'SOFS' and doesn't contain '.', append it to deployments
+ if (('Pulse' in x) or ('SOFS' in x)) and ('.p' not in x):
+
+ deployments.append(x)
+
+
+# create a dataframe to store extract information
+sots_psal_ensemble = pd.DataFrame(columns = ["PSAL rate of change","QC","Nominal depth","Deployment"])
+
+
+# loops through all files in the directory
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ # append the filename to the list of checked files
+ checked_files.append(fname)
+
+ # for each netcdf file labelled as FV01 and containing a deployment in its name
+ if fname.endswith('.nc') and 'FV01' in fname and any(ele in fname for ele in deployments):
+
+ # print the filename
+ print(fname)
+
+ # open the file
+ nc = Dataset(os.path.join(root,fname), mode = 'a')
+
+ # check file contains psalerature data
+ if 'PSAL' in list(nc.variables):
+
+ # check that the in_out_water test has been run on the file, if not run in_out_water code
+ if not 'PSAL_quality_control_io' in list(nc.variables):
+
+ # run in_out_water script - uncommented at this point as just copied and pasted
+ var_name = 'PSAL'
+ nc_vars = nc.variables
+ to_add = []
+ if var_name:
+ to_add.append(var_name)
+ else:
+ for v in nc_vars:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+
+ time_var = nc_vars["TIME"]
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+ time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+
+ print('deployment time', time_deploy)
+
+ print(to_add)
+
+ # create a mask for the time range
+ mask = (time <= time_deploy) | (time >= time_recovery)
+
+ for v in to_add:
+ if "TIME" in nc_vars[v].dimensions:
+ if v.endswith("_quality_control"):
+ print("QC time dim ", v)
+
+ ncVarOut = nc_vars[v]
+ ncVarOut[mask] = 7
+ else:
+ # create a qc variable just for this test flags
+ if v + "_quality_control_io" in nc.variables:
+ ncVarOut = nc.variables[v + "_quality_control_io"]
+ else:
+ ncVarOut = nc.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ ncVarOut[:] = np.zeros(nc_vars[v].shape)
+ ncVarOut.long_name = "quality flag for " + v
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+ nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+ ncVarOut[mask] = 7
+
+ nc.variables[v + "_quality_control"][:] = np.maximum(nc.variables[v + "_quality_control_io"][:],nc.variables[v + "_quality_control"][:])
+
+ nc.file_version = "Level 1 - Quality Controlled Data"
+
+
+
+ # check that the file has a single dimension psalerature vector, and that the time format is correct
+ if np.array(nc.variables['PSAL'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+
+ # calculate psalerature changes for in water data
+ nc_psal_diffs = np.diff(np.array(nc.variables['PSAL'][np.array(nc.variables['PSAL_quality_control'][:])!=7]))
+
+ # extract the time data
+ nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['PSAL_quality_control'][:])!=7])
+
+ # Convert from days to hours
+ nc_time_hr = nc_time*24
+
+ # Calculate time changes in hours
+ nc_time_hr_diffs = np.diff(nc_time_hr)
+
+ # calculate the rate of change of psalerature wrt time (degrees °C per hour)
+ nc_dpsal_dtime = np.divide(nc_psal_diffs,nc_time_hr_diffs)
+
+
+
+ # extract psal_qc data
+ nc_psal_qc = np.array(nc.variables['PSAL_quality_control'][np.array(nc.variables['PSAL_quality_control'][:])!=7])
+
+ # calculate qc values for each nc_dpsal_dtime by taking the maximum of the qc values of the two contributing psals
+ nc_dpsal_dtime_qc = pd.Series(nc_psal_qc).rolling(2).max().dropna().to_numpy()
+
+
+
+ # extract sensor nominal depth
+ nc_nom_depth = np.array(nc.variables['NOMINAL_DEPTH'])
+
+ # create a vector the same size as nc_dpsal_dtime with the nominal depth
+ nc_nom_depth_vector = np.repeat(nc_nom_depth,len(nc_dpsal_dtime))
+
+
+
+ # extract deployment name
+ nc_deployment = nc.deployment_code
+
+ # create a list the same size as nc_dpsal_dtime with the deployment name
+ nc_deployment_list = [nc_deployment] * len(nc_dpsal_dtime)
+
+
+
+ # combine information into an length x 4 dataframe
+ nc_psal_ensemble = pd.DataFrame({"Psal rate of change":nc_dpsal_dtime,"QC":nc_dpsal_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
+
+ # append the current netcdf's dataframe to the sots_psal_ensemble
+ sots_psal_ensemble = sots_psal_ensemble.append(nc_psal_ensemble)
+
+ # append the filename to the list of processed files
+ processed_files.append(fname)
+
+
+ nc.close()
+
+
+############################# Data processing ################################
+
+# creates a new dataframe containing only data with QC < 3
+sots_psal_ensemble_qc210 = sots_psal_ensemble[sots_psal_ensemble["QC"]<3]
+
+# calculates overall standard deviation
+std_total = np.std(sots_psal_ensemble_qc210["Psal rate of change"])
+
+
+
+
+# creates an emply list to store data deployment by deployment
+std_by_deployment_data = []
+
+# creates a dict of deployment names and standard deviations
+for i in sots_psal_ensemble_qc210.Deployment.unique():
+ std_by_deployment_data.append(
+ {
+ 'Deployment': i,
+ 'STD': np.std(sots_psal_ensemble_qc210["Psal rate of change"][sots_psal_ensemble_qc210["Deployment"]==i]),
+ }
+ )
+
+# creates a Dataframe from the dict
+std_by_deployment = pd.DataFrame(std_by_deployment_data)
+
+
+
+
+
+# =============================================================================
+# std_by_depth: this function takes two compulsary arguments (top: the shallowest
+# depth(m)), bottom: the deepest depth(m)) and one option argument (deployment_in:
+# the deployment from which data will be taken). The function will return the standard
+# deviation of the d(psal)/d(Time) data from sensors with nominal depths at and
+# between the two depths, and from only the deployment_in if specified.
+#
+# sample call: std_by_depth(500,10000,'SOFS-7.5-2018')
+#
+# this will give the std of all d(psal)/d(Time) data from SOFS-7.5-2018 from
+# sensors with 500m <= nominal depth <= 10000m
+# =============================================================================
+
+def std_by_depth_psal(top,bottom,deployment_in=None):
+
+ if deployment_in == None:
+
+ # subsamples sots_psal_ensemble_qc210 based on depth
+ target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom)]
+
+ else:
+
+ # subsamples sots_psal_ensemble_qc210 based on depth
+ target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom) & (sots_psal_ensemble_qc210["Deployment"]==deployment_in)]
+
+ # calculates the mean of the subsample
+ target_mean = np.mean(target_ensemble["Psal rate of change"])
+
+ # calculates the standard deviation of the subsample
+ target_std = np.std(target_ensemble["Psal rate of change"])
+
+ # sets line thickness for plot
+ line_thick = 1
+
+ # creates axes for histogram
+ ax_hist=plt.axes()
+
+ # plots a histogram of the data selected
+ target_ensemble.hist(column="Psal rate of change",bins=100,log=True,ax=ax_hist)
+
+ # draws lines at the mean +- 3 STD on the histogram
+ ax_hist.axvline(x=target_mean+3*target_std,color='r',linewidth=line_thick)
+
+ ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick)
+
+ # sets the x label
+ ax_hist.set_xlabel('PSU/hr')
+
+
+ label_coords = (0.65, 0.8)
+ label_method = 'axes fraction'
+
+ anno = 'mean = '+str(round(float(target_mean),sigfigs=3))
+
+ anno += '\n3 STD = ' + str(round(float(3*target_std),sigfigs=3))
+
+ anno += '\nno. samples = ' + str(len(target_ensemble))
+
+ anno += '\n'+str(top)+'m <= depth <= '+str(bottom)+'m'
+
+ if deployment_in == None:
+
+ anno += '\nall available data'
+
+ else:
+
+ anno += '\n'+deployment_in
+
+ ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
+
+ # returns the standard deviation of the subsample
+ return target_std
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index fd0b86a..6eda254 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -223,7 +223,7 @@
# sensors with 500m <= nominal depth <= 10000m
# =============================================================================
-def std_by_depth(top,bottom,deployment_in=None):
+def std_by_depth_temp(top,bottom,deployment_in=None):
if deployment_in == None:
From 56e469a954da103104a8089a40d7cf4f807bd3dc Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 21 May 2020 09:59:19 +1000
Subject: [PATCH 45/59] modified stat plot code
-now swaps depths if inputted incorrectly
-can now input list of deployments
-fixed label location
---
ocean_dp/qc/psal_stat_plot.py | 33 ++++++++++++++++++++++----
ocean_dp/qc/temp_stat_plot.py | 27 +++++++++++++++++++--
ocean_dp/sots_processing_runthrough.py | 2 +-
3 files changed, 55 insertions(+), 7 deletions(-)
diff --git a/ocean_dp/qc/psal_stat_plot.py b/ocean_dp/qc/psal_stat_plot.py
index 8449b2a..46999bc 100755
--- a/ocean_dp/qc/psal_stat_plot.py
+++ b/ocean_dp/qc/psal_stat_plot.py
@@ -228,15 +228,32 @@
def std_by_depth_psal(top,bottom,deployment_in=None):
+ # if user incorrectly inputs depths, swap them and run the code
+ if top > bottom:
+
+ top, bottom = bottom, top
+
if deployment_in == None:
# subsamples sots_psal_ensemble_qc210 based on depth
target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom)]
+ elif isinstance(deployment_in, list):
+
+ # subsamples sots_psal_ensemble_qc210 based on depth
+ target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom) & (sots_psal_ensemble_qc210.Deployment.isin(deployment_in))]
+
+
else:
# subsamples sots_psal_ensemble_qc210 based on depth
target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom) & (sots_psal_ensemble_qc210["Deployment"]==deployment_in)]
+
+ # if not data is available with the given choices, end the function
+ if len(target_ensemble)==0:
+
+ return 'No data available for those choices'
+
# calculates the mean of the subsample
target_mean = np.mean(target_ensemble["Psal rate of change"])
@@ -262,7 +279,7 @@ def std_by_depth_psal(top,bottom,deployment_in=None):
ax_hist.set_xlabel('PSU/hr')
- label_coords = (0.65, 0.8)
+ label_coords = (0.70, 0.99)
label_method = 'axes fraction'
anno = 'mean = '+str(round(float(target_mean),sigfigs=3))
@@ -277,19 +294,27 @@ def std_by_depth_psal(top,bottom,deployment_in=None):
anno += '\nall available data'
+ elif isinstance(deployment_in, list):
+
+ anno += '\n'
+
+ anno += '\n'.join(deployment_in)
+
else:
anno += '\n'+deployment_in
- ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
+ ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8,va = "top", ha="left")
# returns the standard deviation of the subsample
return target_std
-
-
+
+
+
+
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index 6eda254..9efd849 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -225,15 +225,32 @@
def std_by_depth_temp(top,bottom,deployment_in=None):
+ # if user incorrectly inputs depths, swap them and run the code
+ if top > bottom:
+
+ top, bottom = bottom, top
+
if deployment_in == None:
# subsamples sots_temp_ensemble_qc210 based on depth
target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom)]
+ elif isinstance(deployment_in, list):
+
+ # subsamples sots_temp_ensemble_qc210 based on depth
+ target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom) & (sots_temp_ensemble_qc210.Deployment.isin(deployment_in))]
+
+
else:
# subsamples sots_temp_ensemble_qc210 based on depth
target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom) & (sots_temp_ensemble_qc210["Deployment"]==deployment_in)]
+
+ # if not data is available with the given choices, end the function
+ if len(target_ensemble)==0:
+
+ return 'No data available for those choices'
+
# calculates the mean of the subsample
target_mean = np.mean(target_ensemble["Temp rate of change"])
@@ -259,7 +276,7 @@ def std_by_depth_temp(top,bottom,deployment_in=None):
ax_hist.set_xlabel('°C/hr')
- label_coords = (0.65, 0.8)
+ label_coords = (0.70, 0.99)
label_method = 'axes fraction'
anno = 'mean = '+str(round(float(target_mean),sigfigs=3))
@@ -274,11 +291,17 @@ def std_by_depth_temp(top,bottom,deployment_in=None):
anno += '\nall available data'
+ elif isinstance(deployment_in, list):
+
+ anno += '\n'
+
+ anno += '\n'.join(deployment_in)
+
else:
anno += '\n'+deployment_in
- ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
+ ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8,va = "top", ha="left")
# returns the standard deviation of the subsample
return target_std
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
index c939838..cc7bc30 100755
--- a/ocean_dp/sots_processing_runthrough.py
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -40,7 +40,7 @@
start = time.time()
# Set the working directory
-#os.chdir('/Users/tru050/Desktop/sofs6 test data')
+#os.chdir('/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data/SOFS-5-2015')
# Make a list of FV00 filenames
fv00_files = glob.glob('*IMOS_ABOS-SOTS*FV00*.nc')
From 4317aff5f13bead9c778a4c68ec512b03436e61d Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 21 May 2020 13:16:29 +1000
Subject: [PATCH 46/59] Update temp_stat_plot.py
adds per sample functionality
---
ocean_dp/qc/temp_stat_plot.py | 41 +++++++++++++++++++++++++++--------
1 file changed, 32 insertions(+), 9 deletions(-)
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index 9efd849..bc1020d 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -46,7 +46,7 @@
deployments.append(x)
# create a dataframe to store extract information
-sots_temp_ensemble = pd.DataFrame(columns = ["Temp rate of change","QC","Nominal depth","Deployment"])
+sots_temp_ensemble = pd.DataFrame(columns = ["dTemp/dtime","dTemp/dSample","QC","Nominal depth","Deployment"])
# loops through all files in the directory
for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
@@ -168,7 +168,7 @@
# combine information into an length x 4 dataframe
- nc_temp_ensemble = pd.DataFrame({"Temp rate of change":nc_dtemp_dtime,"QC":nc_dtemp_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
+ nc_temp_ensemble = pd.DataFrame({"dTemp/dtime":nc_dtemp_dtime,"dTemp/dSample":nc_temp_diffs,"QC":nc_dtemp_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
# append the current netcdf's dataframe to the sots_temp_ensemble
sots_temp_ensemble = sots_temp_ensemble.append(nc_temp_ensemble)
@@ -186,7 +186,7 @@
sots_temp_ensemble_qc210 = sots_temp_ensemble[sots_temp_ensemble["QC"]<3]
# calculates overall standard deviation
-std_total = np.std(sots_temp_ensemble_qc210["Temp rate of change"])
+std_time_total = np.std(sots_temp_ensemble_qc210["dTemp/dtime"])
@@ -199,7 +199,8 @@
std_by_deployment_data.append(
{
'Deployment': i,
- 'STD': np.std(sots_temp_ensemble_qc210["Temp rate of change"][sots_temp_ensemble_qc210["Deployment"]==i]),
+ 'STD time': np.std(sots_temp_ensemble_qc210["dTemp/dtime"][sots_temp_ensemble_qc210["Deployment"]==i]),
+ 'STD sample': np.std(sots_temp_ensemble_qc210["dTemp/dSample"][sots_temp_ensemble_qc210["Deployment"]==i])
}
)
@@ -223,7 +224,23 @@
# sensors with 500m <= nominal depth <= 10000m
# =============================================================================
-def std_by_depth_temp(top,bottom,deployment_in=None):
+def std_by_depth_temp(top,bottom,deployment_in=None,rate='time'):
+
+ selection = ''
+
+ if rate == 'time':
+
+ selection = "dTemp/dtime"
+
+ elif rate == 'sample':
+
+ selection = "dTemp/dSample"
+
+ else:
+
+ return "incorrect rate specification"
+
+
# if user incorrectly inputs depths, swap them and run the code
if top > bottom:
@@ -253,10 +270,10 @@ def std_by_depth_temp(top,bottom,deployment_in=None):
# calculates the mean of the subsample
- target_mean = np.mean(target_ensemble["Temp rate of change"])
+ target_mean = np.mean(target_ensemble[selection])
# calculates the standard deviation of the subsample
- target_std = np.std(target_ensemble["Temp rate of change"])
+ target_std = np.std(target_ensemble[selection])
# sets line thickness for plot
line_thick = 1
@@ -265,7 +282,7 @@ def std_by_depth_temp(top,bottom,deployment_in=None):
ax_hist=plt.axes()
# plots a histogram of the data selected
- target_ensemble.hist(column="Temp rate of change",bins=100,log=True,ax=ax_hist)
+ target_ensemble.hist(column=selection,bins=100,log=True,ax=ax_hist)
# draws lines at the mean +- 3 STD on the histogram
ax_hist.axvline(x=target_mean+3*target_std,color='r',linewidth=line_thick)
@@ -273,7 +290,13 @@ def std_by_depth_temp(top,bottom,deployment_in=None):
ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick)
# sets the x label
- ax_hist.set_xlabel('°C/hr')
+ if rate == 'time':
+
+ ax_hist.set_xlabel('°C/hr')
+
+ elif rate == 'sample':
+
+ ax_hist.set_xlabel('°C')
label_coords = (0.70, 0.99)
From f6b597fe273c9b3cfd65ca00daea4dec3fc008b3 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Mon, 25 May 2020 15:29:52 +1000
Subject: [PATCH 47/59] update to stat plots
changed labels, added per sample functionality to psal plotter
---
ocean_dp/plotting/batch-qc.pdf | Bin 0 -> 208 bytes
ocean_dp/plotting/plotQC.py | 4 +--
ocean_dp/qc/psal_stat_plot.py | 42 +++++++++++++++++++------
ocean_dp/qc/temp_stat_plot.py | 9 +++++-
ocean_dp/sots_processing_runthrough.py | 4 +--
5 files changed, 44 insertions(+), 15 deletions(-)
create mode 100644 ocean_dp/plotting/batch-qc.pdf
diff --git a/ocean_dp/plotting/batch-qc.pdf b/ocean_dp/plotting/batch-qc.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..5e7a87baea6886313712bca3fae565bb0266a9f5
GIT binary patch
literal 208
zcmY!laB$t4Qr
JV8ap40RR*-I}iW>
literal 0
HcmV?d00001
diff --git a/ocean_dp/plotting/plotQC.py b/ocean_dp/plotting/plotQC.py
index 3060759..979976a 100755
--- a/ocean_dp/plotting/plotQC.py
+++ b/ocean_dp/plotting/plotQC.py
@@ -67,11 +67,11 @@ def do_plot(fn):
DS = xr.open_dataset(fn)
ax1 = plt.subplot(2, 1, 1)
- plt.plot(DS.TIME, DS.PAR)
+ plt.plot(DS.TIME, DS.TEMP)
plt.title(DS.deployment_code + " - " + DS.instrument_model + ":" + DS.instrument_serial_number + " @ " + str(DS.instrument_nominal_depth), {'fontsize': 8})
ax2 = plt.subplot(2, 1, 2, sharex=ax1)
- aux = DS.PAR.ancillary_variables
+ aux = DS.TEMP.ancillary_variables
a_vars = aux.split(" ")
for f in sorted(set(a_vars)):
print('aux var', f)
diff --git a/ocean_dp/qc/psal_stat_plot.py b/ocean_dp/qc/psal_stat_plot.py
index 46999bc..cb9cbc7 100755
--- a/ocean_dp/qc/psal_stat_plot.py
+++ b/ocean_dp/qc/psal_stat_plot.py
@@ -48,7 +48,7 @@
# create a dataframe to store extract information
-sots_psal_ensemble = pd.DataFrame(columns = ["PSAL rate of change","QC","Nominal depth","Deployment"])
+sots_psal_ensemble = pd.DataFrame(columns = ["dPsal/dtime","dPsal/dSample","QC","Nominal depth","Deployment"])
# loops through all files in the directory
@@ -171,7 +171,7 @@
# combine information into an length x 4 dataframe
- nc_psal_ensemble = pd.DataFrame({"Psal rate of change":nc_dpsal_dtime,"QC":nc_dpsal_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
+ nc_psal_ensemble = pd.DataFrame({"dPsal/dtime":nc_dpsal_dtime,"dPsal/dSample":nc_psal_diffs,"QC":nc_dpsal_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
# append the current netcdf's dataframe to the sots_psal_ensemble
sots_psal_ensemble = sots_psal_ensemble.append(nc_psal_ensemble)
@@ -189,7 +189,7 @@
sots_psal_ensemble_qc210 = sots_psal_ensemble[sots_psal_ensemble["QC"]<3]
# calculates overall standard deviation
-std_total = np.std(sots_psal_ensemble_qc210["Psal rate of change"])
+std_total = np.std(sots_psal_ensemble_qc210["dPsal/dtime"])
@@ -202,7 +202,8 @@
std_by_deployment_data.append(
{
'Deployment': i,
- 'STD': np.std(sots_psal_ensemble_qc210["Psal rate of change"][sots_psal_ensemble_qc210["Deployment"]==i]),
+ 'STD': np.std(sots_psal_ensemble_qc210["dPsal/dtime"][sots_psal_ensemble_qc210["Deployment"]==i]),
+ 'STD sample': np.std(sots_psal_ensemble_qc210["dPsal/dSample"][sots_psal_ensemble_qc210["Deployment"]==i])
}
)
@@ -226,7 +227,23 @@
# sensors with 500m <= nominal depth <= 10000m
# =============================================================================
-def std_by_depth_psal(top,bottom,deployment_in=None):
+def std_by_depth_psal(top,bottom,deployment_in=None,rate='time'):
+
+ selection = ''
+
+ if rate == 'time':
+
+ selection = "dPsal/dtime"
+
+ elif rate == 'sample':
+
+ selection = "dPsal/dSample"
+
+ else:
+
+ return "incorrect rate specification"
+
+
# if user incorrectly inputs depths, swap them and run the code
if top > bottom:
@@ -256,10 +273,10 @@ def std_by_depth_psal(top,bottom,deployment_in=None):
# calculates the mean of the subsample
- target_mean = np.mean(target_ensemble["Psal rate of change"])
+ target_mean = np.mean(target_ensemble[selection])
# calculates the standard deviation of the subsample
- target_std = np.std(target_ensemble["Psal rate of change"])
+ target_std = np.std(target_ensemble[selection])
# sets line thickness for plot
line_thick = 1
@@ -268,7 +285,7 @@ def std_by_depth_psal(top,bottom,deployment_in=None):
ax_hist=plt.axes()
# plots a histogram of the data selected
- target_ensemble.hist(column="Psal rate of change",bins=100,log=True,ax=ax_hist)
+ target_ensemble.hist(column=selection,bins=100,log=True,ax=ax_hist)
# draws lines at the mean +- 3 STD on the histogram
ax_hist.axvline(x=target_mean+3*target_std,color='r',linewidth=line_thick)
@@ -276,7 +293,13 @@ def std_by_depth_psal(top,bottom,deployment_in=None):
ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick)
# sets the x label
- ax_hist.set_xlabel('PSU/hr')
+ if rate == 'time':
+
+ ax_hist.set_xlabel('PSU/hr')
+
+ elif rate == 'sample':
+
+ ax_hist.set_xlabel('PSU/sample')
label_coords = (0.70, 0.99)
@@ -314,7 +337,6 @@ def std_by_depth_psal(top,bottom,deployment_in=None):
-
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
index bc1020d..5b31a82 100755
--- a/ocean_dp/qc/temp_stat_plot.py
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -28,6 +28,12 @@
from sigfig import round
import pandas as pd
+import warnings
+import scipy.stats as st
+import statsmodels as sm
+import matplotlib
+
+
############################# Data extraction ################################
# creates an empty array to store the names of the SOTS deployments
@@ -296,7 +302,7 @@ def std_by_depth_temp(top,bottom,deployment_in=None,rate='time'):
elif rate == 'sample':
- ax_hist.set_xlabel('°C')
+ ax_hist.set_xlabel('°C/sample')
label_coords = (0.70, 0.99)
@@ -351,6 +357,7 @@ def std_by_depth_temp(top,bottom,deployment_in=None,rate='time'):
+
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
index cc7bc30..c389d27 100755
--- a/ocean_dp/sots_processing_runthrough.py
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -70,13 +70,13 @@
global_range.global_range(ncfile,'TEMP',40,-2)
# Rate of change
-rate_of_change_test.roc_test_files(fv01_pres_interp_files,'TEMP',10)
+rate_of_change_test.roc_test_files(fv01_pres_interp_files,'TEMP',3.36)
# Spike
spike_test.spike_test_files(fv01_pres_interp_files,target_vars_in=['TEMP'])
# Flatline
-flatline_test.flatline_test_files(fv01_pres_interp_files,['TEMP'],window=10)
+flatline_test.flatline_test_files(fv01_pres_interp_files,['TEMP'],window=20)
# Check qc process has worked
fv01_qc_checked = qc_checker.qc_checker_files(fv01_pres_interp_files,['TEMP'])
From d3839b146110f506a63722f2592e190f3b819b60 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Wed, 27 May 2020 16:53:31 +1000
Subject: [PATCH 48/59] Create add_density.py
---
ocean_dp/processing/add_density.py | 78 ++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
create mode 100755 ocean_dp/processing/add_density.py
diff --git a/ocean_dp/processing/add_density.py b/ocean_dp/processing/add_density.py
new file mode 100755
index 0000000..6e56a53
--- /dev/null
+++ b/ocean_dp/processing/add_density.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2020 Ben Weeding and Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+
+from netCDF4 import Dataset
+import sys
+import gsw
+import numpy as np
+from datetime import datetime
+
+# add density to a data file with TEMP, PSAL, PRES variables, many assumptions are made about the input file
+# based on Peter Jansen's addPSAL.py, using TEOS-10
+
+def add_density(netCDFfile):
+
+ # loads the netcdf file
+ ds = Dataset(netCDFfile, 'a')
+
+ if 'DENSITY' in list(ds.variables):
+
+ ds.close()
+
+ return "file already contains density"
+
+ # extracts the variables from the netcdf
+ var_temp = ds.variables["TEMP"]
+ var_psal = ds.variables["PSAL"]
+ var_pres = ds.variables["PRES"]
+ var_lon = ds.variables["LONGITUDE"]
+ var_lat = ds.variables["LATITUDE"]
+
+ # extracts the data from the variables
+ t = var_temp[:]
+ psal = var_psal[:]
+ p = var_pres[:]
+ lon = var_lon[:]
+ lat = var_lat[:]
+
+ # calculates absolute salinity
+ SA = gsw.SA_from_SP(psal, p, lon, lat)
+
+ # calculates conservative temperature
+ CT = gsw.CT_from_t(SA, t, p)
+
+ # calculates density
+ density = gsw.rho(SA, CT, p)
+
+
+ ncVarOut = ds.createVariable("DENSITY", "f4", ("TIME",), fill_value=np.nan, zlib=True) # fill_value=nan otherwise defaults to max
+ ncVarOut[:] = density
+ ncVarOut.units = "kg/m^3"
+ ncVarOut.comment = "calculated using gsw-python https://teos-10.github.io/GSW-Python/index.html"
+
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
+
+ ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + " : added DENSITY from TEMP, PSAL, PRES, LAT, LON")
+
+ ds.close()
+
+
+if __name__ == "__main__":
+ add_density(sys.argv[1])
From 031fd0e9e0d022b697448cef3967c9784b8ac909 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 28 May 2020 16:11:27 +1000
Subject: [PATCH 49/59] Making Pandas
Don't know what zoos are complaining about
---
ocean_dp/file_name/find_file_with.py | 90 +++++++++++++++++++
ocean_dp/plotting/density_plot.py | 25 ++++++
ocean_dp/plotting/panda_maker.py | 126 +++++++++++++++++++++++++++
ocean_dp/processing/add_density.py | 4 +-
4 files changed, 244 insertions(+), 1 deletion(-)
create mode 100755 ocean_dp/file_name/find_file_with.py
create mode 100755 ocean_dp/plotting/density_plot.py
create mode 100755 ocean_dp/plotting/panda_maker.py
diff --git a/ocean_dp/file_name/find_file_with.py b/ocean_dp/file_name/find_file_with.py
new file mode 100755
index 0000000..59225b3
--- /dev/null
+++ b/ocean_dp/file_name/find_file_with.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python3
+
+# raw2netCDF
+# Copyright (C) 2019 Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import glob
+
+import sys
+import re
+
+from netCDF4 import Dataset
+
+
+def find_files_pattern(file_pattern):
+ match_files = []
+ files = glob.glob(file_pattern)
+
+ match_files.extend(files)
+ return match_files
+
+def find_global(files, attribute, regexp):
+
+ match_files = []
+ #print("find", file_pattern, files)
+ for f in files:
+ #print("check file", f)
+ ds = Dataset(f, 'r')
+ if attribute in ds.ncattrs():
+ if re.match(regexp, ds.getncattr(attribute)):
+ match_files.append(f)
+ ds.close()
+
+ return match_files
+
+
+def find_variable(files, variable):
+
+ match_files = []
+ for f in files:
+ #print("check file", f)
+ ds = Dataset(f, 'r')
+ if variable in ds.variables:
+ match_files.append(f)
+ ds.close()
+
+ return match_files
+
+
+def find_variable_attribute(files, attribute, value):
+
+ match_files = []
+ for f in files:
+ #print("check file", f)
+ ds = Dataset(f, 'r')
+ nv = {attribute : value}
+ find = ds.get_variables_by_attributes(**nv)
+ if len(find) > 0:
+ match_files.append(f)
+ ds.close()
+
+ return match_files
+
+
+if __name__ == "__main__":
+ fns = []
+ if sys.argv[1] == '-v':
+ files = find_files_pattern(sys.argv[3])
+ fns = find_variable(files, variable=sys.argv[2])
+ elif sys.argv[1] == '-a':
+ files = find_files_pattern(sys.argv[4])
+ fns = find_variable_attribute(files, attribute=sys.argv[2], value=sys.argv[3])
+ else:
+ files = find_files_pattern(sys.argv[4])
+ fns = find_global(files, attribute=sys.argv[1], regexp=sys.argv[2])
+
+ for f in fns:
+ print(f)
\ No newline at end of file
diff --git a/ocean_dp/plotting/density_plot.py b/ocean_dp/plotting/density_plot.py
new file mode 100755
index 0000000..a798b02
--- /dev/null
+++ b/ocean_dp/plotting/density_plot.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import sys
+import os
+
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/file_name')
+
+import find_file_with
+
+path = "/Users/Tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"
+
+sots_files = find_file_with.find_files_pattern(os.path.join(path, "IMOS*FV00*.nc"))
\ No newline at end of file
diff --git a/ocean_dp/plotting/panda_maker.py b/ocean_dp/plotting/panda_maker.py
new file mode 100755
index 0000000..b21add7
--- /dev/null
+++ b/ocean_dp/plotting/panda_maker.py
@@ -0,0 +1,126 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+import pandas as pd
+
+import warnings
+import scipy.stats as st
+import statsmodels as sm
+import matplotlib
+
+# this function creates a pandas Datatable object, searching through all the
+# netcdf files in the directory given, containing all the variables specified
+
+# "/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"
+
+# ["dTemp/dtime","dTemp/dSample","QC","Nominal depth","Deployment"]
+
+# qc selection!!!
+
+def panda_maker(dir_spec,var_list,qc_lim=2):
+
+ # creates an empty array to store the names of the SOTS deployments
+ deployments = []
+
+ checked_files = []
+
+ processed_files = []
+
+ # loops through all the folders and files contained in the folder
+ for x in os.listdir(dir_spec):
+
+ # if the folder/file name contains 'Pulse' or 'SOFS' and doesn't contain '.', append it to deployments
+ if (('Pulse' in x) or ('SOFS' in x)) and ('.p' not in x):
+
+ deployments.append(x)
+
+
+
+ # create a dataframe to store extract information
+ total_df = pd.DataFrame(columns = var_list)
+
+ # add deployment code to the dataframe
+ total_df.insert(len(var_list),'Deployment code',[])
+
+ # loops through all files in the directory
+ for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+
+ for fname in files:
+
+ # append the filename to the list of checked files
+ checked_files.append(fname)
+
+ # for each netcdf file labelled as FV01 and containing a deployment in its name
+ if fname.endswith('.nc') and 'FV01' in fname and any(ele in fname for ele in deployments):
+
+ # print the filename
+ print(fname)
+
+ # open the file
+ nc = Dataset(os.path.join(root,fname), mode = 'r')
+
+ # check file contains all the specified variables and the time format is correct
+ if (all(ele in list(nc.variables) for ele in var_list)) & (nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC'):
+
+ # create a current dataframe for the netcdf file, to be appended to the overall dataframe
+ cur_df = pd.DataFrame(columns=var_list)
+
+ # create a qc vector for the netcdf file
+ cur_qc = np.zeros(nc.variables["TIME"].shape)
+
+ for cur_var in var_list:
+
+ if np.array(nc.variables[cur_var]).size == 1:
+
+ filling = np.ones(nc.variables["TIME"].shape) * np.array(nc.variables[cur_var])
+
+ else:
+
+ filling = np.array(nc.variables[cur_var][:])
+
+ if cur_var + '_quality_control' in list(nc.variables):
+
+ cur_qc = np.maximum(cur_qc,np.array(nc.variables[cur_var + '_quality_control']))
+
+
+ cur_df[cur_var] = filling
+
+ cur_df['Deployment code'] = [nc.deployment_code] * len(np.array(nc.variables['TIME']))
+
+ # append the current netcdf's dataframe to the sots_temp_ensemble
+ total_df = total_df.append(cur_df.iloc[np.where(cur_qc<=qc_lim)])
+
+ # append the filename to the list of processed files
+ processed_files.append(fname)
+
+
+ nc.close()
+
+
+ return total_df
+
\ No newline at end of file
diff --git a/ocean_dp/processing/add_density.py b/ocean_dp/processing/add_density.py
index 6e56a53..9320ddf 100755
--- a/ocean_dp/processing/add_density.py
+++ b/ocean_dp/processing/add_density.py
@@ -57,8 +57,10 @@ def add_density(netCDFfile):
# calculates density
density = gsw.rho(SA, CT, p)
-
+ # generates a new variable 'DENSITY' in the netcdf
ncVarOut = ds.createVariable("DENSITY", "f4", ("TIME",), fill_value=np.nan, zlib=True) # fill_value=nan otherwise defaults to max
+
+ # assigns the calculated densities to the DENSITY variable, sets the units as kg/m^3, and comments on the variable's origin
ncVarOut[:] = density
ncVarOut.units = "kg/m^3"
ncVarOut.comment = "calculated using gsw-python https://teos-10.github.io/GSW-Python/index.html"
From 3d32c27082ae4fa98c06d7d7427a88aab4eb19b2 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 28 May 2020 16:19:49 +1000
Subject: [PATCH 50/59] Update panda_maker.py
---
ocean_dp/plotting/panda_maker.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ocean_dp/plotting/panda_maker.py b/ocean_dp/plotting/panda_maker.py
index b21add7..5c3aa70 100755
--- a/ocean_dp/plotting/panda_maker.py
+++ b/ocean_dp/plotting/panda_maker.py
@@ -121,6 +121,8 @@ def panda_maker(dir_spec,var_list,qc_lim=2):
nc.close()
+ total_df = total_df.reset_index()
+
return total_df
\ No newline at end of file
From 11b897320e45f92d7a6b1638a931254b0864d879 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 28 May 2020 16:22:22 +1000
Subject: [PATCH 51/59] Update panda_maker.py
---
ocean_dp/plotting/panda_maker.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ocean_dp/plotting/panda_maker.py b/ocean_dp/plotting/panda_maker.py
index 5c3aa70..2fc9bfb 100755
--- a/ocean_dp/plotting/panda_maker.py
+++ b/ocean_dp/plotting/panda_maker.py
@@ -121,7 +121,7 @@ def panda_maker(dir_spec,var_list,qc_lim=2):
nc.close()
- total_df = total_df.reset_index()
+ total_df = total_df.reset_index(drop=True)
return total_df
From d75303b06ab44354b656b30b1f760f6574092c27 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Fri, 29 May 2020 13:50:12 +1000
Subject: [PATCH 52/59] Create panda_merger.py
---
ocean_dp/processing/panda_merger.py | 73 +++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)
create mode 100755 ocean_dp/processing/panda_merger.py
diff --git a/ocean_dp/processing/panda_merger.py b/ocean_dp/processing/panda_merger.py
new file mode 100755
index 0000000..1425f5c
--- /dev/null
+++ b/ocean_dp/processing/panda_merger.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+from datetime import datetime as dt
+from datetime import timedelta
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+import pandas as pd
+
+nc1 = Dataset('IMOS_ABOS-SOTS_COPST_20180801_SOFS_FV00_SOFS-7.5-2018-SBE37SMP-ODO-RS232-03715971-200m_END-20190324_C-20200401.nc',mode='r')
+
+nc2 = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4048-45m_END-20190331_C-20200401.nc',mode='r')
+
+df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP':np.array(nc1.variables['TEMP'][:])})
+
+df1_time = np.array(nc1.variables['TIME'][:])
+
+df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP':np.array(nc2.variables['TEMP'][:])})
+
+# convert datenums to datetimes
+
+pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
+
+df1['TIME']=pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
+
+df1a=df1.set_index('TIME')
+
+# resample hourly
+
+df1h=df1a.resample('H',base=0.5).mean()
+
+df1h.index = df1h.index + pd.Timedelta('30 min')
+
+
+
+# merge
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From 1c07d8cc57efd37a82324a47ed9ea9081fbc1a85 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Fri, 29 May 2020 14:27:05 +1000
Subject: [PATCH 53/59] Update panda_merger.py
---
ocean_dp/processing/panda_merger.py | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/ocean_dp/processing/panda_merger.py b/ocean_dp/processing/panda_merger.py
index 1425f5c..6f69822 100755
--- a/ocean_dp/processing/panda_merger.py
+++ b/ocean_dp/processing/panda_merger.py
@@ -30,33 +30,40 @@
from sigfig import round
import pandas as pd
+# import two netcdf
nc1 = Dataset('IMOS_ABOS-SOTS_COPST_20180801_SOFS_FV00_SOFS-7.5-2018-SBE37SMP-ODO-RS232-03715971-200m_END-20190324_C-20200401.nc',mode='r')
nc2 = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4048-45m_END-20190331_C-20200401.nc',mode='r')
-df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP':np.array(nc1.variables['TEMP'][:])})
+# convert their time and temp data into dataframes
+df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP_200':np.array(nc1.variables['TEMP'][:])})
-df1_time = np.array(nc1.variables['TIME'][:])
+df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP_45':np.array(nc2.variables['TEMP'][:])})
-df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP':np.array(nc2.variables['TEMP'][:])})
+# convert the times from days since 01-01-1950 to a datetime object
+df1['TIME']=pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
-# convert datenums to datetimes
+df2['TIME']=pd.to_timedelta(df2['TIME'],unit='D')+dt(1950,1,1)
-pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
+# index the dataframes by time
+df1=df1.set_index('TIME')
-df1['TIME']=pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
+df2=df2.set_index('TIME')
-df1a=df1.set_index('TIME')
-# resample hourly
+# resample the data, calculating the mean over hourly periods, starting on the half hour
+df1=df1.resample('H',base=0.5).mean()
-df1h=df1a.resample('H',base=0.5).mean()
+df2=df2.resample('H',base=0.5).mean()
-df1h.index = df1h.index + pd.Timedelta('30 min')
+# reset the labels so they read the hour in the centre of the averaging period
+df1.index = df1.index + pd.Timedelta('30 min')
+df2.index = df2.index + pd.Timedelta('30 min')
-# merge
+# combine the two dataframes based on their time indicies, recording nan if one sensor doesn't have a reading for that timestamp
+total_df = pd.concat([df1,df2], join='outer', axis=1)
From 19a80341ed3d41e60d18b33255c1c47e758a2acc Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Fri, 29 May 2020 14:28:23 +1000
Subject: [PATCH 54/59] Update panda_merger.py
---
ocean_dp/processing/panda_merger.py | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/ocean_dp/processing/panda_merger.py b/ocean_dp/processing/panda_merger.py
index 6f69822..69de3f6 100755
--- a/ocean_dp/processing/panda_merger.py
+++ b/ocean_dp/processing/panda_merger.py
@@ -20,14 +20,7 @@
from datetime import datetime as dt
from datetime import timedelta
import numpy as np
-import argparse
-import glob
-import pytz
-import os
-import matplotlib.pyplot as plt
-from matplotlib import colors
-from matplotlib.ticker import PercentFormatter
-from sigfig import round
+
import pandas as pd
# import two netcdf
From fe99b4eebf283ba9faa2c1f3cab31be1d4724147 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Fri, 29 May 2020 15:46:23 +1000
Subject: [PATCH 55/59] Update panda_merger.py
---
ocean_dp/processing/panda_merger.py | 38 ++++++++++++++++++++++++++---
1 file changed, 34 insertions(+), 4 deletions(-)
diff --git a/ocean_dp/processing/panda_merger.py b/ocean_dp/processing/panda_merger.py
index 69de3f6..495b906 100755
--- a/ocean_dp/processing/panda_merger.py
+++ b/ocean_dp/processing/panda_merger.py
@@ -20,7 +20,7 @@
from datetime import datetime as dt
from datetime import timedelta
import numpy as np
-
+import glob
import pandas as pd
# import two netcdf
@@ -29,9 +29,9 @@
nc2 = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4048-45m_END-20190331_C-20200401.nc',mode='r')
# convert their time and temp data into dataframes
-df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP_200':np.array(nc1.variables['TEMP'][:])})
+df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP_'+str(nc1.variables['NOMINAL_DEPTH'][0]):np.array(nc1.variables['TEMP'][:])})
-df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP_45':np.array(nc2.variables['TEMP'][:])})
+df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP_'+str(nc2.variables['NOMINAL_DEPTH'][0]):np.array(nc2.variables['TEMP'][:])})
# convert the times from days since 01-01-1950 to a datetime object
df1['TIME']=pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
@@ -59,7 +59,37 @@
total_df = pd.concat([df1,df2], join='outer', axis=1)
-
+files = glob.glob('*FV00*.nc')
+
+var_name = 'TEMP'
+
+def panda_combine(files,var_name):
+
+ total_df = pd.DataFrame({'A' : []})
+
+ # make a sorting index for columns from nominal depths
+
+ for cur_file in files:
+
+ cur_nc = Dataset(cur_file,mode='r')
+
+ cur_df = pd.DataFrame({'TIME':np.array(cur_nc.variables['TIME'][:]),var_name+'_'+str(cur_nc.variables['NOMINAL_DEPTH'][0]):np.array(cur_nc.variables[var_name][:])})
+
+ cur_df['TIME']=pd.to_timedelta(cur_df['TIME'],unit='D')+dt(1950,1,1)
+
+ cur_df = cur_df.set_index('TIME')
+
+ cur_df = cur_df.resample('H',base=0.5).mean()
+
+ cur_df.index = cur_df.index + pd.Timedelta('30 min')
+
+ total_df = pd.concat([total_df,cur_df], join='outer', axis=1)
+
+ print(cur_file)
+
+ print(len(total_df))
+
+ return total_df
From afeb0670d6a2b8c809fb2c01f54fd96d90b14971 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Mon, 1 Jun 2020 15:28:35 +1000
Subject: [PATCH 56/59] beginning of netcdf to panda converter
---
ocean_dp/processing/netcdf_to_df.py | 94 +++++++++++++++++++++++++++++
ocean_dp/processing/panda_merger.py | 93 ++++++++++++++++++----------
2 files changed, 155 insertions(+), 32 deletions(-)
create mode 100755 ocean_dp/processing/netcdf_to_df.py
diff --git a/ocean_dp/processing/netcdf_to_df.py b/ocean_dp/processing/netcdf_to_df.py
new file mode 100755
index 0000000..dc1d6fa
--- /dev/null
+++ b/ocean_dp/processing/netcdf_to_df.py
@@ -0,0 +1,94 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+from netCDF4 import Dataset, num2date
+import pandas as pd
+import numpy as np
+from datetime import datetime as dt
+
+
+# =============================================================================
+# Returns a list of the time series variables in a IMOS format
+# netcdf file. Takes an open netcdf as its argument.
+# =============================================================================
+def var_selector_inc_time(nc,qc=False,):
+
+ x = [x for x in list(nc.variables) if ('_quality_control_' not in x) & (nc.variables[x].shape!=())]
+
+ return x
+
+# =============================================================================
+#
+# =============================================================================
+def netcdf_to_df(target_file):
+
+ # open the inputted netcdf
+ nc = Dataset(target_file,mode='r')
+
+ # creates a the list of variables to transfer to the dataframe
+ vars_to_transfer = var_selector_inc_time(nc)
+
+ # creates the dataframe with column labels
+ df = pd.DataFrame(columns = vars_to_transfer)
+
+ # sorts the columns alphabetically, with the relevant qc variable following each timeseries variable
+ df.sort_index(axis=1, inplace=True)
+
+ # fill the dataframe from the netcdf, variable by variable
+ for cur_var in vars_to_transfer:
+
+ df[cur_var] = np.array(nc.variables[cur_var])
+
+ # convert time into a datetime object, this is optional, and not needed to continue in the IMOS format
+ #df['TIME']=pd.to_timedelta(df['TIME'],unit='D')+dt(1950,1,1)
+
+ # index the dataframe by time
+ df = df.set_index('TIME')
+
+ # extract the column names
+ col_names = list(df.columns)
+
+ # append the nominal depth to all column names
+ df.columns = [x.replace('quality_control',str(nc.variables['NOMINAL_DEPTH'][0])+'_quality_control') if 'quality_control' in x else x + '_' + str(nc.variables['NOMINAL_DEPTH'][0]) for x in col_names]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/processing/panda_merger.py b/ocean_dp/processing/panda_merger.py
index 495b906..13a789c 100755
--- a/ocean_dp/processing/panda_merger.py
+++ b/ocean_dp/processing/panda_merger.py
@@ -22,50 +22,34 @@
import numpy as np
import glob
import pandas as pd
+import re
-# import two netcdf
-nc1 = Dataset('IMOS_ABOS-SOTS_COPST_20180801_SOFS_FV00_SOFS-7.5-2018-SBE37SMP-ODO-RS232-03715971-200m_END-20190324_C-20200401.nc',mode='r')
-nc2 = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4048-45m_END-20190331_C-20200401.nc',mode='r')
-# convert their time and temp data into dataframes
-df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP_'+str(nc1.variables['NOMINAL_DEPTH'][0]):np.array(nc1.variables['TEMP'][:])})
-df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP_'+str(nc2.variables['NOMINAL_DEPTH'][0]):np.array(nc2.variables['TEMP'][:])})
-
-# convert the times from days since 01-01-1950 to a datetime object
-df1['TIME']=pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
-
-df2['TIME']=pd.to_timedelta(df2['TIME'],unit='D')+dt(1950,1,1)
-
-# index the dataframes by time
-df1=df1.set_index('TIME')
-
-df2=df2.set_index('TIME')
-
-
-# resample the data, calculating the mean over hourly periods, starting on the half hour
-df1=df1.resample('H',base=0.5).mean()
-
-df2=df2.resample('H',base=0.5).mean()
-
-# reset the labels so they read the hour in the centre of the averaging period
-df1.index = df1.index + pd.Timedelta('30 min')
-
-df2.index = df2.index + pd.Timedelta('30 min')
+files = glob.glob('*FV00*.nc')
+var_name = 'TEMP'
-# combine the two dataframes based on their time indicies, recording nan if one sensor doesn't have a reading for that timestamp
-total_df = pd.concat([df1,df2], join='outer', axis=1)
+def depth_from_file(file_in):
+
+ result = int(re.findall(r'(?<=-)\w+(?=m_END)', file_in)[0])
+
+ return result
+def var_selector(file):
+
+ [x for x in var_list if (x!='TIME') & ('_quality_control' not in x) & (nc.variables[x].shape!=())]
+
+ return x
-files = glob.glob('*FV00*.nc')
-var_name = 'TEMP'
def panda_combine(files,var_name):
- total_df = pd.DataFrame({'A' : []})
+ files.sort(key=depth_from_file)
+
+ total_df = pd.DataFrame({'dummy' : []})
# make a sorting index for columns from nominal depths
@@ -88,12 +72,57 @@ def panda_combine(files,var_name):
print(cur_file)
print(len(total_df))
+
+ total_df.drop(['dummy'],axis=1,inplace=True)
return total_df
+result = re.findall(r'(?<=-)\w+(?=m_END)', cur_file)
+
+
+
+
+
+
+# =============================================================================
+# Old proof of concept code
+# =============================================================================
+
+# # import two netcdf
+# nc1 = Dataset('IMOS_ABOS-SOTS_COPST_20180801_SOFS_FV00_SOFS-7.5-2018-SBE37SMP-ODO-RS232-03715971-200m_END-20190324_C-20200401.nc',mode='r')
+
+# nc2 = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4048-45m_END-20190331_C-20200401.nc',mode='r')
+
+# # convert their time and temp data into dataframes
+# df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP_'+str(nc1.variables['NOMINAL_DEPTH'][0]):np.array(nc1.variables['TEMP'][:])})
+
+# df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP_'+str(nc2.variables['NOMINAL_DEPTH'][0]):np.array(nc2.variables['TEMP'][:])})
+
+# # convert the times from days since 01-01-1950 to a datetime object
+# df1['TIME']=pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
+
+# df2['TIME']=pd.to_timedelta(df2['TIME'],unit='D')+dt(1950,1,1)
+
+# # index the dataframes by time
+# df1=df1.set_index('TIME')
+
+# df2=df2.set_index('TIME')
+
+
+# # resample the data, calculating the mean over hourly periods, starting on the half hour
+# df1=df1.resample('H',base=0.5).mean()
+
+# df2=df2.resample('H',base=0.5).mean()
+
+# # reset the labels so they read the hour in the centre of the averaging period
+# df1.index = df1.index + pd.Timedelta('30 min')
+
+# df2.index = df2.index + pd.Timedelta('30 min')
+# # combine the two dataframes based on their time indicies, recording nan if one sensor doesn't have a reading for that timestamp
+# total_df = pd.concat([df1,df2], join='outer', axis=1)
From b43195f3ff55166fec8847ce5980cfeb035011a8 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Tue, 2 Jun 2020 16:13:46 +1000
Subject: [PATCH 57/59] netcdf_to_df progress
---
ocean_dp/processing/netcdf_to_df.py | 87 +++++++++++++++++--
ocean_dp/qc/in_out_water.py | 127 +++++++++++++++++-----------
2 files changed, 158 insertions(+), 56 deletions(-)
diff --git a/ocean_dp/processing/netcdf_to_df.py b/ocean_dp/processing/netcdf_to_df.py
index dc1d6fa..e86b158 100755
--- a/ocean_dp/processing/netcdf_to_df.py
+++ b/ocean_dp/processing/netcdf_to_df.py
@@ -49,22 +49,93 @@ def netcdf_to_df(target_file):
# fill the dataframe from the netcdf, variable by variable
for cur_var in vars_to_transfer:
- df[cur_var] = np.array(nc.variables[cur_var])
-
- # convert time into a datetime object, this is optional, and not needed to continue in the IMOS format
- #df['TIME']=pd.to_timedelta(df['TIME'],unit='D')+dt(1950,1,1)
+ df[cur_var] = np.array(nc.variables[cur_var])
- # index the dataframe by time
- df = df.set_index('TIME')
+ # store deployment times in attributes
+ df.attrs['time_deployment_start'] = nc.time_deployment_start
+ df.attrs['time_deployment_end'] = nc.time_deployment_end
# extract the column names
col_names = list(df.columns)
# append the nominal depth to all column names
- df.columns = [x.replace('quality_control',str(nc.variables['NOMINAL_DEPTH'][0])+'_quality_control') if 'quality_control' in x else x + '_' + str(nc.variables['NOMINAL_DEPTH'][0]) for x in col_names]
-
+ df.columns = [x.replace('quality_control',str(nc.variables['NOMINAL_DEPTH'][0])+'_quality_control') if 'quality_control' in x else x if 'TIME' in x else x + '_' + str(nc.variables['NOMINAL_DEPTH'][0]) for x in col_names]
+
+ nc.close()
+
+ return df
+
+
+# =============================================================================
+# Takes
+# =============================================================================
+
+def combine_df(target_dfs):
+
+ # for each of the dataframes in the list provided
+ for cur_df in target_dfs:
+
+ # make a copy of the current dataframe to modify and combine
+ df = cur_df.copy()
+
+ # convert the IMOS format times to datetime
+ df['TIME']=pd.to_timedelta(df['TIME'],unit='D')+dt(1950,1,1)
+
+ # index the dataframe by time - for some reason this makes the df very slow to visually open and navigate!?
+ df = df.set_index('TIME')
+
+ # extract and convert deployment times to datetime
+ start_time = dt.strptime(df.attrs['time_deployment_start'],'%Y-%m-%dT%H:%M:%SZ')
+ end_time = dt.strptime(df.attrs['time_deployment_end'],'%Y-%m-%dT%H:%M:%SZ')
+
+ # trim the df to only include in water data
+ df = df.drop(df[(df.index < start_time) | (df.index > end_time)].index)
+
+ # resamples using the max method, to create a df of the correct dimensions to fill
+ df_to_fill = df.resample('H',base=0.5).max()
+
+
+ # gets list of column names
+ col_names = list(df.columns)
+ # makes a list of non qc column names
+ col_names_no_qc = [x for x in col_names if 'quality_control' not in x]
+ # for each of the time series data columns
+ for cur_col in col_names_no_qc:
+
+ # sets the value of non qc data to nan if the corresponding qc value is not satisfactory (0,1,2,7 at the moment)
+ df.loc[(df[cur_col+'_quality_control'] > 2) & (df[cur_col+'_quality_control'] != 7), cur_col] = np.nan
+
+ # extracts the time series data
+ dS = pd.Series(df[cur_col])
+
+ # makes a copy for bin counting
+ dS_1s = dS.copy()
+
+ dS_1s[:] = 1
+
+ # resamples the series, interpoling linearly
+ dS_resampled = dS.resample('H',base=0.5).interpolate()
+
+ # count how many data points are in each shoulder bin
+ dS_bin_counts = dS_1s.resample('H',base=0.5).sum()
+
+ # fill the interpolated data back into the dataframe
+ df_to_fill[cur_col] = dS_resampled
+
+ # give any interpolated point without any data within its hour window a qc code of 7
+ df_to_fill.loc[dS_bin_counts==0,[cur_col+'_quality_control']] = 7
+
+ # shift the timestamps to the middle of the hour sampling period
+ df_to_fill.index = df_to_fill.index + pd.Timedelta('30 min')
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/in_out_water.py b/ocean_dp/qc/in_out_water.py
index e8635ed..39da3a7 100644
--- a/ocean_dp/qc/in_out_water.py
+++ b/ocean_dp/qc/in_out_water.py
@@ -18,73 +18,104 @@
from netCDF4 import Dataset, num2date
import sys
-
+from datetime import datetime
import numpy as np
from dateutil import parser
import pytz
import os
-# flag out of water as QC value 7 (not_deployed), with wise leave as 0
+# flag out of water as QC value 6 (not_deployed), with wise leave as 0
def in_out_water(netCDFfile, var_name=None):
- ds = Dataset(netCDFfile, 'a')
-
- nc_vars = ds.variables
- to_add = []
- if var_name:
- to_add.append(var_name)
- else:
- for v in nc_vars:
- #print (vars[v].dimensions)
- if v != 'TIME':
- to_add.append(v)
-
- time_var = nc_vars["TIME"]
- time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
-
- time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
- time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
- print('deployment time', time_deploy)
+ out_file = []
+
+ for fn in netCDFfile:
+ ds = Dataset(fn, 'a')
+
+ nc_vars = ds.variables
+ to_add = []
+ if var_name:
+ to_add.append(var_name)
+ else:
+ for v in nc_vars:
+ if "TIME" in nc_vars[v].dimensions:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+ # remove any anx variables from the list
+ for v in nc_vars:
+ if 'ancillary_variables' in nc_vars[v].ncattrs():
+ remove = nc_vars[v].getncattr('ancillary_variables').split(' ')
+ print("remove ", remove)
+ for r in remove:
+ to_add.remove(r)
+
+ time_var = nc_vars["TIME"]
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
+ time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
+
+ print('file', fn)
+ print('deployment time', time_deploy)
+
+ print('var to add', to_add)
+
+ # create a mask for the time range
+ mask = (time <= time_deploy) | (time >= time_recovery)
+
+ for v in to_add:
+ print("var", v, ' dimensions ', nc_vars[v].dimensions)
+
+ ncVarOut = nc_vars[v + "_quality_control"]
+ ncVarOut[mask] = 6
+
+ # create a qc variable just for this test flags
+ if v + "_quality_control_io" in ds.variables:
+ ncVarOut = ds.variables[v + "_quality_control_io"]
+ ncVarOut[:] = 0
+ else:
+ ncVarOut = ds.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
- print(to_add)
+ ncVarOut[:] = 0
+ ncVarOut.long_name = "quality flag for " + nc_vars[v].long_name
+ try:
+ ncVarOut.standard_name = nc_vars[v].standard_name + " status_flag"
+ except AttributeError:
+ pass
- # create a mask for the time range
- mask = (time <= time_deploy) | (time >= time_recovery)
+ ncVarOut.quality_control_conventions = "IMOS standard flags"
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+ ncVarOut.comment = 'data flagged not deployed (6) when out of water'
- for v in to_add:
- if "TIME" in nc_vars[v].dimensions:
- if v.endswith("_quality_control"):
- print("QC time dim ", v)
+ ncVarOut[mask] = 6
+ # calculate the number of points marked as bad_data
+ marked = np.zeros_like(ncVarOut)
+ marked[mask] = 1
+ count = sum(marked)
- ncVarOut = nc_vars[v]
- ncVarOut[mask] = 7
- else:
- # create a qc variable just for this test flags
- if v + "_quality_control_io" in ds.variables:
- ncVarOut = ds.variables[v + "_quality_control_io"]
- else:
- ncVarOut = ds.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
- ncVarOut[:] = np.zeros(nc_vars[v].shape)
- ncVarOut.long_name = "quality flag for " + v
- ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
- ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+ ds.file_version = "Level 1 - Quality Controlled Data"
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
- nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
- ncVarOut[mask] = 7
-
- ds.variables[v + "_quality_control"][:] = np.maximum(ds.variables[v + "_quality_control_io"][:],ds.variables[v + "_quality_control"][:])
+ ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' : ' + ' marked ' + str(int(count)))
- ds.file_version = "Level 1 - Quality Controlled Data"
+ ds.close()
- ds.close()
+ out_file.append(fn)
- return netCDFfile
+ return out_file
if __name__ == "__main__":
if len(sys.argv) > 2 & sys.argv[1].startswith('-'):
- in_out_water(sys.argv[2], var_name=sys.argv[1][1:])
+ in_out_water(sys.argv[2:], var_name=sys.argv[1][1:])
else:
- in_out_water(sys.argv[1])
\ No newline at end of file
+ in_out_water(sys.argv[1:])
\ No newline at end of file
From fe0ab111fbe2b157127cfada96a2704db54dc9ff Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 11 Jun 2020 16:22:27 +1000
Subject: [PATCH 58/59] Various updates after convo with TT, not complete
---
ocean_dp/processing/netcdf_to_df.py | 54 +++++++++-
ocean_dp/qc/in_out_water.py | 144 ++++++++++++-------------
ocean_dp/qc/qc_checker.py | 7 ++
ocean_dp/sots_processing_runthrough.py | 5 +
4 files changed, 134 insertions(+), 76 deletions(-)
diff --git a/ocean_dp/processing/netcdf_to_df.py b/ocean_dp/processing/netcdf_to_df.py
index e86b158..4a409a4 100755
--- a/ocean_dp/processing/netcdf_to_df.py
+++ b/ocean_dp/processing/netcdf_to_df.py
@@ -17,7 +17,8 @@
import pandas as pd
import numpy as np
from datetime import datetime as dt
-
+import glob
+import re
# =============================================================================
# Returns a list of the time series variables in a IMOS format
@@ -54,6 +55,7 @@ def netcdf_to_df(target_file):
# store deployment times in attributes
df.attrs['time_deployment_start'] = nc.time_deployment_start
df.attrs['time_deployment_end'] = nc.time_deployment_end
+ df.attrs['nominal_depth'] = nc.variables['NOMINAL_DEPTH'][0]
# extract the column names
col_names = list(df.columns)
@@ -72,6 +74,8 @@ def netcdf_to_df(target_file):
def combine_df(target_dfs):
+ total_df = pd.DataFrame({'dummy' : []})
+
# for each of the dataframes in the list provided
for cur_df in target_dfs:
@@ -91,8 +95,11 @@ def combine_df(target_dfs):
# trim the df to only include in water data
df = df.drop(df[(df.index < start_time) | (df.index > end_time)].index)
+ # remove data with bad qc instead of setting to nan later in process, let resample do the work?
+ # but what if psal is bad but temp is good?? Need to think on this.
+
# resamples using the max method, to create a df of the correct dimensions to fill
- df_to_fill = df.resample('H',base=0.5).max()
+ df_to_fill = df.resample('H',base=0.5).min()
# gets list of column names
@@ -105,7 +112,9 @@ def combine_df(target_dfs):
for cur_col in col_names_no_qc:
# sets the value of non qc data to nan if the corresponding qc value is not satisfactory (0,1,2,7 at the moment)
- df.loc[(df[cur_col+'_quality_control'] > 2) & (df[cur_col+'_quality_control'] != 7), cur_col] = np.nan
+ #df.loc[(df[cur_col+'_quality_control'] > 2) & (df[cur_col+'_quality_control'] != 7), cur_col] = np.nan
+ # CAUSING UNEXPECTED NANS - FIX
+ df.loc[df[cur_col+'_quality_control'].isin([3,4,6,9]) , cur_col] = np.nan
# extracts the time series data
dS = pd.Series(df[cur_col])
@@ -116,7 +125,8 @@ def combine_df(target_dfs):
dS_1s[:] = 1
# resamples the series, interpoling linearly
- dS_resampled = dS.resample('H',base=0.5).interpolate()
+ dS_resampled = dS.resample('H',base=0.5).interpolate(method='index',axis=0,limit=1000000)
+
# count how many data points are in each shoulder bin
dS_bin_counts = dS_1s.resample('H',base=0.5).sum()
@@ -130,8 +140,44 @@ def combine_df(target_dfs):
# shift the timestamps to the middle of the hour sampling period
df_to_fill.index = df_to_fill.index + pd.Timedelta('30 min')
+ total_df = pd.concat([total_df,df_to_fill], join='outer', axis=1)
+
+ print(cur_df)
+
+ print(len(total_df))
+
+ total_df.drop(['dummy'],axis=1,inplace=True)
+
+ return total_df
+
+# =============================================================================
+#
+# =============================================================================
+def depth_from_file(file_in):
+
+ result = int(re.findall(r'(?<=-)\w+(?=m_END)', file_in)[0])
+
+ return result
+
+# =============================================================================
+#
+# =============================================================================
+
+netcdfs = glob.glob('*FV01*.nc')
+
+netcdfs = sorted(netcdfs,key=depth_from_file)
+
+df_list = list()
+
+for cur_netcdf in netcdfs:
+
+ df = netcdf_to_df(cur_netcdf)
+
+ df_list.append(df)
+
+
diff --git a/ocean_dp/qc/in_out_water.py b/ocean_dp/qc/in_out_water.py
index 39da3a7..7ec3e4d 100644
--- a/ocean_dp/qc/in_out_water.py
+++ b/ocean_dp/qc/in_out_water.py
@@ -27,89 +27,89 @@
# flag out of water as QC value 6 (not_deployed), with wise leave as 0
-def in_out_water(netCDFfile, var_name=None):
+def in_out_water(fn, var_name=None):
out_file = []
- for fn in netCDFfile:
- ds = Dataset(fn, 'a')
- nc_vars = ds.variables
- to_add = []
- if var_name:
- to_add.append(var_name)
- else:
- for v in nc_vars:
- if "TIME" in nc_vars[v].dimensions:
- #print (vars[v].dimensions)
- if v != 'TIME':
- to_add.append(v)
- # remove any anx variables from the list
- for v in nc_vars:
- if 'ancillary_variables' in nc_vars[v].ncattrs():
- remove = nc_vars[v].getncattr('ancillary_variables').split(' ')
- print("remove ", remove)
- for r in remove:
- to_add.remove(r)
-
- time_var = nc_vars["TIME"]
- time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
-
- time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
- time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
-
- print('file', fn)
- print('deployment time', time_deploy)
-
- print('var to add', to_add)
-
- # create a mask for the time range
- mask = (time <= time_deploy) | (time >= time_recovery)
-
- for v in to_add:
- print("var", v, ' dimensions ', nc_vars[v].dimensions)
-
- ncVarOut = nc_vars[v + "_quality_control"]
- ncVarOut[mask] = 6
-
- # create a qc variable just for this test flags
- if v + "_quality_control_io" in ds.variables:
- ncVarOut = ds.variables[v + "_quality_control_io"]
- ncVarOut[:] = 0
- else:
- ncVarOut = ds.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
- nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+ ds = Dataset(fn, 'a')
+ nc_vars = ds.variables
+ to_add = []
+ if var_name:
+ to_add.append(var_name)
+ else:
+ for v in nc_vars:
+ if "TIME" in nc_vars[v].dimensions:
+ #print (vars[v].dimensions)
+ if v != 'TIME':
+ to_add.append(v)
+ # remove any anx variables from the list
+ for v in nc_vars:
+ if 'ancillary_variables' in nc_vars[v].ncattrs():
+ remove = nc_vars[v].getncattr('ancillary_variables').split(' ')
+ print("remove ", remove)
+ for r in remove:
+ to_add.remove(r)
+
+ time_var = nc_vars["TIME"]
+ time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+
+ time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
+ time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
+
+ print('file', fn)
+ print('deployment time', time_deploy)
+
+ print('var to add', to_add)
+
+ # create a mask for the time range
+ mask = (time <= time_deploy) | (time >= time_recovery)
+
+ for v in to_add:
+ print("var", v, ' dimensions ', nc_vars[v].dimensions)
+
+ ncVarOut = nc_vars[v + "_quality_control"]
+ ncVarOut[mask] = 6
+
+ # create a qc variable just for this test flags
+ if v + "_quality_control_io" in ds.variables:
+ ncVarOut = ds.variables[v + "_quality_control_io"]
ncVarOut[:] = 0
- ncVarOut.long_name = "quality flag for " + nc_vars[v].long_name
- try:
- ncVarOut.standard_name = nc_vars[v].standard_name + " status_flag"
- except AttributeError:
- pass
-
- ncVarOut.quality_control_conventions = "IMOS standard flags"
- ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
- ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
- ncVarOut.comment = 'data flagged not deployed (6) when out of water'
-
- ncVarOut[mask] = 6
- # calculate the number of points marked as bad_data
- marked = np.zeros_like(ncVarOut)
- marked[mask] = 1
- count = sum(marked)
-
- ds.file_version = "Level 1 - Quality Controlled Data"
- # update the history attribute
+ else:
+ ncVarOut = ds.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True) # fill_value=0 otherwise defaults to max
+ nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+
+ ncVarOut[:] = 0
+ ncVarOut.long_name = "quality flag for " + nc_vars[v].long_name
try:
- hist = ds.history + "\n"
+ ncVarOut.standard_name = nc_vars[v].standard_name + " status_flag"
except AttributeError:
- hist = ""
+ pass
+
+ ncVarOut.quality_control_conventions = "IMOS standard flags"
+ ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+ ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+ ncVarOut.comment = 'data flagged not deployed (6) when out of water'
+
+ ncVarOut[mask] = 6
+ # calculate the number of points marked as bad_data
+ marked = np.zeros_like(ncVarOut)
+ marked[mask] = 1
+ count = sum(marked)
+
+ ds.file_version = "Level 1 - Quality Controlled Data"
+ # update the history attribute
+ try:
+ hist = ds.history + "\n"
+ except AttributeError:
+ hist = ""
- ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' : ' + ' marked ' + str(int(count)))
+ ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' : ' + ' marked ' + str(int(count)))
- ds.close()
+ ds.close()
- out_file.append(fn)
+ out_file.append(fn)
return out_file
diff --git a/ocean_dp/qc/qc_checker.py b/ocean_dp/qc/qc_checker.py
index 90e83e8..6fd7881 100755
--- a/ocean_dp/qc/qc_checker.py
+++ b/ocean_dp/qc/qc_checker.py
@@ -106,6 +106,13 @@ def qc_checker(nc,target_vars_in=[]):
# The qc process has succeeded
qc_behaving = True
+
+ # sets all data with a qc value of 0 to have a qc value of 1, having passed all the tests
+ nc.variables[current_var+"_quality_control"][qc_global_data==0] = 1
+
+ now=datetime.utcnow()
+
+ nc.history += ' ' + now.strftime("%Y%m%d:") + 'passed qc_checker, all qc=0 set to qc=1'
# Returns true if qc has succeeded, false if not
return qc_behaving
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
index c389d27..f9aee09 100755
--- a/ocean_dp/sots_processing_runthrough.py
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -62,6 +62,11 @@
# Run pressure_interpolator.py
fv01_pres_interp_files = pressure_interpolator.pressure_interpolator(netCDFfiles=fv01_files,agg=glob.glob('*IMOS_ABOS-SOTS*Aggregate*.nc')[0])
+# delete the defunct FV01 files
+for ncfile in fv01_files:
+
+ os.remove(ncfile)
+
# Global range test
for ncfile in fv01_pres_interp_files:
From 0ed9243197a720947016359c99d967f0a9b88d13 Mon Sep 17 00:00:00 2001
From: bweeding <57697604+bweeding@users.noreply.github.com>
Date: Thu, 25 Jun 2020 14:24:05 +1000
Subject: [PATCH 59/59] Create add_mld.py
---
ocean_dp/processing/add_mld.py | 101 +++++++++++++++++++++++++++++++++
1 file changed, 101 insertions(+)
create mode 100755 ocean_dp/processing/add_mld.py
diff --git a/ocean_dp/processing/add_mld.py b/ocean_dp/processing/add_mld.py
new file mode 100755
index 0000000..112ce35
--- /dev/null
+++ b/ocean_dp/processing/add_mld.py
@@ -0,0 +1,101 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+from netCDF4 import Dataset, num2date
+import sys
+from datetime import datetime as dt
+import numpy as np
+import pandas as pd
+from scipy import interpolate
+
+def add_mld(nc_in,thresh_in=0.2):
+
+ # opens the supplied IMOS netcdf
+ nc = Dataset(nc_in,'a')
+
+ temp_na = np.array(nc.variables['TEMP'])
+
+ # create two nan filled arrays the length of the FV02 file, one for the mld and one for its uncertainty
+ nc_mld = np.full([1,temp_na.shape[1]], np.nan)[0]
+
+ nc_mld_uncert = np.full([1,temp_na.shape[1]], np.nan)[0]
+
+ # temp sensor depths
+ nc_temp_depths = np.array(nc.variables['DEPTH_TEMP'])
+
+ temp_na = temp_na[nc_temp_depths>5,:]
+
+ nc_temp_depths = nc_temp_depths[nc_temp_depths>5]
+
+ # boolean of sensors at the shallowest depth
+ shallowest_sensors = nc_temp_depths == np.min(nc_temp_depths)
+
+ # for each temperature profile where there is at least one non NaN value in the shallowest sensors
+ for i in np.where(~np.all(np.isnan(temp_na[shallowest_sensors]),axis=0))[0]:
+
+ # check there is at least one non NaN value in the deeper sensors
+ if np.any(~np.isnan(temp_na[~shallowest_sensors,i])):
+
+ # calculates the mean temperature of the available shallowest sensors to use as a reference to calculate MLD
+ shallow_temp = np.nanmean(temp_na[shallowest_sensors,i])
+
+ # extract temperature and depth data using a mean for the shallowest depth, and all non NaN data below
+ profile_temps = np.append(shallow_temp,temp_na[~shallowest_sensors,i][~np.isnan(temp_na[~shallowest_sensors,i])])
+
+ profile_depths = np.append(nc_temp_depths[0],nc_temp_depths[~shallowest_sensors][~np.isnan(temp_na[~shallowest_sensors,i])])
+
+ # check if the current profile contains any temperatures outside the specified threshold values
+ if np.any(temp_na[~shallowest_sensors,i]>=shallow_temp+thresh_in) or np.any(temp_na[~shallowest_sensors,i]<=shallow_temp-thresh_in):
+
+ # generate a linear interpolator for the profile, which returns nan if extrapolation is attempted
+ profile_interp_func = interpolate.interp1d(profile_temps,profile_depths,bounds_error=False,fill_value=np.nan)
+
+ # finds the shallowest depth at which the linear interpolation of the profile meets a threshold limit
+ nc_mld[i] = np.nanmin(profile_interp_func([shallow_temp+thresh_in,shallow_temp-thresh_in]))
+
+ # provides an estimate of uncertainty, by giving the distance to the furthest sensor used to interpolate the MLD
+ nc_mld_uncert[i] = np.max([np.abs(nc_mld[i]-[x for x in profile_depths if x < nc_mld[i]][-1]),np.abs(nc_mld[i]-next(x for x in profile_depths if x > nc_mld[i]))])
+
+
+ # if none of the sensors are outside the threshold
+ else:
+
+ # set the mld to the depth of the deepest non NaN sensor
+ nc_mld[i] = np.max(profile_depths)
+
+ # set the uncertainty to the distance between the sensor and the bottom
+ nc_mld_uncert[i] = 4600 - nc_mld[i]
+
+ # create the two variables
+ mld_var_out = nc.createVariable('MLDx', "f4", ("TIME",), fill_value=np.nan, zlib=True)
+ mld_var_out[:] = nc_mld
+ mld_var_out.units = 'm'
+ mld_var_out.comment = 'Calculated using the linear interpolation MLD algorithm found at: INSERT GITHUB ADDRESS'
+
+ mld_uncert_var_out = nc.createVariable('MLDx_standard_error', "f4", ("TIME",), fill_value=np.nan, zlib=True)
+ mld_uncert_var_out[:] = nc_mld_uncert
+ mld_uncert_var_out.units = 'm'
+
+ nc.close()
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file