diff --git a/matlab/add_interp_press.m b/matlab/add_interp_press.m
new file mode 100644
index 0000000..5ba9817
--- /dev/null
+++ b/matlab/add_interp_press.m
@@ -0,0 +1,275 @@
+% SOTS Pressure interpolator
+
+% This code imports pressure data from an aggregated file (constructed by
+% P.Jansen), and creates interpolated pressure records for FV00 raw
+% instrument files - firstly by interpolating along time series of pressure
+% readings in the aggregate file to find pressures at each time in a
+% particular FV00 file, and secondly by interpolating down nominal depths at
+% each timestamp to find a pressure value for each FV00 timestamp.
+
+% Ben Weeding - ben.weeding.26@gmail.com
+
+%% Load the filenames
+
+fv00_files = dir('*FV00*.nc');
+agg_files = dir('*Aggregate*.nc');
+
+%% Load the pressure data
+
+agg_pres = ncread(agg_files.name,'PRES');
+agg_pres_info = ncinfo(agg_files.name, 'PRES');
+agg_instrument_index = ncread(agg_files.name,'instrument_index');
+agg_nominal_depth  = ncread(agg_files.name,'NOMINAL_DEPTH');
+agg_time = ncread(agg_files.name,'TIME');
+
+% Here we prevent the use of bad data from Pulse 8
+
+if strfind(fv00_files(1).name,'Pulse-8')
+             
+    agg_pres(agg_instrument_index==2 & agg_time+datenum(1950,1,1,0,0,0) >= datenum('30-01-2012 05:00','dd-mm-yyyy HH:MM'))=NaN;
+   
+    %agg_pres(agg_instrument_index==2)=NaN;
+    
+end
+
+
+%% Interpolate the pressure and write the data into the FV00 file
+
+% Loop through each of the fv00 files
+for i=1:length(fv00_files)
+    
+    disp(fv00_files(i).name)
+    
+    % Extract the content from the FV00 file
+    fv00_contents = ncinfo(fv00_files(i).name);
+    
+    % Check if the FV00 file contains pressure data, run the interpolation
+    % code if not
+    
+    if (sum(contains({fv00_contents.Variables(:).Name}, 'PRES')) == 0)
+        
+        % Load the FV00 data requiring pressure        
+        %'days since 1950-01-01 00:00:00 UTC' for minilog T        
+        fv00_time = ncread(fv00_files(i).name,'TIME');        
+        fv00_depth = ncread(fv00_files(i).name,'NOMINAL_DEPTH');
+  
+        % Interpolate the agg pressure records at each nominal depth to
+        % provide pressure values at each timestamp in the current FV00
+        % file
+        
+        interp_agg_pres = nan(length(agg_nominal_depth)+1,length(fv00_time));
+        
+        % Include a row of zeros to set surface depth as 0 dbar
+        
+        interp_agg_pres(1,:) = zeros(size(fv00_time));
+        
+        agg_nominal_depth_with_0 = [0; agg_nominal_depth];
+        
+        % Loop through each nominal depth in the aggregate file, and get pressure for the FV00 file's time       
+        for j = 1:(length(agg_nominal_depth))
+            
+            % Select the relevant time and pressures
+            
+            time_selection = agg_time(agg_instrument_index == (j-1));            
+            pres_selection = agg_pres(agg_instrument_index == (j-1));
+                        
+            % Interpolate along each nominal depth
+
+            interp_agg_pres(j+1,:) = interp1(time_selection,pres_selection,fv00_time);
+        end   
+        
+        % Sort the nominal depths and pressures
+        
+        [agg_nominal_depth_with_0,sort_idx] = sort(agg_nominal_depth_with_0);
+        
+        interp_agg_pres = interp_agg_pres(sort_idx,:);
+        
+        
+        % Linearly interpolate at each timestamp to replace NaN values
+        
+        interp_agg_pres = fillmissing(interp_agg_pres,'linear','SamplePoints',agg_nominal_depth_with_0);
+        
+        
+        % At each timestamp in the FV00 record, interpolate a pressure
+        % value based on the FV00 nominal depth, and the interpolated
+        % pressures in interp_agg_pres. 
+        pres_interp_dummy = nan(size(fv00_time));     
+        
+       
+        for l = 1:length(fv00_time) 
+            
+            if sum(~isnan(interp_agg_pres(:,l))) > 1
+            
+                pres_interp_dummy(l) = interp1(agg_nominal_depth_with_0,interp_agg_pres(:,l),fv00_depth);                
+        
+            end
+            
+        end        
+        
+        pres_interp = pres_interp_dummy;
+        
+        % Create an FV01 version of the current FV00 file
+        
+        % Create the new FV01 file name
+        
+        fv01_name = strrep(fv00_files(i).name,'FV00','FV01');        
+        fv01_name(end-10:end-3)=datestr(now,'yyyymmdd');
+        
+        % Write the FV00 data into the FV01 file        
+        ncwriteschema(fv01_name, fv00_contents);
+        
+        % copy variable data to new file
+        for v = fv00_contents.Variables
+            ncwrite(fv01_name, v.Name, ncread(fv00_files(i).name, v.Name));
+        end
+        
+        % Modify the global attributes of the file to record processing,
+        % and add to the file history
+        
+        ncwriteatt(fv01_name,'/','file_version','Level 1 - partially processed');
+        hist = ncreadatt(fv00_files(i).name, '/', 'history');
+        ncwriteatt(fv01_name,'/','history',[hist newline datestr(now,'yyyy-mm-dd') ' : Added interpolated pressure from ' agg_files.name]);
+        
+        % Add and populate a PRES variable to the FV01 file
+        nccreate(fv01_name, 'PRES', 'Dimensions',{'TIME',size(pres_interp,1)}, 'FillValue',NaN);
+        ncwrite(fv01_name, 'PRES', pres_interp);
+                
+        % Add quality control variables to the FV01 file, assigning 8 to
+        % interpolated data in line with Argo
+        for v = fv00_contents.Variables
+            
+            if ~isempty(v.Dimensions)
+            
+                nccreate(fv01_name, v.Name + "_quality_control",'Dimensions',{v.Dimensions.Name,v.Dimensions.Length},'FillValue',99);
+                
+                ncwriteatt(fv01_name,v.Name + "_quality_control",'long_name',"quality_code for"+v.Name);
+                
+                ncwriteatt(fv01_name,v.Name,'ancillary_variables',v.Name + "_quality_control");
+                
+                if contains(v.Name,'PRES')
+                    
+                    ncwrite(fv01_name, v.Name + "_quality_control",8*ones(size(fv00_time)));
+                    
+                end
+                
+            end
+            
+        end
+        
+            
+            
+        % copy attributes from agg file to output file
+        pres_atts = agg_pres_info.Attributes; % get all attribtes from the aggregate file
+        for k=1:length(pres_atts)
+            if (strcmp(pres_atts(k).Name, '_FillValue') == 0)
+                ncwriteatt(fv01_name, 'PRES', pres_atts(k).Name, pres_atts(k).Value);
+            end
+        end
+        
+        % Add the relevant attributes to the PRES variable, including a
+        % comment noting that the data has been linearly interpolated        
+        ncwriteatt(fv01_name, 'PRES', 'comment','pressure data has been interpolated from surrounding pressure sensors');
+        
+    else
+        
+        % Load the FV00 data containing pressure        
+        %'days since 1950-01-01 00:00:00 UTC' for minilog T        
+        fv00_time = ncread(fv00_files(i).name,'TIME');        
+        fv00_depth = ncread(fv00_files(i).name,'NOMINAL_DEPTH');
+        fv00_pres = ncread(fv00_files(i).name,'PRES');
+        
+        % Remove bad data in pulse 8
+        
+        if strfind(fv00_files(i).name,'Pulse-8-2011-SBE16plusV2-01606330-34m')
+            
+            fv00_pres(4442:end) = NaN;
+            
+        end
+  
+        % Interpolate the agg pressure records at each nominal depth to
+        % provide pressure values at each timestamp in the current FV00
+        % file
+        
+        interp_agg_pres = nan(length(agg_nominal_depth)+1,length(fv00_time));
+        
+        % Include a row of zeros to set surface depth as 0 dbar
+        
+        interp_agg_pres(1,:) = zeros(size(fv00_time));
+        
+        agg_nominal_depth_with_0 = [0; agg_nominal_depth];
+        
+        % Loop through each nominal depth in the aggregate file, and get pressure for the FV00 file's time       
+        for j = 1:(length(agg_nominal_depth))
+            
+            % Select the relevant time and pressures
+            
+            time_selection = agg_time(agg_instrument_index == (j-1));            
+            pres_selection = agg_pres(agg_instrument_index == (j-1));
+                        
+            % Interpolate along each nominal depth
+
+            interp_agg_pres(j+1,:) = interp1(time_selection,pres_selection,fv00_time);
+        end   
+        
+        % Sort the nominal depths and pressures
+        
+        [agg_nominal_depth_with_0,sort_idx] = sort(agg_nominal_depth_with_0);
+        
+        interp_agg_pres = interp_agg_pres(sort_idx,:);
+        
+        
+        % Linearly interpolate at each timestamp to replace NaN values
+        
+        interp_agg_pres = fillmissing(interp_agg_pres,'linear','SamplePoints',agg_nominal_depth_with_0);
+        
+        for j = 1:length(fv00_pres)
+            
+            if isnan(fv00_pres(j))
+                
+                fv00_pres(j) = interp_agg_pres(agg_nominal_depth_with_0==fv00_depth,j);
+                
+            end
+            
+        end
+        
+        % Create an FV01 version of the current FV00 file
+        
+        % Create the new FV01 file name
+        
+        fv01_name = strrep(fv00_files(i).name,'FV00','FV01');        
+        fv01_name(end-10:end-3)=datestr(now,'yyyymmdd');
+        
+        % Write the FV00 data into the FV01 file        
+        ncwriteschema(fv01_name, fv00_contents);
+        
+        % copy variable data to new file
+        for v = fv00_contents.Variables
+            ncwrite(fv01_name, v.Name, ncread(fv00_files(i).name, v.Name));
+        end
+        
+        % Modify the global attributes of the file to record processing,
+        % and add to the file history
+        
+        ncwriteatt(fv01_name,'/','file_version','Level 1 - partially processed');
+        hist = ncreadatt(fv00_files(i).name, '/', 'history');
+        ncwriteatt(fv01_name,'/','history',[hist newline datestr(now,'yyyy-mm-dd') ' : Filled missing pressure with interpolated pressure from ' agg_files.name]);
+        
+        % Add and populate a PRES variable to the FV01 file
+        %nccreate(fv01_name, 'PRES', 'Dimensions',{'TIME',size(fv00_pres,1)}, 'FillValue',NaN);
+        ncwrite(fv01_name, 'PRES', fv00_pres);
+                
+        % copy attributes from agg file to output file
+        pres_atts = agg_pres_info.Attributes; % get all attribtes from the aggregate file
+        for k=1:length(pres_atts)
+            if (strcmp(pres_atts(k).Name, '_FillValue') == 0)
+                ncwriteatt(fv01_name, 'PRES', pres_atts(k).Name, pres_atts(k).Value);
+            end
+        end
+        
+        % Add the relevant attributes to the PRES variable, including a
+        % comment noting that the data has been linearly interpolated        
+        ncwriteatt(fv01_name, 'PRES', 'comment','originally missing pressure data has been interpolated from surrounding pressure sensors');
+        
+        
+    end    
+end
\ No newline at end of file
diff --git a/ocean_dp/aggregation/copyDataset.py b/ocean_dp/aggregation/copyDataset.py
index 0079d13..54eb960 100644
--- a/ocean_dp/aggregation/copyDataset.py
+++ b/ocean_dp/aggregation/copyDataset.py
@@ -15,14 +15,13 @@
 
 # similar more general tool project https://ncagg.readthedocs.io/en/latest/ (does not work on python3 2019-10-01)
 # has configurable way of dealing with attributes
-
 # file sets to test against
 # http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/NRS/NRSKAI/Temperature/catalog.html
 # http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/NRS/NRSKAI/Biogeochem_profiles/catalog.html
 # http://thredds.aodn.org.au/thredds/catalog/IMOS/ABOS/DA/EAC2000/catalog.html
 
 from dateutil.parser import parse
-
+ 
 
 def aggregate(files, varNames):
     # split this into   createCatalog - copy needed information into structure
@@ -39,7 +38,10 @@ def aggregate(files, varNames):
     # look over all files, create a time array from all files
     # TODO: maybe delete files here without variables we're not interested in
     # TODO: Create set of variables in all files
-
+    if not isinstance(varNames, list):
+        
+        varNames = [varNames]
+    
     filen = 0
     for path_file in files:
 
@@ -244,8 +246,8 @@ def aggregate(files, varNames):
 
     filen = 0
 
-    # variables we want regardless
-    varNames += ['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH']
+    # variables we want regardless 
+    varNames.extend(['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH'])
 
     # remove any duplicates
     varNamesOut = set(varNames)
@@ -344,7 +346,7 @@ def aggregate(files, varNames):
                         dMin = maVariableAll.max(0)
                         ncOut.setncattr("geospatial_vertical_max", dMax)
                         ncOut.setncattr("geospatial_vertical_min", dMin)
-
+                        
     dsIn.close()  # we're done with the varList now
 
     ncOut.close()
@@ -352,56 +354,56 @@ def aggregate(files, varNames):
     return outputName
 
 
-def collect_vars_to_agg(files):
+# def collect_vars_to_agg(files):
 
-    var_list = []
+#     var_list = []
 
-    nc = Dataset(files[0])
-    varList = nc.variables
+#     nc = Dataset(files[0])
+#     varList = nc.variables
 
-    # default to all variables in first file should no variable be specified
-    var_list.extend(varList.keys())
-    var_list.remove("TIME")
+#     # default to all variables in first file should no variable be specified
+#     var_list.extend(varList.keys())
+#     var_list.remove("TIME")
 
-    nc.close()
+#     nc.close()
 
-    print("collect_vars_to_agg::", var_list)
+#     print("collect_vars_to_agg::", var_list)
 
-    return var_list
+#     return var_list
 
 
-if __name__ == "__main__":
+# if __name__ == "__main__":
 
-    files = []
-    varToAgg = None # defaults to all in first file
+#     files = []
+#     varToAgg = None # defaults to all in first file
 
-    if len(sys.argv) > 1:
-        parser = argparse.ArgumentParser()
-        parser.add_argument('-v', action='append', dest='var', help='variable to include in output file (defaults to all)')
-        parser.add_argument('-f', dest='filelist', help='read file names from file')
-        parser.add_argument('file', nargs='*', help='input file name')
-        args = parser.parse_args()
+#     if len(sys.argv) > 1:
+#         parser = argparse.ArgumentParser()
+#         parser.add_argument('-v', action='append', dest='var', help='variable to include in output file (defaults to all)')
+#         parser.add_argument('-f', dest='filelist', help='read file names from file')
+#         parser.add_argument('file', nargs='*', help='input file name')
+#         args = parser.parse_args()
 
-        if not isinstance(args.filelist, type(None)):
-            with open(args.filelist, "r") as ins:
-                for line in ins:
-                    print(line)
-                    files.append(line.strip())
+#         if not isinstance(args.filelist, type(None)):
+#             with open(args.filelist, "r") as ins:
+#                 for line in ins:
+#                     print(line)
+#                     files.append(line.strip())
 
-        if len(args.file):
-            # files = args.file
-            for fn in args.file:
-                files.extend(glob.glob(fn))
+#         if len(args.file):
+#             # files = args.file
+#             for fn in args.file:
+#                 files.extend(glob.glob(fn))
 
-        varToAgg = args.var
+#         varToAgg = args.var
 
-    if isinstance(varToAgg, type(None)):
-        varToAgg = collect_vars_to_agg(files)
+#     if isinstance(varToAgg, type(None)):
+#         varToAgg = collect_vars_to_agg(files)
 
-    print("Aggregating variables ", varToAgg)
+#     print("Aggregating variables ", varToAgg)
 
-    outputName = aggregate(files, varToAgg)
+#     outputName = aggregate(files, varToAgg)
 
-    print("Output file :  %s" % outputName)
+#     print("Output file :  %s" % outputName)
 
 
diff --git a/ocean_dp/file_name/find_file_with.py b/ocean_dp/file_name/find_file_with.py
new file mode 100755
index 0000000..59225b3
--- /dev/null
+++ b/ocean_dp/file_name/find_file_with.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python3
+
+# raw2netCDF
+# Copyright (C) 2019 Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import glob
+
+import sys
+import re
+
+from netCDF4 import Dataset
+
+
+def find_files_pattern(file_pattern):
+    match_files = []
+    files = glob.glob(file_pattern)
+
+    match_files.extend(files)
+    return match_files
+
+def find_global(files, attribute, regexp):
+
+    match_files = []
+    #print("find", file_pattern, files)
+    for f in files:
+        #print("check file", f)
+        ds = Dataset(f, 'r')
+        if attribute in ds.ncattrs():
+            if re.match(regexp, ds.getncattr(attribute)):
+                match_files.append(f)
+        ds.close()
+
+    return match_files
+
+
+def find_variable(files, variable):
+
+    match_files = []
+    for f in files:
+        #print("check file", f)
+        ds = Dataset(f, 'r')
+        if variable in ds.variables:
+            match_files.append(f)
+        ds.close()
+
+    return match_files
+
+
+def find_variable_attribute(files, attribute, value):
+
+    match_files = []
+    for f in files:
+        #print("check file", f)
+        ds = Dataset(f, 'r')
+        nv = {attribute : value}
+        find = ds.get_variables_by_attributes(**nv)
+        if len(find) > 0:
+            match_files.append(f)
+        ds.close()
+
+    return match_files
+
+
+if __name__ == "__main__":
+    fns = []
+    if sys.argv[1] == '-v':
+        files = find_files_pattern(sys.argv[3])
+        fns = find_variable(files, variable=sys.argv[2])
+    elif sys.argv[1] == '-a':
+        files = find_files_pattern(sys.argv[4])
+        fns = find_variable_attribute(files, attribute=sys.argv[2], value=sys.argv[3])
+    else:
+        files = find_files_pattern(sys.argv[4])
+        fns = find_global(files, attribute=sys.argv[1], regexp=sys.argv[2])
+
+    for f in fns:
+        print(f)
\ No newline at end of file
diff --git a/ocean_dp/plotting/batch-qc.pdf b/ocean_dp/plotting/batch-qc.pdf
new file mode 100644
index 0000000..5e7a87b
Binary files /dev/null and b/ocean_dp/plotting/batch-qc.pdf differ
diff --git a/ocean_dp/plotting/density_plot.py b/ocean_dp/plotting/density_plot.py
new file mode 100755
index 0000000..a798b02
--- /dev/null
+++ b/ocean_dp/plotting/density_plot.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import os
+
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/file_name')
+
+import find_file_with
+
+path = "/Users/Tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"
+
+sots_files = find_file_with.find_files_pattern(os.path.join(path, "IMOS*FV00*.nc"))
\ No newline at end of file
diff --git a/ocean_dp/plotting/panda_maker.py b/ocean_dp/plotting/panda_maker.py
new file mode 100755
index 0000000..2fc9bfb
--- /dev/null
+++ b/ocean_dp/plotting/panda_maker.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+import pandas as pd
+
+import warnings
+import scipy.stats as st
+import statsmodels as sm
+import matplotlib
+
+# this function creates a pandas Datatable object, searching through all the 
+# netcdf files in the directory given, containing all the variables specified
+
+# "/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"
+
+# ["dTemp/dtime","dTemp/dSample","QC","Nominal depth","Deployment"]
+
+#  qc selection!!!
+
+def panda_maker(dir_spec,var_list,qc_lim=2):
+
+    # creates an empty array to store the names of the SOTS deployments
+    deployments = []
+    
+    checked_files = []
+    
+    processed_files = []
+    
+    # loops through all the folders and files contained in the folder
+    for x in os.listdir(dir_spec):
+        
+        # if the folder/file name contains 'Pulse' or 'SOFS' and doesn't contain '.', append it to deployments
+        if (('Pulse' in x) or ('SOFS' in x)) and ('.p' not in x):
+            
+            deployments.append(x)
+        
+        
+        
+    # create a dataframe to store extract information
+    total_df = pd.DataFrame(columns = var_list)
+    
+    # add deployment code to the dataframe
+    total_df.insert(len(var_list),'Deployment code',[])
+    
+    # loops through all files in the directory
+    for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+        
+        for fname in files:
+            
+            # append the filename to the list of checked files
+            checked_files.append(fname)
+            
+            # for each netcdf file labelled as FV01 and containing a deployment in its name
+            if fname.endswith('.nc') and 'FV01' in fname and any(ele in fname for ele in deployments):
+            
+                # print the filename
+                print(fname)  
+                
+                # open the file
+                nc = Dataset(os.path.join(root,fname), mode = 'r')
+                
+                # check file contains all the specified variables and the time format is correct
+                if (all(ele in list(nc.variables) for ele in var_list)) & (nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC'):
+                        
+                    # create a current dataframe for the netcdf file, to be appended to the overall dataframe
+                    cur_df = pd.DataFrame(columns=var_list)
+                    
+                    # create a qc vector for the netcdf file
+                    cur_qc = np.zeros(nc.variables["TIME"].shape)
+
+                    for cur_var in var_list:
+                        
+                        if np.array(nc.variables[cur_var]).size == 1:
+                            
+                            filling = np.ones(nc.variables["TIME"].shape) * np.array(nc.variables[cur_var])
+                            
+                        else:
+                            
+                            filling = np.array(nc.variables[cur_var][:])
+                            
+                        if cur_var + '_quality_control' in list(nc.variables):
+                            
+                            cur_qc = np.maximum(cur_qc,np.array(nc.variables[cur_var + '_quality_control']))        
+                            
+                            
+                        cur_df[cur_var] = filling
+                    
+                    cur_df['Deployment code'] = [nc.deployment_code] * len(np.array(nc.variables['TIME']))
+
+                    # append the current netcdf's dataframe to the sots_temp_ensemble
+                    total_df = total_df.append(cur_df.iloc[np.where(cur_qc<=qc_lim)])
+                    
+                    # append the filename to the list of processed files
+                    processed_files.append(fname)
+                        
+                        
+                nc.close()
+                
+    total_df = total_df.reset_index(drop=True)
+                
+                
+    return total_df
+                        
\ No newline at end of file
diff --git a/ocean_dp/plotting/plotNetCDFmultiqc.py b/ocean_dp/plotting/plotNetCDFmultiqc.py
new file mode 100755
index 0000000..9442a56
--- /dev/null
+++ b/ocean_dp/plotting/plotNetCDFmultiqc.py
@@ -0,0 +1,340 @@
+#!/usr/bin/python3
+
+# raw2netCDF
+# Copyright (C) 2019 Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from netCDF4 import Dataset
+from netCDF4 import num2date
+import datetime as dt
+import numpy as np
+import matplotlib
+
+matplotlib.use('Agg')
+
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_pdf import PdfPages
+import sys
+import os
+from matplotlib import rc
+
+# rc('text', usetex=True)
+
+for path_file in sys.argv[1:len(sys.argv)]:
+
+    nc = Dataset(path_file)
+
+    # get time variable
+    nctime = nc.variables['TIME'][:]
+    t_unit = nc.variables['TIME'].units  # get unit  "days since 1950-01-01T00:00:00Z"
+
+    try:
+        t_cal = nc.variables['TIME'].calendar
+    except AttributeError:  # Attribute doesn't exist
+        t_cal = u"gregorian"  # or standard
+
+    dt_time = [num2date(t, units=t_unit, calendar=t_cal) for t in nctime]
+
+    # work out variables to plot
+    nc_vars_to_plot = [var for var in nc.variables]
+
+    # remove any dimensions from the list to plot
+    nc_dims = [dim for dim in nc.dimensions]  # list of nc dimensions
+
+    for i in nc_dims:
+        try:
+            nc_vars_to_plot.remove(i)
+        except ValueError:
+            print('did not remove ', i)
+
+    # remove an auxiliary variables from the list to plot
+    aux_vars = list()
+    for var in nc.variables:
+        try:
+            aux_vars.append(nc.variables[var].getncattr('ancillary_variables'))
+        except AttributeError:
+            pass
+
+    # remove any variables without a TIME dimension from the list to plot
+    to_plot = list()
+
+    for var in nc.variables:
+        # print var
+        if var in nc_dims:
+            continue
+        if var in aux_vars:
+            continue
+        if 'TIME' in nc.variables[var].dimensions:
+            print('to plot ', var)
+            to_plot.append(var)
+
+    # pdffile = path_file[path_file.rfind('/')+1:len(path_file)] + '-' + nc.getncattr('deployment_code') + '-plot.pdf'
+
+    pdffile = path_file + '.pdf'
+
+    pp = PdfPages(pdffile)
+
+    txt = ""
+    lines = 0
+    plt.figure(figsize=(11.69, 8.27))
+
+    txt = 'file name : ' + os.path.basename(path_file) + '\n\n'
+
+    txt += 'Dimensions:\n'
+    for x in nc.dimensions:
+        txt += '    ' + x + ' (' + str(nc.dimensions[x].size) + ')\n'
+
+    txt += '\nVariables:\n'
+    for x in nc.variables:
+        v_atts = nc.variables[x]
+        var_line = '    ' + x + ' ' + str(v_atts.dimensions)
+
+        try:
+            var_line += ' : long_name = ' + v_atts.long_name
+        except AttributeError:
+            pass
+        try:
+            var_line += ' (' + v_atts.units + ')'
+        except AttributeError:
+            pass
+        var_line += ' : type ' + str(v_atts.datatype)
+
+        print(var_line)
+
+        lines = txt.count('\n') + var_line.count('\n')
+        # print("lines ", lines)
+        if lines > 57:
+            #print(txt)
+            print('new page')
+            plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+            plt.axis('off')
+            pp.savefig()
+            plt.close()
+            plt.figure(figsize=(11.69, 8.27))
+
+            txt = ""
+
+            lines = 0
+
+        txt += var_line + '\n'
+
+    plt.figure(figsize=(11.69, 8.27))
+
+    plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+    plt.axis('off')
+    pp.savefig()
+    plt.close()
+
+    txt = ""
+    plt.figure(figsize=(11.69, 8.27))
+
+    lines = 0
+    # print "NetCDF Global Attributes:"
+    for nc_attr in sorted(nc.ncattrs(), key=lambda s: s.lower()):
+        #print('\t%s:' % nc_attr, repr(nc.getncattr(nc_attr)))
+        attrib_txt = nc_attr + ' : ' + str(nc.getncattr(nc_attr)).replace('\n', '\n   ') + '\n'
+        lines = txt.count('\n') + attrib_txt.count('\n')
+        # print("lines ", lines)
+        if lines > 57:
+            #print(txt)
+            print('new page')
+            plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+            plt.axis('off')
+            pp.savefig()
+            plt.close()
+            plt.figure(figsize=(11.69, 8.27))
+
+            txt = ""
+
+            lines = 0
+
+        txt += attrib_txt
+
+        lines += 1
+
+    #print(txt)
+    plt.text(-0.1, -0.1, txt, fontsize=8, family='monospace')
+    plt.axis('off')
+    pp.savefig()
+    plt.close()
+
+    # plot each variable in the to_plot list
+    for plot in to_plot:
+
+        plot_var = nc.variables[plot]
+
+        var = plot_var[:]
+        shape_len = len(var.shape)
+
+        # create a page with information about the variable, and any aux variables
+        fig = plt.figure(figsize=(11.69, 8.27))
+
+        text = "Variable : " + plot_var.name + str(plot_var.dimensions) + "\n"
+        nc_attrs = plot_var.ncattrs()
+        # print "NetCDF Variable Attributes:"
+        for nc_attr in nc_attrs:
+            attrVal = plot_var.getncattr(nc_attr)
+            #print('\t%s:' % nc_attr, repr(plot_var.getncattr(nc_attr)), type(attrVal))
+            text += nc_attr + ' : ' + str(attrVal) + '\n'
+
+        if hasattr(plot_var, 'ancillary_variables'):
+            qc_var_name = plot_var.getncattr('ancillary_variables')
+            qc_var = nc.variables[qc_var_name]
+
+            text += "\nAUX : " + qc_var.name + str(qc_var.dimensions) + "\n"
+
+            nc_attrs = qc_var.ncattrs()
+            # print "NetCDF AUX Variable Attributes:"
+            for nc_attr in nc_attrs:
+                # print '\t%s:' % nc_attr, repr(nc.getncattr(nc_attr))
+                text += nc_attr + ' : ' + str(qc_var.getncattr(nc_attr)) + '\n'
+
+            qc = nc.variables[qc_var_name][:]
+
+            if plot_var.dimensions[0] != 'TIME':
+                qc = np.transpose(qc)
+
+            qc = np.squeeze(qc)
+        else:
+            qc = 0
+
+        plt.text(-0.1, 0.0, text, fontsize=8, family='monospace')
+        plt.axis('off')
+        pp.savefig(fig)
+        plt.close(fig)
+
+        print(plot_var.name, " shape ", var.shape, " len ", shape_len)
+
+        # now create a page with the plot
+
+        fig = plt.figure(figsize=(11.69, 8.27))
+        ax = plt.subplot(111)
+
+        if plot_var.dimensions[0] != 'TIME':
+            var = np.transpose(var)
+        var = np.squeeze(var)
+
+        # create range from only good data
+        qc_m = np.ma.masked_where((qc == 9) | (qc == 4) | (qc == 6), var)
+        mx = qc_m.max()
+        mi = qc_m.min()
+
+        marg = (mx - mi) * 0.1
+        print("max ", mx, " min ", mi)
+
+        plt.ylim([mi - marg, mx + marg])
+
+        # create a legend entry made from serial_number and depth
+        if hasattr(plot_var, 'sensor_serial_number'):
+            sn = plot_var.getncattr('sensor_serial_number').split('; ')
+        elif hasattr(plot_var, 'sensor_serial_number'):
+            sn = nc.getncattr('instrument_serial_number').split('; ')
+        else:
+            sn = 'not found'
+
+        if hasattr(plot_var, 'sensor_depth'):
+            dpth = plot_var.getncattr('sensor_depth').split('; ')
+        elif hasattr(plot_var, 'sensor_height'):
+            dpth = plot_var.getncattr('sensor_height').split('; ')
+        elif hasattr(nc, 'instrument_nominal_depth'):
+            dpth = str(nc.getncattr('instrument_nominal_depth')).split('; ')
+        else:
+            dpth = 'unknown'
+
+        print("depth ", dpth)
+
+        leg = [x + ' (' + y + ' m)' for x, y in zip(sn, dpth)]
+
+        # if less than 200 points plot with a dot and line
+        plot_marks = '-'
+        if len(dt_time) < 200:
+            plot_marks = '.-'
+
+        pl = ax.plot(dt_time, qc_m, plot_marks)
+
+        # mark qc>2 with yellow dot, qc>3 with red dot
+        qc_m = np.ma.masked_where((qc <= 2) | (qc == 8), var)
+        ax.plot(dt_time, qc_m, 'yo')
+        qc_m = np.ma.masked_where((qc <= 3) | (qc == 8), var)
+        ax.plot(dt_time, qc_m, 'ro')
+
+        # shrink the plot some
+        box = ax.get_position()
+        ax.set_position([box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9])
+
+        # add legend below plot
+        #plt.legend(iter(pl), leg, loc='lower center', bbox_to_anchor=(0.5, -0.05), ncol=5)
+
+        plt.legend(iter(pl), leg, bbox_to_anchor=(0.0, -0.2, 1.0, -0.15), loc=3, ncol=6, mode="expand", borderaxespad=0.0, fontsize='x-small')
+
+        # invert the yaxis if the units are dbar
+        try:
+            if plot_var.units == 'dbar':
+                plt.gca().invert_yaxis()
+        except AttributeError:
+            pass
+        try:
+            if plot_var.positive == 'down':
+                plt.gca().invert_yaxis()
+        except AttributeError:
+            pass
+
+        #fig.autofmt_xdate()
+        plt.grid()
+
+        # add deployment/instrument/standard name as title
+
+        # plt.title(nc.getncattr('deployment_code') + ' : ' + plot_var.sensor_name + ' ' + \
+        #          plot_var.sensor_serial_number + ' : ' + plot_var.name, fontsize=10)
+
+        # plt.title(nc.getncattr('deployment_code') + ' : ' + plot_var.getncattr('name'), fontsize=10)
+        try:
+            plt.title(nc.getncattr('deployment_code'), fontsize=10)
+        except AttributeError:
+            pass
+
+        # add units to Y axis
+        try:
+            plt.ylabel(plot + ' (' + plot_var.units + ')')
+        except AttributeError:
+            pass
+
+        date_time_start = None
+        date_time_end = None
+
+        # plot only the time of deployment
+        try:
+            date_time_start = dt.datetime.strptime(nc.getncattr('time_coverage_start'), '%Y-%m-%dT%H:%M:%SZ')
+            date_time_end = dt.datetime.strptime(nc.getncattr('time_coverage_end'), '%Y-%m-%dT%H:%M:%SZ')
+        except AttributeError:
+            pass
+        try:
+            date_time_start = dt.datetime.strptime(nc.getncattr('time_deployment_start'), '%Y-%m-%dT%H:%M:%SZ')
+            date_time_end = dt.datetime.strptime(nc.getncattr('time_deployment_end'), '%Y-%m-%dT%H:%M:%SZ')
+        except AttributeError:
+            pass
+
+        if date_time_start:
+            plt.xlim(date_time_start, date_time_end)
+
+        # plt.savefig(plot + '.pdf')
+        pp.savefig(fig, papertype='a4')
+        plt.close(fig)
+
+    # plt.show()
+
+    pp.close()
+
+    nc.close()
diff --git a/ocean_dp/plotting/plotQC.py b/ocean_dp/plotting/plotQC.py
new file mode 100755
index 0000000..979976a
--- /dev/null
+++ b/ocean_dp/plotting/plotQC.py
@@ -0,0 +1,88 @@
+#!/usr/bin/python3
+
+# raw2netCDF
+# Copyright (C) 2019 Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import xarray as xr
+
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_pdf import PdfPages
+import seaborn as sns
+
+from pandas.plotting import register_matplotlib_converters
+
+import sys
+import os
+
+
+def plot_all(files):
+    register_matplotlib_converters()
+    plt.style.use('seaborn-darkgrid')
+    sns.set_context("paper")
+
+    pp = PdfPages(os.path.join(os.path.dirname(files[0]), "batch-qc.pdf"))
+
+    for f in files:
+        print("file ", f)
+        do_plot(f)
+
+        pp.savefig()
+        plt.close()
+
+    pp.close()
+
+
+def plot(fn):
+    register_matplotlib_converters()
+    plt.style.use('seaborn-darkgrid')
+    sns.set_context("paper")
+
+    pp = PdfPages(fn + "-qc.pdf")
+
+    do_plot(fn)
+
+    pp.savefig()
+    pp.close()
+    plt.close()
+
+
+def do_plot(fn):
+
+    #fn = sys.argv[1]
+    #fn = '/Users/pete/cloudstor/SOTS-Temp-Raw-Data/SOFS-7.5-2018/netCDF/IMOS_ABOS-SOTS_TIP_20180801_SOFS_FV01_SOFS-7.5-2018-Starmon-mini-4047-40m_END-20190331_C-20200429.nc'
+
+    DS = xr.open_dataset(fn)
+
+    ax1 = plt.subplot(2, 1, 1)
+    plt.plot(DS.TIME, DS.TEMP)
+    plt.title(DS.deployment_code + " - " + DS.instrument_model + ":" + DS.instrument_serial_number + " @ " + str(DS.instrument_nominal_depth), {'fontsize': 8})
+
+    ax2 = plt.subplot(2, 1, 2, sharex=ax1)
+    aux = DS.TEMP.ancillary_variables
+    a_vars = aux.split(" ")
+    for f in sorted(set(a_vars)):
+        print('aux var', f)
+        varn = f.split("_")
+        plt.plot(DS.TIME, DS.variables[f], label=varn[-1])
+    plt.ylim(0, 9)
+
+    plt.legend(prop={'size': 6})
+
+    DS.close()
+
+
+if __name__ == "__main__":
+    plot_all(sys.argv[1:])
\ No newline at end of file
diff --git a/ocean_dp/processing/addCO2.py b/ocean_dp/processing/addCO2.py
new file mode 100755
index 0000000..8a14af8
--- /dev/null
+++ b/ocean_dp/processing/addCO2.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jan 21 14:58:20 2020
+
+@author: tru050
+"""
+
+
+
+
+from netCDF4 import Dataset
+import sys
+import gsw
+import numpy as np
+from datetime import datetime
+import pandas
+
+# addCO2 takes a SOTS FV02 gridded product netCDFfile as an input, and adds 
+# CO2 data (delivered from NOAA in a csv file) to the netCDFfile
+def addCO2(netCDFfile):
+    
+    # Import the SOTS netcdf
+    ds = Dataset(netCDFfile, 'a')
+    
+    # Extract the time variable, in serial date numbers (days since 01/01/1950)
+    var_time = ds.variables["TIME"]
+
+    # Convert the variable object to an array
+    netcdf_serials = np.array(var_time[:])
+    
+    # Read in the CO2 csv file, ignoring the first five rows
+    dcsv = pandas.read_csv('SOFS_prelimdata_Nov2019test.csv',header=5)
+    
+    # Convert the dataframe to an array
+    dc = dcsv.to_numpy()
+    
+    csv_dates = []
+    
+    # Create a list of datetimes from the csv
+    for i in range(len(dc)):
+    
+        csv_dates.append(datetime.strptime(dc[i,0],'%m/%d/%Y %H:%M'))
+        
+    # Calculate the difference between the csv dates and 01/01/1950 in order
+    # to convert them to the serial date format of the netcdf
+    time_offset_1950 = datetime(1950,1,1,0,0,0)
+    
+    csv_delta= []
+    
+    for i in range(len(dc)):
+    
+        csv_delta.append(csv_dates[i] - time_offset_1950)
+        
+    
+    # Convert the datetimes from the csv into an array of serial date numbers
+    csv_serials = []
+    
+    for i in range(len(dc)):
+    
+        csv_serials.append(csv_delta[i].days + csv_delta[i].seconds/86400)
+        
+    csv_serials = np.array(csv_serials)
+    
+    # Find the indices of timestamps of the csv file that are in the deployment
+    # period of the netcdf file
+    matching_index = (netcdf_serials[0] <= csv_serials) &  (csv_serials <= netcdf_serials[-1])
+    
+    new_vars = ['XCO2_PRES','XCO2_OCEAN','XCO2_AIR','XCO2_PSAL','XCO2_SSTEMP']
+    
+    new_units = ['kPa','umol/mol','umol/mol','Presumed PSU - not specified','deg C']
+    
+    # For each of the variables in the csv file (except time), linearly 
+    # interpolate to the timestamps of the netcdf file    
+    for i in range(0,len(new_vars)):
+        
+        ncVarOut = ds.createVariable(new_vars[i], "f4", ("TIME",), fill_value=np.nan, zlib=True)
+        
+        ncVarOut[:] =  np.interp(netcdf_serials,csv_serials[matching_index],np.array(dcsv[dcsv.columns[i+1]])[matching_index].astype('float64'))
+        
+        ncVarOut.units = new_units[i]
+        
+        ncVarOut.comment = "imported from 'SOFS_prelimdata_Nov2019.csv'"
+        
+    # update the history attribute
+    try:
+        hist = ds.history + "\n"
+    except AttributeError:
+        hist = ""
+
+    ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + " : added 'XCO2_PRES','XCO2_OCEAN','XCO2_AIR','XCO2_PSAL','XCO2_SSTEMP' from 'SOFS_prelimdata_Nov2019.csv'")
+
+    ds.close()
+        
\ No newline at end of file
diff --git a/ocean_dp/processing/add_density.py b/ocean_dp/processing/add_density.py
new file mode 100755
index 0000000..9320ddf
--- /dev/null
+++ b/ocean_dp/processing/add_density.py
@@ -0,0 +1,80 @@
+# Copyright (C) 2020 Ben Weeding and Peter Jansen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+from netCDF4 import Dataset
+import sys
+import gsw
+import numpy as np
+from datetime import datetime
+
+# add density to a data file with TEMP, PSAL, PRES variables, many assumptions are made about the input file
+# based on Peter Jansen's addPSAL.py, using TEOS-10
+
+def add_density(netCDFfile):
+    
+    # loads the netcdf file
+    ds = Dataset(netCDFfile, 'a')
+    
+    if 'DENSITY' in list(ds.variables):
+    
+        ds.close()
+        
+        return "file already contains density"
+
+    # extracts the variables from the netcdf
+    var_temp = ds.variables["TEMP"]
+    var_psal = ds.variables["PSAL"]
+    var_pres = ds.variables["PRES"]
+    var_lon = ds.variables["LONGITUDE"]
+    var_lat = ds.variables["LATITUDE"]
+
+    # extracts the data from the variables
+    t = var_temp[:]
+    psal = var_psal[:]
+    p = var_pres[:]
+    lon = var_lon[:]
+    lat = var_lat[:]
+    
+    # calculates absolute salinity
+    SA = gsw.SA_from_SP(psal, p, lon, lat)
+    
+    # calculates conservative temperature
+    CT = gsw.CT_from_t(SA, t, p)
+    
+    # calculates density
+    density = gsw.rho(SA, CT, p)
+    
+    # generates a new variable 'DENSITY' in the netcdf
+    ncVarOut = ds.createVariable("DENSITY", "f4", ("TIME",), fill_value=np.nan, zlib=True)  # fill_value=nan otherwise defaults to max
+    
+    # assigns the calculated densities to the DENSITY variable, sets the units as kg/m^3, and comments on the variable's origin
+    ncVarOut[:] = density
+    ncVarOut.units = "kg/m^3"
+    ncVarOut.comment = "calculated using gsw-python https://teos-10.github.io/GSW-Python/index.html"
+
+    # update the history attribute
+    try:
+        hist = ds.history + "\n"
+    except AttributeError:
+        hist = ""
+
+    ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + " : added DENSITY from TEMP, PSAL, PRES, LAT, LON")
+
+    ds.close()
+
+
+if __name__ == "__main__":
+    add_density(sys.argv[1])
diff --git a/ocean_dp/processing/add_mld.py b/ocean_dp/processing/add_mld.py
new file mode 100755
index 0000000..112ce35
--- /dev/null
+++ b/ocean_dp/processing/add_mld.py
@@ -0,0 +1,101 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from netCDF4 import Dataset, num2date
+import sys
+from datetime import datetime as dt
+import numpy as np
+import pandas as pd
+from scipy import interpolate
+
+def add_mld(nc_in,thresh_in=0.2):
+    
+    # opens the supplied IMOS netcdf
+    nc = Dataset(nc_in,'a')
+    
+    temp_na = np.array(nc.variables['TEMP'])
+
+    # create two nan filled arrays the length of the FV02 file, one for the mld and one for its uncertainty
+    nc_mld = np.full([1,temp_na.shape[1]], np.nan)[0]
+    
+    nc_mld_uncert = np.full([1,temp_na.shape[1]], np.nan)[0]
+    
+    # temp sensor depths
+    nc_temp_depths = np.array(nc.variables['DEPTH_TEMP'])
+    
+    temp_na = temp_na[nc_temp_depths>5,:]
+    
+    nc_temp_depths = nc_temp_depths[nc_temp_depths>5]
+
+    # boolean of sensors at the shallowest depth
+    shallowest_sensors = nc_temp_depths == np.min(nc_temp_depths)
+
+    # for each temperature profile where there is at least one non NaN value in the shallowest sensors
+    for i in np.where(~np.all(np.isnan(temp_na[shallowest_sensors]),axis=0))[0]:
+        
+        # check there is at least one non NaN value in the deeper sensors
+        if np.any(~np.isnan(temp_na[~shallowest_sensors,i])):
+        
+            # calculates the mean temperature of the available shallowest sensors to use as a reference to calculate MLD
+            shallow_temp = np.nanmean(temp_na[shallowest_sensors,i])
+            
+            # extract temperature and depth data using a mean for the shallowest depth, and all non NaN data below
+            profile_temps = np.append(shallow_temp,temp_na[~shallowest_sensors,i][~np.isnan(temp_na[~shallowest_sensors,i])])
+                
+            profile_depths = np.append(nc_temp_depths[0],nc_temp_depths[~shallowest_sensors][~np.isnan(temp_na[~shallowest_sensors,i])])
+                
+            # check if the current profile contains any temperatures outside the specified threshold values
+            if np.any(temp_na[~shallowest_sensors,i]>=shallow_temp+thresh_in) or np.any(temp_na[~shallowest_sensors,i]<=shallow_temp-thresh_in):
+                
+                # generate a linear interpolator for the profile, which returns nan if extrapolation is attempted
+                profile_interp_func = interpolate.interp1d(profile_temps,profile_depths,bounds_error=False,fill_value=np.nan)
+            
+                # finds the shallowest depth at which the linear interpolation of the profile meets a threshold limit
+                nc_mld[i] = np.nanmin(profile_interp_func([shallow_temp+thresh_in,shallow_temp-thresh_in]))
+                
+                # provides an estimate of uncertainty, by giving the distance to the furthest sensor used to interpolate the MLD
+                nc_mld_uncert[i] = np.max([np.abs(nc_mld[i]-[x for x in profile_depths if x < nc_mld[i]][-1]),np.abs(nc_mld[i]-next(x for x in profile_depths if x > nc_mld[i]))])
+            
+            
+            # if none of the sensors are outside the threshold
+            else:
+                
+                # set the mld to the depth of the deepest non NaN sensor
+                nc_mld[i] = np.max(profile_depths)
+
+                # set the uncertainty to the distance between the sensor and the bottom
+                nc_mld_uncert[i] = 4600 - nc_mld[i]
+
+    # create the two variables 
+    mld_var_out = nc.createVariable('MLDx', "f4", ("TIME",), fill_value=np.nan, zlib=True)
+    mld_var_out[:] = nc_mld
+    mld_var_out.units = 'm'
+    mld_var_out.comment = 'Calculated using the linear interpolation MLD algorithm found at: INSERT GITHUB ADDRESS'
+    
+    mld_uncert_var_out = nc.createVariable('MLDx_standard_error', "f4", ("TIME",), fill_value=np.nan, zlib=True)
+    mld_uncert_var_out[:] = nc_mld_uncert
+    mld_uncert_var_out.units = 'm'
+    
+    nc.close()
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
\ No newline at end of file
diff --git a/ocean_dp/processing/netcdf_to_df.py b/ocean_dp/processing/netcdf_to_df.py
new file mode 100755
index 0000000..4a409a4
--- /dev/null
+++ b/ocean_dp/processing/netcdf_to_df.py
@@ -0,0 +1,211 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from netCDF4 import Dataset, num2date
+import pandas as pd
+import numpy as np
+from datetime import datetime as dt
+import glob
+import re
+
+# =============================================================================
+# Returns a list of the time series variables in a IMOS format 
+# netcdf file. Takes an open netcdf as its argument.
+# =============================================================================
+def var_selector_inc_time(nc,qc=False,):
+    
+    x = [x for x in list(nc.variables) if ('_quality_control_' not in x) & (nc.variables[x].shape!=())]
+        
+    return x    
+
+# =============================================================================
+# 
+# =============================================================================
+def netcdf_to_df(target_file):
+    
+    # open the inputted netcdf
+    nc = Dataset(target_file,mode='r')
+    
+    # creates a the list of variables to transfer to the dataframe
+    vars_to_transfer = var_selector_inc_time(nc)
+    
+    # creates the dataframe with column labels
+    df = pd.DataFrame(columns = vars_to_transfer)
+    
+    # sorts the columns alphabetically, with the relevant qc variable following each timeseries variable
+    df.sort_index(axis=1, inplace=True)
+    
+    # fill the dataframe from the netcdf, variable by variable
+    for cur_var in vars_to_transfer:
+        
+        df[cur_var] = np.array(nc.variables[cur_var])    
+    
+    # store deployment times in attributes
+    df.attrs['time_deployment_start'] = nc.time_deployment_start
+    df.attrs['time_deployment_end'] = nc.time_deployment_end
+    df.attrs['nominal_depth'] = nc.variables['NOMINAL_DEPTH'][0]
+    
+    # extract the column names
+    col_names = list(df.columns)
+            
+    # append the nominal depth to all column names
+    df.columns = [x.replace('quality_control',str(nc.variables['NOMINAL_DEPTH'][0])+'_quality_control') if 'quality_control' in x else x if 'TIME' in x else x + '_' + str(nc.variables['NOMINAL_DEPTH'][0]) for x in col_names]        
+    
+    nc.close()
+       
+    return df
+    
+    
+# =============================================================================
+# Takes 
+# =============================================================================
+        
+def combine_df(target_dfs):
+    
+    total_df = pd.DataFrame({'dummy' : []})
+    
+    # for each of the dataframes in the list provided
+    for cur_df in target_dfs:
+        
+        # make a copy of the current dataframe to modify and combine
+        df = cur_df.copy()
+        
+        # convert the IMOS format times to datetime 
+        df['TIME']=pd.to_timedelta(df['TIME'],unit='D')+dt(1950,1,1)
+        
+        # index the dataframe by time - for some reason this makes the df very slow to visually open and navigate!?  
+        df = df.set_index('TIME')
+        
+        # extract and convert deployment times to datetime 
+        start_time = dt.strptime(df.attrs['time_deployment_start'],'%Y-%m-%dT%H:%M:%SZ')
+        end_time = dt.strptime(df.attrs['time_deployment_end'],'%Y-%m-%dT%H:%M:%SZ')
+        
+        # trim the df to only include in water data
+        df = df.drop(df[(df.index < start_time) | (df.index > end_time)].index)
+        
+        # remove data with bad qc instead of setting to nan later in process, let resample do the work?
+        # but what if psal is bad but temp is good?? Need to think on this.
+        
+        # resamples using the max method, to create a df of the correct dimensions to fill
+        df_to_fill = df.resample('H',base=0.5).min()
+        
+        
+        # gets list of column names
+        col_names = list(df.columns)
+        
+        # makes a list of non qc column names
+        col_names_no_qc = [x for x in col_names if 'quality_control' not in x]
+        
+        # for each of the time series data columns
+        for cur_col in col_names_no_qc:
+            
+            # sets the value of non qc data to nan if the corresponding qc value is not satisfactory (0,1,2,7 at the moment)
+            #df.loc[(df[cur_col+'_quality_control'] > 2) & (df[cur_col+'_quality_control'] != 7), cur_col] = np.nan
+            # CAUSING UNEXPECTED NANS - FIX
+            df.loc[df[cur_col+'_quality_control'].isin([3,4,6,9]) , cur_col] = np.nan
+            
+            # extracts the time series data
+            dS = pd.Series(df[cur_col])
+            
+            # makes a copy for bin counting
+            dS_1s = dS.copy()
+            
+            dS_1s[:] = 1
+            
+            # resamples the series, interpoling linearly
+            dS_resampled = dS.resample('H',base=0.5).interpolate(method='index',axis=0,limit=1000000)
+
+            
+            # count how many data points are in each shoulder bin
+            dS_bin_counts = dS_1s.resample('H',base=0.5).sum()
+            
+            # fill the interpolated data back into the dataframe
+            df_to_fill[cur_col] = dS_resampled
+            
+            # give any interpolated point without any data within its hour window a qc code of 7
+            df_to_fill.loc[dS_bin_counts==0,[cur_col+'_quality_control']] = 7
+            
+        # shift the timestamps to the middle of the hour sampling period
+        df_to_fill.index = df_to_fill.index + pd.Timedelta('30 min')
+        
+        total_df = pd.concat([total_df,df_to_fill], join='outer', axis=1)
+        
+        print(cur_df)
+        
+        print(len(total_df))
+        
+    total_df.drop(['dummy'],axis=1,inplace=True)
+    
+    return total_df
+
+
+# =============================================================================
+# 
+# =============================================================================
+        
+def depth_from_file(file_in):
+    
+    result = int(re.findall(r'(?<=-)\w+(?=m_END)', file_in)[0])
+    
+    return result
+
+# =============================================================================
+# 
+# =============================================================================
+
+netcdfs = glob.glob('*FV01*.nc')
+
+netcdfs = sorted(netcdfs,key=depth_from_file)
+
+df_list = list()
+
+for cur_netcdf in netcdfs:
+    
+    df = netcdf_to_df(cur_netcdf)
+    
+    df_list.append(df)
+    
+
+        
+        
+
+            
+        
+        
+        
+        
+        
+        
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+
+        
+    
+    
+
diff --git a/ocean_dp/processing/panda_merger.py b/ocean_dp/processing/panda_merger.py
new file mode 100755
index 0000000..13a789c
--- /dev/null
+++ b/ocean_dp/processing/panda_merger.py
@@ -0,0 +1,132 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+from datetime import datetime as dt
+from datetime import timedelta 
+import numpy as np
+import glob
+import pandas as pd
+import re
+
+
+
+
+files = glob.glob('*FV00*.nc')
+
+var_name = 'TEMP'
+
+def depth_from_file(file_in):
+    
+    result = int(re.findall(r'(?<=-)\w+(?=m_END)', file_in)[0])
+    
+    return result
+
+def var_selector(file):
+    
+    [x for x in var_list if (x!='TIME') & ('_quality_control' not in x) & (nc.variables[x].shape!=())]
+        
+    return x    
+
+
+
+def panda_combine(files,var_name):
+    
+    files.sort(key=depth_from_file)
+    
+    total_df = pd.DataFrame({'dummy' : []})
+    
+    # make a sorting index for columns from nominal depths
+    
+    for cur_file in files:
+        
+        cur_nc = Dataset(cur_file,mode='r')
+        
+        cur_df = pd.DataFrame({'TIME':np.array(cur_nc.variables['TIME'][:]),var_name+'_'+str(cur_nc.variables['NOMINAL_DEPTH'][0]):np.array(cur_nc.variables[var_name][:])})
+
+        cur_df['TIME']=pd.to_timedelta(cur_df['TIME'],unit='D')+dt(1950,1,1)
+            
+        cur_df = cur_df.set_index('TIME')
+        
+        cur_df = cur_df.resample('H',base=0.5).mean()
+        
+        cur_df.index = cur_df.index + pd.Timedelta('30 min')
+        
+        total_df = pd.concat([total_df,cur_df], join='outer', axis=1)
+        
+        print(cur_file)
+        
+        print(len(total_df))
+        
+    total_df.drop(['dummy'],axis=1,inplace=True)
+            
+    return total_df
+
+
+result = re.findall(r'(?<=-)\w+(?=m_END)', cur_file)
+
+
+
+
+
+
+# =============================================================================
+# Old proof of concept code
+# =============================================================================
+
+# # import two netcdf
+# nc1 = Dataset('IMOS_ABOS-SOTS_COPST_20180801_SOFS_FV00_SOFS-7.5-2018-SBE37SMP-ODO-RS232-03715971-200m_END-20190324_C-20200401.nc',mode='r')
+
+# nc2 = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4048-45m_END-20190331_C-20200401.nc',mode='r')
+
+# # convert their time and temp data into dataframes
+# df1 = pd.DataFrame({'TIME':np.array(nc1.variables['TIME'][:]),'TEMP_'+str(nc1.variables['NOMINAL_DEPTH'][0]):np.array(nc1.variables['TEMP'][:])})
+
+# df2 = pd.DataFrame({'TIME':np.array(nc2.variables['TIME'][:]),'TEMP_'+str(nc2.variables['NOMINAL_DEPTH'][0]):np.array(nc2.variables['TEMP'][:])})
+
+# # convert the times from days since 01-01-1950 to a datetime object
+# df1['TIME']=pd.to_timedelta(df1['TIME'],unit='D')+dt(1950,1,1)
+
+# df2['TIME']=pd.to_timedelta(df2['TIME'],unit='D')+dt(1950,1,1)
+
+# # index the dataframes by time
+# df1=df1.set_index('TIME')
+
+# df2=df2.set_index('TIME')
+
+
+# # resample the data, calculating the mean over hourly periods, starting on the half hour
+# df1=df1.resample('H',base=0.5).mean()
+
+# df2=df2.resample('H',base=0.5).mean()
+
+# # reset the labels so they read the hour in the centre of the averaging period
+# df1.index = df1.index + pd.Timedelta('30 min')
+
+# df2.index = df2.index + pd.Timedelta('30 min')
+
+
+# # combine the two dataframes based on their time indicies, recording nan if one sensor doesn't have a reading for that timestamp
+# total_df = pd.concat([df1,df2], join='outer', axis=1)
+
+
+
+
+
+
+
diff --git a/ocean_dp/processing/pressure_interpolator.py b/ocean_dp/processing/pressure_interpolator.py
new file mode 100755
index 0000000..0473144
--- /dev/null
+++ b/ocean_dp/processing/pressure_interpolator.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Feb  4 11:05:16 2020
+
+@author: tru050
+"""
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import numpy as np
+import sys
+from netCDF4 import Dataset
+import numpy
+import argparse
+import glob
+import pandas as pd
+import scipy
+import os
+import shutil
+
+# Supply netCDFfiles as a ['list'] of files, agg as a 'string'
+
+def pressure_interpolator(netCDFfiles = [],agg = []):
+    
+    files_out = []
+    
+    if netCDFfiles==[]:
+        
+        print('netcdffiles = none')
+    
+        # Load the filenames of the fv01 files in the current folder
+        netCDFfiles = glob.glob('*FV01*.nc')
+            
+    if agg == []:
+        
+        print('agg = none')
+        
+    # Extract the aggregate file data
+        agg = Dataset(glob.glob('*Aggregate*.nc')[0], mode="r")
+        
+    else:
+        
+        agg = Dataset(glob.glob(agg)[0], mode="r")
+        
+    # Loop through each of the fv01 files
+    for fn in netCDFfiles:
+        
+        print('File selected is '+fn)
+        
+        # Change the creation date in the filename to today
+        now=datetime.utcnow()
+            
+        fn_new_split = fn.split('_')
+            
+        fn_new_split[-1] = "C-" + now.strftime("%Y%m%d") + ".nc"
+        
+        fn_new_split[2] += 'IP'
+            
+        fn_new = '_'.join(fn_new_split)
+
+        
+        # If a new (different) filename has been successfully generated, make 
+        # a copy of the old file with the new name
+        if fn_new != fn:
+            
+            files_out.append(fn_new)
+            
+            print('copying file')
+            # copy file
+            shutil.copy(fn, fn_new)
+        
+        # Open and work in the new copy
+        fv01_contents = Dataset(fn_new,mode='a')
+        
+        print('copied file opened')
+        
+        # Check the current file doesn't contain pressure to run the following
+        # interpolator
+        if not 'PRES' in fv01_contents.variables:
+            
+            print("file doesn't contain pressure")
+            
+            print(fv01_contents.variables.keys())
+            print(agg.variables.keys())
+            
+            # Create a NaN array to fill with pressure values
+            interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv01_contents.variables["TIME"])),np.nan)
+            
+            # Set the first row as zeros to set 0m as 0dbar
+            interp_agg_pres[0,:] = 0
+            
+            # Create a new array representing the nominal depths of the agg file,
+            # including the 0m values
+            agg_nominal_depths = np.insert(np.array(agg.variables["NOMINAL_DEPTH"][:]),0,0)
+            
+            # For each nominal depth, interpolate the agg data at the fv01 times
+            for j in range(1,len(agg_nominal_depths)):
+                
+                time_selection = agg.variables["TIME"][agg.variables["instrument_index"][:]==(j-1)]
+                
+                pres_selection = agg.variables["PRES"][agg.variables["instrument_index"][:]==(j-1)]
+                
+                interp_agg_pres[j,:] = np.interp(fv01_contents.variables["TIME"][:],time_selection,pres_selection)
+                                           
+            # Sort the nominal depths and pressures according to nominal depth
+            interp_agg_pres = interp_agg_pres[np.argsort(agg_nominal_depths),:]
+        
+            agg_nominal_depths.sort()
+            
+            # If there are any NaN values, linearly interpolate profilewise
+            if np.isnan(np.sum(interp_agg_pres)):
+                
+                # Make a dataframe of the interpolated pressure to handle NaNs easily
+                interp_agg_pres_df = pd.DataFrame(data=interp_agg_pres,index=agg_nominal_depths)
+                
+                # Find all the columns where the lowest element is NaN
+                nan_cols = np.where(interp_agg_pres_df.iloc[-1].isna())
+                
+                # Select each column containing an NaN as the deepest value
+                for j in nan_cols:
+                    
+                    # Find the shallowest nominal depth that isn't NaN
+                    shallowest_val = pd.Series.last_valid_index(interp_agg_pres_df.iloc[:,j])
+                    
+                    # Find the index of that nominal depth
+                    shallowest_idx = interp_agg_pres_df.index.tolist().index(shallowest_val)
+                    
+                    # Starting at the shallowest NaN in a continous block of NaNs to the bottom
+                    for k in range(shallowest_idx+1,len(interp_agg_pres_df)):
+                        
+                        # Linearly interpolate from shallow to deep, based on a nominal depth difference of 1m equating to 1dbar
+                        interp_agg_pres_df.iloc[k,j] = interp_agg_pres_df.iloc[k-1,j]+np.diff(interp_agg_pres_df.index)[k-1]
+                        
+                # Linearly interpolate any remaining NaNs
+                interp_agg_pres_df = interp_agg_pres_df.interpolate(method="index")
+                
+                # Convert the DataFrame back to an array
+                interp_agg_pres =  interp_agg_pres_df.to_numpy()
+            
+            # Create a NaN array to receive the fv01 interpolated pressures
+            interp_fv01_pres = np.full((np.shape(fv01_contents.variables["TIME"][:])),np.nan)
+            
+            # At each timestamp, interpolate pressure for the fv01 data
+            for j in range(len(fv01_contents.variables["TIME"])):
+            
+                interp_fv01_pres[j] = np.interp(fv01_contents.variables["NOMINAL_DEPTH"][0],agg_nominal_depths,interp_agg_pres[:,j])
+        
+            # Create the PRES and PRES_quality_control variables, and their attributes
+                
+            pres_var = fv01_contents.createVariable('PRES','f8',fv01_contents.variables['TIME'].dimensions,fill_value=99, zlib=True)
+            
+            pres_atts = ['long_name','sea_water_pressure_due_to_sea_water','units','dbar',
+            'standard_name','coordinates','TIME LATITUDE LONGITUDE NOMINAL_DEPTH','sea_water_pressure_due_to_sea_water','valid_max',
+            12000,'valid_min',-15] 
+                
+            for att_name,att_value in zip(pres_atts[0::2],pres_atts[1::2]):
+                    
+                pres_var.setncattr(att_name,att_value)
+                        
+            pres_var[:] = interp_fv01_pres
+            
+            
+            pres_qc_var = fv01_contents.createVariable('PRES_quality_control','i1',fv01_contents.variables['TIME'].dimensions,fill_value=99, zlib=True)
+            
+            pres_qc_var.long_name = "quality_code for PRES"
+            
+            pres_qc_var.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9])
+            
+            pres_qc_var.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+                    
+            pres_qc_var[:] = 7
+            
+            pres_var.ancillary_variables = "PRES_quality_control"
+            
+            # Close the netcdf files
+            
+            fv01_contents.close()
+        
+        # Deal with files that already contain pressure, but contain NaNs            
+        elif any(np.isnan(np.array(fv01_contents.variables['PRES'][:]))):
+            
+            print("file contains pressure and agg contains NaNs")
+            
+            # Create a NaN array to fill with pressure values
+            interp_agg_pres = np.full((len(agg.variables["NOMINAL_DEPTH"])+1,len(fv01_contents.variables["TIME"])),np.nan)
+            
+            # Set the first row as zeros to set 0m as 0dbar
+            interp_agg_pres[0,:] = 0
+            
+            # Create a new array representing the nominal depths of the agg file,
+            # including the 0m values
+            agg_nominal_depths = np.insert(np.array(agg.variables["NOMINAL_DEPTH"][:]),0,0)
+            
+            # For each nominal depth, interpolate the agg data at the fv01 times
+            for j in range(1,len(agg_nominal_depths)):
+                
+                time_selection = agg.variables["TIME"][agg.variables["instrument_index"][:]==(j-1)]
+                
+                pres_selection = agg.variables["PRES"][agg.variables["instrument_index"][:]==(j-1)]
+                
+                interp_agg_pres[j,:] = np.interp(fv01_contents.variables["TIME"][:],time_selection,pres_selection)
+                                           
+            # Sort the nominal depths and pressures according to nominal depth
+            interp_agg_pres = interp_agg_pres[np.argsort(agg_nominal_depths),:]
+        
+            agg_nominal_depths.sort()
+                
+            # Make a dataframe of the interpolated pressure to handle NaNs easily
+            interp_agg_pres_df = pd.DataFrame(data=interp_agg_pres,index=agg_nominal_depths)
+            
+            # Find all the columns where the lowest element is NaN
+            nan_cols = np.where(interp_agg_pres_df.iloc[-1].isna())
+            
+            # Select each column containing an NaN as the deepest value
+            for j in nan_cols:
+                
+                # Find the shallowest nominal depth that isn't NaN
+                shallowest_val = pd.Series.last_valid_index(interp_agg_pres_df.iloc[:,j])
+                
+                # Find the index of that nominal depth
+                shallowest_idx = interp_agg_pres_df.index.tolist().index(shallowest_val)
+                
+                # Starting at the shallowest NaN in a continous block of NaNs to the bottom
+                for k in range(shallowest_idx+1,len(interp_agg_pres_df)):
+                    
+                    # Linearly interpolate from shallow to deep, based on a nominal depth difference of 1m equating to 1dbar
+                    interp_agg_pres_df.iloc[k,j] = interp_agg_pres_df.iloc[k-1,j]+np.diff(interp_agg_pres_df.index)[k-1]
+                    
+            # Linearly interpolate any remaining NaNs
+            interp_agg_pres_df = interp_agg_pres_df.interpolate(method="index")
+            
+            # Convert the DataFrame back to an array
+            interp_agg_pres =  interp_agg_pres_df.to_numpy()
+        
+            # Create a NaN array to receive the fv01 interpolated pressures
+            interp_fv01_pres = np.full((np.shape(fv01_contents.variables["TIME"][:])),np.nan)
+        
+            # Extract the interpolated pressures (NaNs removed) to store in netCDF4
+            interp_fv01_pres = interp_agg_pres_df[interp_agg_pres_df.index==fv01_contents.variables["NOMINAL_DEPTH"][:]]
+        
+            # Find indices where the netcdf data and interpolated data don't match (where the NaNs are in the netcdf)
+            nan_rep_idx = np.where(interp_fv01_pres!=fv01_contents.variables['PRES'][:])[1]
+            
+            #
+            fv01_contents.variables['PRES_quality_control'][nan_rep_idx] = 7
+        
+            print('QC altered in original press')
+        
+            # Insert pressure value with NaNs interpolated back into netcdf
+            fv01_contents.variables['PRES'][:] = interp_fv01_pres
+            
+            print('press altered in orginal press')
+            
+            fv01_contents.close()
+            
+    agg.close()
+    
+    return files_out        
+                
+                
+                
+                
+                
diff --git a/ocean_dp/qc/add_qc_flags.py b/ocean_dp/qc/add_qc_flags.py
index 7d00c63..1d1fa0e 100644
--- a/ocean_dp/qc/add_qc_flags.py
+++ b/ocean_dp/qc/add_qc_flags.py
@@ -18,11 +18,12 @@
 
 from netCDF4 import Dataset, num2date
 import sys
-
+from datetime import datetime
 import numpy as np
 from dateutil import parser
 import pytz
 import os
+import shutil
 
 # add QC variables to file
 
@@ -32,8 +33,31 @@ def add_qc(netCDFfile):
     new_name = [] # list of new file names
 
     # loop over all file names given
-    for fn in netCDFfile[1:]:
-        ds = Dataset(fn, 'a')
+    for fn in netCDFfile:
+
+        # rename the file FV00 to FV01 (imos specific)
+        fn_new = fn.replace("FV00", "FV01")
+        
+        # Change the creation date in the filename to today
+        now=datetime.utcnow()
+        
+
+        
+        fn_new = "".join((fn_new[0:-11],now.strftime("%Y%m%d"),fn_new[-3::]))
+        
+        # Add the new file name to the list of new file names
+        new_name.append(fn_new)
+
+        # If a new (different) filename has been successfully generated, make 
+        # a copy of the old file with the new name
+        if fn_new != fn:
+            # copy file
+            shutil.copy(fn, fn_new)
+              
+
+        print(fn_new)
+
+        ds = Dataset(fn_new, 'a')
 
         # read the variable names from the netCDF dataset
         vars = ds.variables
@@ -51,29 +75,33 @@ def add_qc(netCDFfile):
             if "TIME" in vars[v].dimensions:
                 # print("time dim ", v)
 
-                ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True)  # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
-                ncVarOut[:] = np.zeros(vars[v].shape)
-                ncVarOut.long_name = "quality_code for " + v
+                if v+"_quality_control" not in ds.variables:
+                    ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True)  # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
+                    ncVarOut[:] = np.zeros(vars[v].shape)
+                    ncVarOut.long_name = "quality_code for " + v
+                    ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9])
+                    ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+                    
 
-                vars[v].ancillary_variables = v + "_quality_control"
+                    vars[v].ancillary_variables = v + "_quality_control"
 
-        # update the file version attribute
+        # update the global attributes
         ds.file_version = "Level 1 - Quality Controlled Data"
+        
+        ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")
+        
+        ds.history += ' ' + now.strftime("%Y%m%d:") + ' converted to FV01 file, quality_control variables added.'
 
-        ds.close()
-
-        # rename the file FV00 to FV01 (imos specific)
-        fn_new = fn.replace("FV00", "FV01")
-        new_name.append(fn_new)
+        # ADD quality control attributes!!
 
-        if fn_new != fn:
-            # copy file
-            os.copy(fn, fn_new)
+        ds.close()
 
-        print(fn_new)
 
     return new_name
 
 
 if __name__ == "__main__":
-    add_qc(sys.argv)
+    add_qc(sys.argv[1:])
+    
+    
+    
\ No newline at end of file
diff --git a/ocean_dp/qc/agg_temp_plot.py b/ocean_dp/qc/agg_temp_plot.py
new file mode 100755
index 0000000..71ab427
--- /dev/null
+++ b/ocean_dp/qc/agg_temp_plot.py
@@ -0,0 +1,137 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+
+
+x=Dataset('IMOS_ABOS-SOTS_COPSTIP_20180822_SOFS_FV02_SOFS-Aggregate-TEMP_END-20190322_C-20200311.nc',mode='r')
+
+temp = np.array(x.variables['TEMP'][:])
+
+time = np.array(x.variables['TIME'][:])
+
+ins_idx = np.array(x.variables['instrument_index'][:])
+
+fig, ax = plt.subplots(6,5)
+
+ax=ax.flatten()
+
+label_coords = (0.1, 0.8)
+label_method = 'axes fraction' 
+
+# cmap = colors.ListedColormap(['black','green','blue','red','orange'])
+
+# boundaries = [0,5,10,20,40,80]
+
+cmap = colors.ListedColormap(['blue','orange','red'])
+
+boundaries = [0,20,30,500]
+
+norm = colors.BoundaryNorm(boundaries, cmap.N, clip=True)
+
+
+for i in set(np.array(ins_idx)):
+
+    cur_temp = temp[ins_idx==i]
+    
+    cur_time = time[ins_idx==i]
+    
+    cur_time_hr = cur_time*24
+    
+    # Calculate time changes
+    cur_time_hr_diffs = np.diff(cur_time_hr)
+    
+    cur_temp_diffs = np.diff(cur_temp)
+    
+    # Calculate the rate of change of temperature wrt time
+    cur_dtemp_dtime = np.divide(cur_temp_diffs,cur_time_hr_diffs)
+    
+    im = ax[i].scatter(cur_time,cur_temp,s=1,c=np.concatenate((np.array([0]),np.abs(cur_dtemp_dtime))),cmap=cmap,norm=norm)
+    
+    #ax[i].set_title(,fontsize=10) 
+    
+    ax[i].annotate('Ins:'+str(i),xy=label_coords, xycoords=label_method,fontsize=8)
+    
+    if i==27:
+        fig.colorbar(im)
+    
+    
+fig.colorbar(cmap)
+    
+i=1
+fig, ax = plt.subplots()
+ax.plot(time[ins_idx==i],temp[ins_idx==i])
+
+
+# Remove bad instruments
+good_vals = [a!=14 and a!=15 for a in ins_idx]
+
+fig, ax = plt.subplots()
+ax.hist(temp[good_vals],21)
+
+sofs75_temp_diffs = np.array([])
+
+good_ins = set(np.array(ins_idx))
+
+good_ins -= {14,15}
+
+for i in good_ins:
+    
+    cur_temp = temp[ins_idx==i]
+    
+    cur_time = time[ins_idx==i]
+    
+    cur_time_hr = cur_time*24
+    
+    # Calculate time changes
+    cur_time_hr_diffs = np.diff(cur_time_hr)
+    
+    cur_temp_diffs = np.diff(cur_temp)
+    
+    # Calculate the rate of change of temperature wrt time
+    cur_dtemp_dtime = np.divide(cur_temp_diffs,cur_time_hr_diffs)
+    
+    print('ins '+str(i)+':'+str(np.max(cur_dtemp_dtime)))
+    
+    sofs75_temp_diffs = np.concatenate((sofs75_temp_diffs,cur_dtemp_dtime))
+    
+    
+    
+    
+    
+    
+    
+    
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/arg_tester.py b/ocean_dp/qc/arg_tester.py
new file mode 100755
index 0000000..2bfe7b4
--- /dev/null
+++ b/ocean_dp/qc/arg_tester.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+def learn_arg_sys(text_in):
+    
+    print(text_in)
+
+# USE runfile('arg_tester.py', args='bing')
+
+
+#if __name__ == "__main__":
+    # usage is <file_name> <variable_name> <*args>
+learn_arg_sys(text_in=sys.argv[1])
\ No newline at end of file
diff --git a/ocean_dp/qc/flatline_test.py b/ocean_dp/qc/flatline_test.py
new file mode 100755
index 0000000..79ab981
--- /dev/null
+++ b/ocean_dp/qc/flatline_test.py
@@ -0,0 +1,135 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+
+# If files aren't specified, take all the IMOS*.nc files in the current folder
+def flatline_test_all_files(target_vars_in=[], window=5, flag=4):
+    target_files = glob.glob('IMOS*.nc')
+
+    flatline_test_files(target_files, target_vars_in=target_vars_in, window=window, flag=flag)
+
+
+def flatline_test_files(target_files, target_vars_in=[], window=5, flag=4):
+    
+    # Loop through each files in target_files
+    for current_file in target_files:
+        # Print each filename
+        print("input file %s" % current_file)
+
+        # Extract netcdf data into nc
+        nc = Dataset(current_file, mode="a")
+
+        # run the flat line test
+        flatline_test(nc=nc, target_vars_in=target_vars_in, window=window, flag=flag)
+
+
+def flatline_test(nc, target_vars_in=[], window=5, flag=4):
+    
+    print('Window is '+str(window))
+    
+    # If target_vars aren't user specified, set it to all the variables of 
+    # the current_file, removing unwanted variables
+    if target_vars_in == []:
+        
+        target_vars = list(nc.variables.keys())
+        
+        # Remove TIME
+        target_vars.remove('TIME')
+        
+        # Remove any quality_control variables
+        qc_vars = [s for s in target_vars if 'quality_control' in s]
+        target_vars = [s for s in target_vars if s not in qc_vars]
+                        
+        # Remove any variables of single length
+        single_vars = [s for s in target_vars if nc.variables[s].size==1]
+        target_vars = [s for s in target_vars if s not in single_vars]
+        
+        print('target_vars are '+' '.join(target_vars))
+        
+    else:
+        target_vars = target_vars_in
+        
+    # For each variable, extract the data 
+    for current_var in target_vars:
+        
+        # Extract the variable
+        nc_var = nc.variables[current_var]
+        
+        if nc_var.name + "_quality_control_flt" in nc.variables:
+            print('flt qc variable already present')
+            ncVarOut = nc.variables[nc_var.name + "_quality_control_flt"]
+            ncVarOut[:] = 0
+        else:
+            ncVarOut = nc.createVariable(nc_var.name + "_quality_control_flt", "i1", nc_var.dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+            ncVarOut[:] = 0
+            # print(all(nc.variables[nc_var.name + "_quality_control_flt"]==0))
+            ncVarOut.long_name = "quality flag for " + nc_var.name
+            ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+            ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+            # add new variable to list of aux variables
+            nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_flt"
+
+        var_data = np.array(nc.variables[current_var][:])
+
+        if (all(nc.variables[nc_var.name + "_quality_control_flt"][:] == 0)):
+            print('All test specific qc values are zero before filling')
+
+        print('checking ' + current_var)
+
+        print('Window is ' + str(window))
+
+        # Step through the data, one element at a time, using the window
+        for i in range(0, (len(var_data) - window + 1)):
+
+            # This is true if 'window' elements in a row are equal
+            if len(set(var_data[i:(i + window)])) == 1:
+                print(str(i))
+                # set corresponding QC value to...
+                ncVarOut[i:(i + window)] = flag
+
+        points_marked = len([elem for elem in ncVarOut[:] if elem == 4])
+        print('Data points flagged: ', points_marked)
+
+        qc_var = nc.variables[current_var + "_quality_control"]
+        qc_var[:] = np.maximum(ncVarOut[:], qc_var[:])
+    # update the history attribute
+    try:
+        hist = nc.history + "\n"
+    except AttributeError:
+        hist = ""
+        
+        
+
+    nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + 'flatline_test performed on [' + str(target_vars) + '], window '+str(window)+' consecutive values or more were flagged with '+str(flag) )
+
+    nc.close()
+    
+if __name__ == "__main__":
+    # usage is <file_name> <variable_name> <window> <flag value>
+    flatline_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], window=float(sys.argv[3]), flag=float(sys.argv[4]))
diff --git a/ocean_dp/qc/global_range.py b/ocean_dp/qc/global_range.py
index e11bf40..db3b80b 100644
--- a/ocean_dp/qc/global_range.py
+++ b/ocean_dp/qc/global_range.py
@@ -28,29 +28,72 @@
 # flag 4 (bad) when out of global range
 
 
-def global_range(netCDFfile, variable, max, min):
+def global_range(netCDFfile, variable, max, min, qc_value=4):
     ds = Dataset(netCDFfile, 'a')
 
-    var = ds.variables[variable]
+    nc_var = ds.variables[variable]
+    var_data = nc_var[:]
+    var_data.mask = False
 
     try:
-        var_qc = ds.variables[variable + "_quality_control"]
+        # find the existing quality_control variable in the auxillary variables list
+        aux_vars = nc_var.ancillary_variables
+        aux_var = aux_vars.split(" ")
+        qc_vars = [i for i in aux_var if i.endswith("_quality_control")]
+        qc_var = qc_vars[0]
+        print("QC var name ", qc_var)
+        var_qc = ds.variables[qc_var]
     except KeyError:
         print("no QC variable found")
         return None
 
+    # read existing quality_control flags
+    qc = var_qc[:]
+
     # this is where the actual QC test is done
-    mask = ((var[:] > max) | (var[:] < min))
+    mask = ((var_data > max) | (var_data < min))
+    print('mask data ', mask)
+
+    # create a qc variable just for this test flags
+    if nc_var.name + "_quality_control_gr" in ds.variables:
+        ncVarOut = ds.variables[nc_var.name + "_quality_control_gr"]
+        ncVarOut[:] = 0
+    else:
+        ncVarOut = ds.createVariable(nc_var.name + "_quality_control_gr", "i1", nc_var.dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+        ncVarOut[:] = 0
+        ncVarOut.long_name = "quality flag for " + nc_var.name
+        ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+        ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+
+    # add new variable to list of aux variables
+    nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_gr"
+
+    # store the qc flags
+    ncVarOut[mask] = qc_value
 
-    mask = mask & (var_qc[:] < 1) # only mark data that has not been QCd already
+    # store qc flags to main quality_control flags variable
+    mask = mask & (qc < 1)  # only mark data that has not been QCd already
+    print('mask other qc ', mask)
 
-    var_qc[mask] = 4
-    count = sum(mask)
-    print('marked records ', count)
+    qc[mask] = qc_value  # mark the out of range points with bad_data
+
+    # calculate the number of points marked as bad_data
+    marked = np.zeros_like(qc)
+    marked[mask] = 1
+    count = sum(marked)
+    print('marked records ', count, mask, qc)
+
+    # write flags back to main QC variable
+    var_qc[:] = qc
 
     # update the history attribute
-    history = ds.history
-    ds.setncattr("history", history + "\n" + datetime.utcnow().strftime("%Y-%m-%d") + " " + variable + " global range min = " + str(min) + " max = " + str(max) + " marked " + str(count))
+    try:
+        hist = ds.history + "\n"
+    except AttributeError:
+        hist = ""
+    ds.setncattr("history", hist + datetime.utcnow().strftime("%Y-%m-%d") + " " + variable + " global range min = " + str(min) + " max = " + str(max) + " marked " + str(count))
+
+    ds.variables[variable + "_quality_control"][:] = np.maximum(ds.variables[variable + "_quality_control_gr"][:],ds.variables[variable + "_quality_control"][:])
 
     ds.close()
 
@@ -59,5 +102,8 @@ def global_range(netCDFfile, variable, max, min):
 
 if __name__ == "__main__":
 
-    # usage is <file_name> <variable_name> <max> <min>
-    global_range(sys.argv[1], sys.argv[2], float(sys.argv[3]), float(sys.argv[4]))
+    # usage is <file_name> <variable_name> <max> <min> <qc value>
+    if len(sys.argv) > 5:
+        global_range(sys.argv[1], sys.argv[2], max=float(sys.argv[3]), min=float(sys.argv[4]), qc_value=int(sys.argv[5]))
+    else:
+        global_range(sys.argv[1], sys.argv[2], max=float(sys.argv[3]), min=float(sys.argv[4]))
\ No newline at end of file
diff --git a/ocean_dp/qc/in_out_water.py b/ocean_dp/qc/in_out_water.py
index ac14840..7ec3e4d 100644
--- a/ocean_dp/qc/in_out_water.py
+++ b/ocean_dp/qc/in_out_water.py
@@ -18,53 +18,104 @@
 
 from netCDF4 import Dataset, num2date
 import sys
-
+from datetime import datetime
 import numpy as np
 from dateutil import parser
 import pytz
 import os
 
-# flag out of water as QC value 7 (not_deployed), with wise leave as 0
+# flag out of water as QC value 6 (not_deployed), with wise leave as 0
 
 
-def in_out_water(netCDFfile):
-    ds = Dataset(netCDFfile, 'a')
+def in_out_water(fn, var_name=None):
 
-    vars = ds.variables
+    out_file = []
 
-    to_add = []
-    for v in vars:
-        #print (vars[v].dimensions)
-        if v != 'TIME':
-            to_add.append(v)
 
-    time_var = vars["TIME"]
+    ds = Dataset(fn, 'a')
+
+    nc_vars = ds.variables
+    to_add = []
+    if var_name:
+        to_add.append(var_name)
+    else:
+        for v in nc_vars:
+            if "TIME" in nc_vars[v].dimensions:
+                #print (vars[v].dimensions)
+                if v != 'TIME':
+                    to_add.append(v)
+        # remove any anx variables from the list
+        for v in nc_vars:
+            if 'ancillary_variables' in nc_vars[v].ncattrs():
+                remove = nc_vars[v].getncattr('ancillary_variables').split(' ')
+                print("remove ", remove)
+                for r in remove:
+                    to_add.remove(r)
+
+    time_var = nc_vars["TIME"]
     time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
 
     time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
     time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
 
-    print(time_deploy)
-
-    print(to_add)
-    for v in to_add:
-        if "TIME" in vars[v].dimensions:
-
-            if v.endswith("_quality_control"):
+    print('file', fn)
+    print('deployment time', time_deploy)
 
-                print("QC time dim ", v)
+    print('var to add', to_add)
 
-                ncVarOut = vars[v]
-                mask = (time <= time_deploy) | (time >= time_recovery)
-                ncVarOut[mask] = np.ones(vars[v].shape)[mask] * 7
+    # create a mask for the time range
+    mask = (time <= time_deploy) | (time >= time_recovery)
 
+    for v in to_add:
+        print("var", v, ' dimensions ', nc_vars[v].dimensions)
+
+        ncVarOut = nc_vars[v + "_quality_control"]
+        ncVarOut[mask] = 6
+
+        # create a qc variable just for this test flags
+        if v + "_quality_control_io" in ds.variables:
+            ncVarOut = ds.variables[v + "_quality_control_io"]
+            ncVarOut[:] = 0
+        else:
+            ncVarOut = ds.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+            nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+
+        ncVarOut[:] = 0
+        ncVarOut.long_name = "quality flag for " + nc_vars[v].long_name
+        try:
+            ncVarOut.standard_name = nc_vars[v].standard_name + " status_flag"
+        except AttributeError:
+            pass
+
+        ncVarOut.quality_control_conventions = "IMOS standard flags"
+        ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+        ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+        ncVarOut.comment = 'data flagged not deployed (6) when out of water'
+
+        ncVarOut[mask] = 6
+        # calculate the number of points marked as bad_data
+        marked = np.zeros_like(ncVarOut)
+        marked[mask] = 1
+        count = sum(marked)
 
     ds.file_version = "Level 1 - Quality Controlled Data"
+    # update the history attribute
+    try:
+        hist = ds.history + "\n"
+    except AttributeError:
+        hist = ""
+
+    ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' :  ' + ' marked ' + str(int(count)))
 
     ds.close()
 
-    return netCDFfile
+    out_file.append(fn)
+
+    return out_file
 
 
 if __name__ == "__main__":
-    in_out_water(sys.argv[1])
+    if len(sys.argv) > 2 & sys.argv[1].startswith('-'):
+        in_out_water(sys.argv[2:], var_name=sys.argv[1][1:])
+    else:
+        in_out_water(sys.argv[1:])
\ No newline at end of file
diff --git a/ocean_dp/qc/netcdf_gen.py b/ocean_dp/qc/netcdf_gen.py
new file mode 100755
index 0000000..1b56de8
--- /dev/null
+++ b/ocean_dp/qc/netcdf_gen.py
@@ -0,0 +1,163 @@
+		 # Copyright (C) 2020 Ben Weeding
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from netCDF4 import Dataset, date2num
+import sys
+from datetime import datetime
+import numpy as np
+import shutil
+
+ # Provide the function with a filename (don't include .nc), a nominal depth,
+# and pairs of names and arrays containing the data to be included as variables.
+# A time dimension/variable is created by default, starting at 01/01/2020 using
+# 1 hour timestamps
+
+ # For example, netcdf_gen('test',30,'PRES',pres_data,'TEMP',temp_data)
+# from the command line gen_test_data.py test 30 PRES 10,20,30 TEMP 11,12,NaN
+
+
+
+def netcdf_gen(file_name, nominal_depth, *args):
+    # Convert the args tuple to a list
+    args = list(args)
+    #print(args, type(args[1]))
+
+    file_name = "IMOS_" + file_name + ".nc" # if we insist on not wanting to pass these
+
+     # deal with passing nominal depth as a string
+    if isinstance(nominal_depth, str):
+        nominal_depth = float(nominal_depth)
+        print('nominal depth :', nominal_depth)
+
+     # Check the args are paired
+    if len(args) % 2 == 0:
+
+         # Assign the names and data to lists
+        var_names = args[0::2]
+
+         # deal with passing data as a string list of values
+        if isinstance(args[1], str):
+            var_data = [[float(b) for b in a.split(',')] for a in args[1::2]]
+            #print('var_data split', var_data)
+        else:
+            var_data = args[1::2]
+
+         # Check if first of each pair is a string
+        if all(isinstance(x, str) for x in var_names):
+
+             # Check if second of each pair are all equal in shape
+            if all(np.shape(var_data[0]) == np.shape(x) for x in var_data):
+
+                 # Create the netcdf with IMOS tag
+                ds = Dataset(file_name, "w", format="NETCDF4")
+
+                 # Create time dimension with length to match data
+                time_dim = ds.createDimension("TIME", len(var_data[0]))
+
+                time_var = ds.createVariable("TIME", "f8", ("TIME"))
+
+                time_var.setncattr('long_name', 'time')
+                time_var.setncattr('standard_name', 'time')
+                time_var.setncattr('units', 'days since 1950-01-01 00:00:00 UTC')
+                time_var.setncattr('calendar', 'gregorian')
+                time_var.setncattr('axis', 'T')
+                time_var.setncattr('valid_max', 90000)
+                time_var.setncattr('valid_min', 0)
+
+                t0 = date2num(datetime(2020, 1, 1), units=time_var.units)
+                ds.variables['TIME'][:] = np.linspace(t0,t0 + (1 / 24) * (len(var_data[0])-1),num=len(var_data[0]))
+
+                 # Create the nominal depth variable
+                nom_depth_var = ds.createVariable("NOMINAL_DEPTH", "f8")
+                nom_depth_var.setncattr('long_name', 'nominal depth')
+                nom_depth_var.setncattr('units', 'dbar')
+                nom_depth_var.setncattr('positive', 'down')
+                nom_depth_var.setncattr('axis', 'Z')
+                nom_depth_var.setncattr('valid_max', 12000)
+                nom_depth_var.setncattr('valid_min', -5)
+                nom_depth_var.setncattr('reference_datum', 'sea surface')
+
+                ds.variables["NOMINAL_DEPTH"][:] = nominal_depth
+
+                 # Create variables from input data
+                for name_in, data_in in zip(var_names, var_data):
+                    ds.createVariable(name_in, "f8", ("TIME"))
+                    ds.variables[name_in][:] = data_in
+
+
+                # read the variable names from the netCDF dataset
+                vars = ds.variables
+        
+                # create a list of variables, don't include the 'TIME' variable
+                # TODO: detect 'TIME' variable using the standard name 'time'
+                to_add = []
+                
+                for v in vars:
+                    #print (vars[v].dimensions)
+                    if v != 'TIME':
+                        to_add.append(v)
+        
+                # for each variable, add a new ancillary variable <VAR>_quality_control to each which has 'TIME' as a dimension
+                for v in to_add:
+                    if "TIME" in vars[v].dimensions:
+                        # print("time dim ", v)
+        
+                        if v+"_quality_control" not in ds.variables:
+                            ncVarOut = ds.createVariable(v+"_quality_control", "i1", vars[v].dimensions, fill_value=99, zlib=True)  # fill_value=99 otherwise defaults to max, imos-toolbox uses 99
+                            ncVarOut[:] = np.zeros(vars[v].shape)
+                            ncVarOut.long_name = "quality_code for " + v
+                            ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9])
+                            ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+                            
+        
+                            vars[v].ancillary_variables = v + "_quality_control"
+
+                # update the global attributes
+                ds.file_version = "Level 1 - Quality Controlled Data"
+                
+                ds.history = datetime.utcnow().strftime("%Y%m%d:") + ' converted to FV01 file, quality_control variables added.'
+        
+                # ADD quality control attributes!!
+        
+                ds.close()
+                
+                #add_qc(file_name)
+                
+                print("generated ", file_name)
+
+                return (file_name)
+            
+                
+
+            else:
+                print('Data arrays not of equal length')
+
+
+        else:
+            print('Labels not in string format')
+
+    else:
+        print('Data not passed in pairs')
+
+
+if __name__ == "__main__":
+    netcdf_gen(sys.argv[1], sys.argv[2], *sys.argv[3:])
+    
+    
+    
+    
+    
+    
+    
\ No newline at end of file
diff --git a/ocean_dp/qc/psal_stat_plot.py b/ocean_dp/qc/psal_stat_plot.py
new file mode 100755
index 0000000..cb9cbc7
--- /dev/null
+++ b/ocean_dp/qc/psal_stat_plot.py
@@ -0,0 +1,357 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+import pandas as pd
+
+############################# Data extraction ################################
+
+# creates an empty array to store the names of the SOTS deployments
+deployments = []
+
+checked_files = []
+
+processed_files = []
+
+# loops through all the folders and files contained in the folder
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    # if the folder/file name contains 'Pulse' or 'SOFS' and doesn't contain '.', append it to deployments
+    if (('Pulse' in x) or ('SOFS' in x)) and ('.p' not in x):
+        
+        deployments.append(x)
+        
+        
+# create a dataframe to store extract information
+sots_psal_ensemble = pd.DataFrame(columns = ["dPsal/dtime","dPsal/dSample","QC","Nominal depth","Deployment"])
+
+
+# loops through all files in the directory
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    for fname in files:
+        
+        # append the filename to the list of checked files
+        checked_files.append(fname)
+        
+        # for each netcdf file labelled as FV01 and containing a deployment in its name
+        if fname.endswith('.nc') and 'FV01' in fname and any(ele in fname for ele in deployments):
+        
+            # print the filename
+            print(fname)  
+            
+            # open the file
+            nc = Dataset(os.path.join(root,fname), mode = 'a')
+            
+            # check file contains psalerature data
+            if 'PSAL' in list(nc.variables):
+            
+                # check that the in_out_water test has been run on the file, if not run in_out_water code
+                if not 'PSAL_quality_control_io' in list(nc.variables):
+                
+                    # run in_out_water script - uncommented at this point as just copied and pasted
+                    var_name = 'PSAL'
+                    nc_vars = nc.variables
+                    to_add = []
+                    if var_name:
+                        to_add.append(var_name)
+                    else:
+                        for v in nc_vars:
+                            #print (vars[v].dimensions)
+                            if v != 'TIME':
+                                to_add.append(v)
+                
+                    time_var = nc_vars["TIME"]
+                    time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+                
+                    time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+                    time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+                
+                    print('deployment time', time_deploy)
+                
+                    print(to_add)
+                
+                    # create a mask for the time range
+                    mask = (time <= time_deploy) | (time >= time_recovery)
+                
+                    for v in to_add:
+                        if "TIME" in nc_vars[v].dimensions:
+                            if v.endswith("_quality_control"):
+                                print("QC time dim ", v)
+                
+                                ncVarOut = nc_vars[v]
+                                ncVarOut[mask] = 7
+                            else:
+                                # create a qc variable just for this test flags
+                                if v + "_quality_control_io" in nc.variables:
+                                    ncVarOut = nc.variables[v + "_quality_control_io"]
+                                else:
+                                    ncVarOut = nc.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+                                ncVarOut[:] = np.zeros(nc_vars[v].shape)
+                                ncVarOut.long_name = "quality flag for " + v
+                                ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+                                ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+                
+                                nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+                                ncVarOut[mask] = 7
+                        
+                        nc.variables[v + "_quality_control"][:] = np.maximum(nc.variables[v + "_quality_control_io"][:],nc.variables[v + "_quality_control"][:])
+                
+                    nc.file_version = "Level 1 - Quality Controlled Data"
+                
+                
+                
+                # check that the file has a single dimension psalerature vector, and that the time format is correct
+                if np.array(nc.variables['PSAL'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                    
+                    # calculate psalerature changes for in water data
+                    nc_psal_diffs = np.diff(np.array(nc.variables['PSAL'][np.array(nc.variables['PSAL_quality_control'][:])!=7]))
+                
+                    # extract the time data
+                    nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['PSAL_quality_control'][:])!=7])
+            
+                    # Convert from days to hours
+                    nc_time_hr = nc_time*24
+                    
+                    # Calculate time changes in hours
+                    nc_time_hr_diffs = np.diff(nc_time_hr)
+                    
+                    # calculate the rate of change of psalerature wrt time (degrees °C per hour)
+                    nc_dpsal_dtime = np.divide(nc_psal_diffs,nc_time_hr_diffs)
+                    
+                    
+                    
+                    # extract psal_qc data
+                    nc_psal_qc = np.array(nc.variables['PSAL_quality_control'][np.array(nc.variables['PSAL_quality_control'][:])!=7])
+                    
+                    # calculate qc values for each nc_dpsal_dtime by taking the maximum of the qc values of the two contributing psals
+                    nc_dpsal_dtime_qc = pd.Series(nc_psal_qc).rolling(2).max().dropna().to_numpy()
+                    
+                    
+                    
+                    # extract sensor nominal depth
+                    nc_nom_depth = np.array(nc.variables['NOMINAL_DEPTH'])
+                    
+                    # create a vector the same size as nc_dpsal_dtime with the nominal depth
+                    nc_nom_depth_vector = np.repeat(nc_nom_depth,len(nc_dpsal_dtime))
+                    
+                    
+                    
+                    # extract deployment name
+                    nc_deployment = nc.deployment_code
+                    
+                    # create a list the same size as nc_dpsal_dtime with the deployment name
+                    nc_deployment_list = [nc_deployment] * len(nc_dpsal_dtime)
+                    
+                    
+                    
+                    # combine information into an length x 4 dataframe
+                    nc_psal_ensemble = pd.DataFrame({"dPsal/dtime":nc_dpsal_dtime,"dPsal/dSample":nc_psal_diffs,"QC":nc_dpsal_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
+                    
+                    # append the current netcdf's dataframe to the sots_psal_ensemble
+                    sots_psal_ensemble = sots_psal_ensemble.append(nc_psal_ensemble)
+                    
+                    # append the filename to the list of processed files
+                    processed_files.append(fname)
+                    
+                    
+            nc.close()
+                    
+                    
+############################# Data processing ################################
+            
+# creates a new dataframe containing only data with QC < 3
+sots_psal_ensemble_qc210 = sots_psal_ensemble[sots_psal_ensemble["QC"]<3]
+
+# calculates overall standard deviation
+std_total = np.std(sots_psal_ensemble_qc210["dPsal/dtime"])
+
+
+
+
+# creates an emply list to store data deployment by deployment
+std_by_deployment_data = []
+
+# creates a dict of deployment names and standard deviations
+for i in sots_psal_ensemble_qc210.Deployment.unique():
+    std_by_deployment_data.append(
+        {
+            'Deployment': i,
+            'STD': np.std(sots_psal_ensemble_qc210["dPsal/dtime"][sots_psal_ensemble_qc210["Deployment"]==i]),
+            'STD sample': np.std(sots_psal_ensemble_qc210["dPsal/dSample"][sots_psal_ensemble_qc210["Deployment"]==i])
+        }
+    )
+
+# creates a Dataframe from the dict
+std_by_deployment = pd.DataFrame(std_by_deployment_data)
+
+
+
+
+
+# =============================================================================
+# std_by_depth: this function takes two compulsary arguments (top: the shallowest 
+# depth(m)), bottom: the deepest depth(m)) and one option argument (deployment_in: 
+# the deployment from which data will be taken). The function will return the standard
+# deviation of the d(psal)/d(Time) data from sensors with nominal depths at and 
+# between the two depths, and from only the deployment_in if specified.
+#
+# sample call: std_by_depth(500,10000,'SOFS-7.5-2018')
+#
+# this will give the std of all d(psal)/d(Time) data from SOFS-7.5-2018 from 
+# sensors with 500m <= nominal depth <= 10000m
+# =============================================================================
+
+def std_by_depth_psal(top,bottom,deployment_in=None,rate='time'):
+    
+    selection = ''
+    
+    if rate == 'time':
+        
+        selection = "dPsal/dtime"
+        
+    elif rate == 'sample':
+        
+        selection = "dPsal/dSample"
+        
+    else:
+        
+        return "incorrect rate specification"
+    
+    
+    
+    # if user incorrectly inputs depths, swap them and run the code
+    if top > bottom:
+        
+        top, bottom = bottom, top
+    
+    if deployment_in == None:
+    
+        # subsamples sots_psal_ensemble_qc210 based on depth
+        target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom)]
+        
+    elif isinstance(deployment_in, list):
+        
+        # subsamples sots_psal_ensemble_qc210 based on depth
+        target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom) & (sots_psal_ensemble_qc210.Deployment.isin(deployment_in))]
+        
+        
+    else:   
+    
+        # subsamples sots_psal_ensemble_qc210 based on depth
+        target_ensemble = sots_psal_ensemble_qc210[(sots_psal_ensemble_qc210["Nominal depth"]>=top) & (sots_psal_ensemble_qc210["Nominal depth"]<=bottom) & (sots_psal_ensemble_qc210["Deployment"]==deployment_in)]
+    
+    # if not data is available with the given choices, end the function
+    if len(target_ensemble)==0:
+        
+        return 'No data available for those choices'
+        
+        
+    # calculates the mean of the subsample
+    target_mean = np.mean(target_ensemble[selection])
+            
+    # calculates the standard deviation of the subsample
+    target_std = np.std(target_ensemble[selection])
+        
+    # sets line thickness for plot
+    line_thick = 1
+    
+    # creates axes for histogram
+    ax_hist=plt.axes()
+    
+    # plots a histogram of the data selected
+    target_ensemble.hist(column=selection,bins=100,log=True,ax=ax_hist)
+    
+    # draws lines at the mean +- 3 STD on the histogram
+    ax_hist.axvline(x=target_mean+3*target_std,color='r',linewidth=line_thick) 
+
+    ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick) 
+    
+    # sets the x label
+    if rate == 'time':
+        
+         ax_hist.set_xlabel('PSU/hr')
+        
+    elif rate == 'sample':
+        
+        ax_hist.set_xlabel('PSU/sample')
+    
+    
+    label_coords = (0.70, 0.99)
+    label_method = 'axes fraction' 
+    
+    anno = 'mean = '+str(round(float(target_mean),sigfigs=3))
+    
+    anno += '\n3 STD = ' + str(round(float(3*target_std),sigfigs=3))
+    
+    anno += '\nno. samples = ' + str(len(target_ensemble))
+    
+    anno += '\n'+str(top)+'m <= depth <= '+str(bottom)+'m'
+    
+    if deployment_in == None:
+        
+        anno += '\nall available data'
+        
+    elif isinstance(deployment_in, list):
+        
+        anno += '\n'
+        
+        anno += '\n'.join(deployment_in)
+        
+    else:
+        
+        anno += '\n'+deployment_in
+            
+    ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8,va = "top", ha="left")
+    
+    # returns the standard deviation of the subsample
+    return target_std
+    
+
+
+
+
+
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
\ No newline at end of file
diff --git a/ocean_dp/qc/qc_checker.py b/ocean_dp/qc/qc_checker.py
new file mode 100755
index 0000000..6fd7881
--- /dev/null
+++ b/ocean_dp/qc/qc_checker.py
@@ -0,0 +1,137 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+# Code is design to check that netcdf files processed using the SOTS methods 
+# conform to the QC labelling designed by Peter Jansen.
+
+def qc_checker_all_files(target_vars_in=[]):
+    
+    successful_files=[]
+    
+    target_files = glob.glob('IMOS*.nc')
+
+    # Returns the files that conform to the qc labelling
+    successful_files = qc_checker_files(target_files, target_vars_in=target_vars_in)
+    
+
+def qc_checker_files(target_files,target_vars_in=[]):
+    
+    successful_files=[]
+    
+    # Loop through each files in target_files
+    for current_file in target_files:
+        # Print each filename
+        print("input file %s" % current_file)
+
+        # Extract netcdf data into nc
+        nc = Dataset(current_file, mode="a")
+
+        # If the qc_checker was successfull, add the filename to the list
+        if qc_checker(nc,target_vars_in=target_vars_in):
+        
+            successful_files.append(current_file)
+    
+    # Returns the files that conform to the qc labelling    
+    return successful_files
+
+
+# Enter args as variable name and rate of change limit, ie. 'TEMP',4
+def qc_checker(nc,target_vars_in=[]):
+    
+    # Collect all the variables from the netcdf
+    all_vars = list(nc.variables.keys())
+    
+    # If target_vars aren't user specified, set it to all the variables of 
+    # the current_file, removing unwanted variables (qc, single length, TIME)
+    if target_vars_in == []:
+                
+        target_vars = [s for s in all_vars if 'TIME' not in s and 'quality_control' not in s and nc.variables[s].size!=1]
+        
+        print('target_vars are '+' '.join(target_vars))
+        
+    else:
+        target_vars = target_vars_in    
+           
+    # For each of the variables selected    
+    for current_var in target_vars:
+        
+        # Collect the global qc data
+        qc_global_data = np.array(nc.variables[current_var+"_quality_control"][:])
+        
+        # Collect the names of all the other test specific qc vectors
+        qc_test_specific = [s for s in all_vars if current_var+"_quality_control" in s and not s.endswith('control')]
+        
+        # For each of the other test specific qc vectors
+        for current_qc_test in qc_test_specific:
+            
+            # Extract the data
+            qc_test_data = np.array(nc.variables[current_qc_test])
+            
+            #print('checking '+current_qc_test)
+            
+            # If any of the test specific qc vectors ever have a great value than the global qc vector at a timestamp, the qc process has failed
+            if any(np.less(qc_global_data,qc_test_data)):
+                
+                print(current_qc_test + "failed")
+                
+                qc_behaving = False
+                
+            else:
+                
+                # The qc process has succeeded 
+                qc_behaving = True
+                
+                # sets all data with a qc value of 0 to have a qc value of 1, having passed all the tests
+                nc.variables[current_var+"_quality_control"][qc_global_data==0] = 1
+                
+                now=datetime.utcnow()
+                
+                nc.history += ' ' + now.strftime("%Y%m%d:") + 'passed qc_checker, all qc=0 set to qc=1'
+    
+    # Returns true if qc has succeeded, false if not
+    return qc_behaving
+    
+
+    # Close the current netcdf file
+    nc.close()
+    
+    
+# def qc_check_plot(target_file,target_var)
+
+#     nc = Dataset(target_file,'r')
+
+        
+    
+    
+    
+    
+    
+    
+    
+    
\ No newline at end of file
diff --git a/ocean_dp/qc/rate_of_change_test.py b/ocean_dp/qc/rate_of_change_test.py
new file mode 100755
index 0000000..84f8e4f
--- /dev/null
+++ b/ocean_dp/qc/rate_of_change_test.py
@@ -0,0 +1,249 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+# how to specific rate of change? Will the function take rate of change as an 
+# argument?
+
+# Need a way of linking the rate of change to each different variable type
+
+# Could max rate of change be something we upload into the netcdf file atts? Or just in history?
+
+# Tell the function how much change you tolerate, and over what period of time - in sec?
+
+# Then convert to match the file timesteps
+
+# If files aren't specified, take all the IMOS*.nc files in the current folder
+def roc_test_all_files(*args,target_vars_in=[]):
+    target_files = glob.glob('IMOS*.nc')
+
+    roc_test_files(target_files, target_vars_in=target_vars_in,*args)
+
+def roc_test_files(target_files,*args,target_vars_in=[]):
+    
+    # Loop through each files in target_files
+    for current_file in target_files:
+        # Print each filename
+        print("input file %s" % current_file)
+        
+        print(args)
+
+        # Extract netcdf data into nc
+        nc = Dataset(current_file, mode="a")
+
+        # run the spike test - specifying *args here makes python unpack args to be passed again successfully as separate items
+        roc_test(nc,*args, target_vars_in=target_vars_in)
+
+
+# Enter args as variable name and rate of change limit, ie. 'TEMP',4
+def roc_test(nc,*args,target_vars_in=[]):
+    
+    # Check the time format
+    if nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+        
+        # Convert the args tuple to a list
+        args = list(args)
+        
+        # If a single rate of change limit is supplied
+        if len(args) == 1:
+            
+            change_per_hr = args[0]
+        
+            print('One rate of change limit will be applied to all variables')
+            
+            # If target_vars aren't user specified, set it to all the variables of 
+            # the current_file, removing unwanted variables
+            if target_vars_in == []:
+                
+                all_vars = list(nc.variables.keys())
+                
+                target_vars = [s for s in all_vars if 'TIME' not in s and 'quality_control' not in s and nc.variables[s].size!=1]
+                
+                print('target_vars are '+' '.join(target_vars))
+                
+            else:
+                target_vars = target_vars_in
+            
+            # Extract the time data
+            nc_time = np.array(nc.variables['TIME'][:])
+            
+            # Convert from days to hours
+            nc_time_hr = nc_time*24
+            
+            # For each variable
+            for current_var in target_vars:
+                
+                # Extract the variable
+                nc_var = nc.variables[current_var]
+                
+                if nc_var.name + "_quality_control_roc" in nc.variables:
+                    ncVarOut = nc.variables[nc_var.name + "_quality_control_roc"]
+                    ncVarOut[:] = np.zeros(nc.variables[nc_var.name + "_quality_control_roc"].shape)
+                else:
+                    ncVarOut = nc.createVariable(nc_var.name + "_quality_control_roc", "i1", nc_var.dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+                    ncVarOut[:] = 0
+                    ncVarOut.long_name = "quality flag for " + nc_var.name
+                    ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+                    ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+            
+                    # add new variable to list of aux variables
+                    nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
+                
+                # Extract the variable data
+                var_data = np.array(nc.variables[current_var][:])
+                
+                print('Not equal to zero test type 1')
+                print(str(np.where(ncVarOut[:]!=0)))
+                
+                if (all(ncVarOut[:] == 0)):
+                    print('All test specific qc values are zero before filling')
+                
+                # Calculate dvar/dtime
+                var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
+                
+                # For any change greater than change_per_hr, assign a qc value of 4
+                ncVarOut[[x for x in abs(np.insert(var_roc,0,0)) > change_per_hr]] = 4
+
+                # Extract global qc
+                qc_var = nc.variables[current_var + "_quality_control"]
+                
+                # Overwrite global qc with any higher values from test specific qc
+                qc_var[:] = np.maximum(ncVarOut[:], qc_var[:])
+                
+                
+                print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > change_per_hr])) + ' changes found above '+str(change_per_hr)+' '+nc.variables[current_var].units+' per hour')
+                    
+            # update the history attribute
+            try:
+                hist = nc.history + "\n"
+            except AttributeError:
+                hist = ""
+    
+            nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + 'rate of change test performed, with all changes above '+str(change_per_hr)+' flagged as 4')        
+            
+
+        # If multiple rate of change limits are supplied, with variable names
+        elif len(args) % 2 == 0 and all(isinstance(x,str) for x in args[0::2]) and all(isinstance(y,(float,int)) for y in args[1::2]):
+            
+            # Take target variables from args
+            target_vars = args[0::2]
+            
+            print('target_vars are '+' '.join(target_vars))
+        
+            # Convert arguments to dict
+            rate_spec = dict(zip(args[0::2],args[1::2]))
+            
+            # Extract the time data
+            nc_time = np.array(nc.variables['TIME'][:])
+            
+            # Convert from days to hours
+            nc_time_hr = nc_time*24
+            
+            # For each variable
+            for current_var in target_vars:
+                
+                # Extract the variable
+                nc_var = nc.variables[current_var]
+                
+                if nc_var.name + "_quality_control_roc" in nc.variables:
+                    ncVarOut = nc.variables[nc_var.name + "_quality_control_roc"]
+                    ncVarOut[:] = np.zeros(nc.variables[nc_var.name + "_quality_control_roc"].shape)
+                else:
+                    ncVarOut = nc.createVariable(nc_var.name + "_quality_control_roc", "i1", nc_var.dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+                    ncVarOut[:] = 0
+                    ncVarOut.long_name = "quality flag for " + nc_var.name
+                    ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+                    ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+            
+                    # add new variable to list of aux variables
+                    nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_roc"
+                       
+                
+                # Extract the data
+                var_data = np.array(nc.variables[current_var])
+                
+                print('Not equal to zero test type 2')
+                print(str(np.where(ncVarOut[:]!=0)))
+                
+                if (all(ncVarOut[:] == 0)):
+                    print('All test specific qc values are zero before filling')
+                
+                # Calculate dvar/dtime
+                var_roc = np.divide(np.diff(var_data),np.diff(nc_time_hr))
+                
+                # For any change greater than change_per_hr, assign a qc value of 4
+                ncVarOut[[x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]]] = 4
+                
+                print('ncVarOut 4s')
+                print(str(np.where(ncVarOut[:]==4)))
+                
+                print('Netcdf variable 4s')
+                print(str(np.where(nc.variables[nc_var.name + "_quality_control_roc"][:]==4)))
+                
+                nc.variables[nc_var.name + "_quality_control_roc"][:] = ncVarOut[:]
+                
+                print('Netcdf variable 4s after assignment')
+                print(str(np.where(nc.variables[nc_var.name + "_quality_control_roc"][:]==4)))
+                
+                
+                # Extract global qc
+                qc_var = nc.variables[current_var + "_quality_control"]
+                
+                # Overwrite global qc with any higher values from test specific qc
+                qc_var[:] = np.maximum(ncVarOut[:], qc_var[:])
+                
+                print(current_var + ' tested: '+str(sum([x for x in abs(np.insert(var_roc,0,0)) > rate_spec[current_var]])) + ' changes found above '+str(rate_spec[current_var])+' '+nc.variables[current_var].units+' per hour')
+                    
+            # update the history attribute
+            try:
+                hist = nc.history + "\n"
+            except AttributeError:
+                hist = ""
+    
+            nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ': rate of change test performed, with all changes above those specified in the following list flagged as 4: '+str(args))        
+            
+            
+        else:
+            print('Arguments passed do not match the required format. No roc test performed.')
+        
+        
+    # If the time format doesn't match IMOS requirements    
+    else:
+        print('Time format does not match the required IMOS form of: days since 1950-01-01 00:00:00 UTC')
+    
+    
+    nc.close()
+    
+    
+# Not sure how to sys.argv[] with both *args and a keyword argument
+if __name__ == "__main__":
+    # usage is <file_name> <variable_name> <*args>
+    roc_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], *sys.argv[3:])
+
+    
+
+
diff --git a/ocean_dp/qc/select_in_water.py b/ocean_dp/qc/select_in_water.py
new file mode 100755
index 0000000..8c296c1
--- /dev/null
+++ b/ocean_dp/qc/select_in_water.py
@@ -0,0 +1,120 @@
+#!/usr/bin/python3
+
+# ocean_dp
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from dateutil.parser import parse
+from netCDF4 import Dataset, num2date, date2num
+from datetime import datetime, timedelta
+import sys
+from datetime import datetime
+import numpy as np
+from dateutil import parser
+import pytz
+import os
+import shutil
+
+# Submit argument as a list
+
+
+
+def select_in_water(netCDFfiles):
+    
+    new_name = [] # list of new file names
+    now = datetime.utcnow()            
+
+    # loop over all file names given
+    for fn in netCDFfiles:
+        
+        # Check the file is an IMOS formatted file
+        if fn.split('_')[0]=='IMOS':        
+            fn_new = fn.replace("FV00", "FV01")           
+            fn_new_split = fn_new.split('_')            
+            # Change the creation date in the filename to today
+            fn_new_split[-1] = "C-" + now.strftime("%Y%m%d") + ".nc"            
+            fn_new = '_'.join(fn_new_split)
+        else:
+            fn_new = fn.replace(".nc", "-trim.nc")
+            
+        # Add the new file name to the list of new file names
+        new_name.append(fn_new)
+
+        # Load the original netcdf file
+        ods = Dataset(fn,'a')
+
+        # Extract the time dimension, and the deployment start and end    
+        # TODO: check this works
+        v = ods.get_variables_by_attributes(standard_name='time')
+        time = np.array(v[0][:])
+
+        inw = parse(ods.time_deployment_start)
+        outw = parse(ods.time_deployment_end)
+
+        # Convert the start and end to the number format used in TIME
+        inw_num = date2num(inw.replace(tzinfo=None), units=ods.variables['TIME'].units)
+        outw_num = date2num(outw.replace(tzinfo=None), units=ods.variables['TIME'].units)
+
+        # Create logical index of deployed times        
+        deployed = np.logical_and(time>=inw_num, time<=outw_num)
+
+        # Determine the length of the new time dimension        
+        time_dim_len = len(time[deployed])
+
+        # Create the new netcdf file
+        ds = Dataset(fn_new, "w", format="NETCDF4")
+
+        new_time_dim = ds.createDimension("TIME", time_dim_len)
+
+        # Copy global attributes        
+        for att in ods.ncattrs():
+            ds.setncattr(att, ods.getncattr(att))
+
+        # Copy variables            
+        for v_name, varin in ods.variables.items():
+
+            varout = ds.createVariable(v_name, varin.datatype, varin.dimensions)
+
+            # Copy variable attributes
+            varout.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()})
+
+            # Fill variables with deployed data            
+            # TODO: should check if the dimensions for the variable include TIME, and truncate that dimension
+            if np.array(varin[:]).size == 1:
+                varout[:] = varin[:]
+            else:
+                varout[:] = np.array(varin[:])[deployed]
+
+        ds.date_created = now.strftime("%Y-%m-%dT%H:%M:%SZ")     
+
+        # update the time coverage attributes            
+        ds.time_coverage_start = ods.time_deployment_start            
+        ds.time_coverage_end = ods.time_deployment_end
+
+        # update the history attribute
+        try:
+            hist = ds.history + "\n"
+        except AttributeError:
+            hist = ""        
+        ds.history += hist + now.strftime("%Y%m%d:") + 'Data subset to only contain deployed (in water) data - the full record can be found in the corresponding FV00 file.'        
+
+        ds.close()       
+        ods.close()
+        
+    return new_name
+
+
+if __name__ == "__main__":
+    select_in_water(sys.argv[1:]) 
diff --git a/ocean_dp/qc/spike_test.py b/ocean_dp/qc/spike_test.py
new file mode 100755
index 0000000..089cf63
--- /dev/null
+++ b/ocean_dp/qc/spike_test.py
@@ -0,0 +1,176 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+from datetime import datetime, timedelta
+from netCDF4 import num2date, date2num
+from netCDF4 import stringtochar
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+
+default_high = 100
+
+default_low = 50
+
+
+# If files aren't specified, take all the IMOS*.nc files in the current folder
+def spike_test_all_files(target_vars_in=[], thresh_low=default_low, thresh_high=default_high, flag_low=3, flag_high=4):
+    target_files = glob.glob('IMOS*.nc')
+
+    spike_test_files(target_files, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
+
+
+def spike_test_files(target_files, target_vars_in=[], thresh_low=default_low, thresh_high=default_high, flag_low=3, flag_high=4):
+    
+    # Loop through each files in target_files
+    for current_file in target_files:
+        # Print each filename
+        print("input file %s" % current_file)
+
+        # Extract netcdf data into nc
+        nc = Dataset(current_file, mode="a")
+
+        # run the spike test
+        spike_test(nc=nc, target_vars_in=target_vars_in, thresh_low=thresh_low,thresh_high=thresh_high,flag_low=flag_low, flag_high=flag_high)
+
+
+def spike_test(nc, target_vars_in=[], thresh_low=default_low, thresh_high=default_high, flag_low=3, flag_high=4):
+    
+    # If target_vars aren't user specified, set it to all the variables of 
+    # the current_file, removing unwanted variables
+    if target_vars_in == []:
+        
+        target_vars = list(nc.variables.keys())
+        
+        # Remove TIME
+        target_vars.remove('TIME')
+        
+        # Remove any quality_control variables
+        qc_vars = [s for s in target_vars if 'quality_control' in s]
+        target_vars = [s for s in target_vars if s not in qc_vars]
+                        
+        # Remove any variables of single length
+        single_vars = [s for s in target_vars if nc.variables[s].size==1]
+        target_vars = [s for s in target_vars if s not in single_vars]
+        
+        print('target_vars are '+' '.join(target_vars))
+        
+    else:
+        target_vars = target_vars_in
+        
+    # For each variable
+    for current_var in target_vars:
+        
+        # Extract the variable
+        nc_var = nc.variables[current_var]
+        
+        # Create a test specific qc variable if it  doesn't already exist
+        if nc_var.name + "_quality_control_spk" in nc.variables:
+            ncVarOut = nc.variables[nc_var.name + "_quality_control_spk"]
+        else:
+            ncVarOut = nc.createVariable(nc_var.name + "_quality_control_spk", "i1", nc_var.dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+            ncVarOut[:] = np.zeros(nc_var.shape)
+            ncVarOut.long_name = "quality flag for " + nc_var.name
+            ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+            ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+    
+        # add new variable to list of aux variables
+        nc_var.ancillary_variables = nc_var.ancillary_variables + " " + nc_var.name + "_quality_control_spk"
+                
+        # Extract the variable data
+        var_data = np.array(nc.variables[current_var][:])
+        
+        print('checking '+current_var+' for high spikes')
+        
+        # Step through the data, one element at a time, starting from the 2nd element
+        for i in range(1,(len(var_data)-1)):
+            
+            # Calculate the mean of the i-1 and i+1 elements
+            shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+            
+            # Calculate the step changes
+            shoulder_diff = np.diff(var_data[i-1:i+2])
+            
+            # Check for spike exceeding high threshold
+            if (abs(var_data[i]-shoulder_mean) > thresh_high) & (True in (shoulder_diff>0)) & (True in (shoulder_diff<0)):# & (1.25*abs(shoulder_diff[0]) >= abs(x[1]) >= 0.75*abs(shoulder_diff[0])):
+                
+                print('High spike found')
+                
+                #set corresponding QC value to...
+                ncVarOut[i] = flag_high
+                
+        
+        # Find the indices where qc isn't set to 4 (high spike), removing the final element as it can't be check for a spike
+        low_spike_chk_idx = np.where(ncVarOut[:]!=4)[0][0:-1]
+
+        
+        # Remove from the indices those that are either side of a high spike
+        for i in ncVarOut[:]==4:
+            
+            low_spike_chk_idx=low_spike_chk_idx[low_spike_chk_idx!=[i-1]]
+            
+            low_spike_chk_idx=low_spike_chk_idx[low_spike_chk_idx!=[i+1]]
+        
+        #print(low_spike_chk_idx)
+        
+        print('checking '+current_var+' for low spikes')
+        
+        # For each of the remaining indices
+        for i in low_spike_chk_idx:
+            
+            #print('i is '+str(i))
+            
+            # Calculate the mean of the i-1 and i+1 elements
+            shoulder_mean = np.mean(np.take(var_data,[i-1,i+1]))
+            
+            # Calculate the step changes
+            shoulder_diff = np.diff(var_data[i-1:i+2])
+            
+            # Check for spike exceeding low threshold
+            if (abs(var_data[i]-shoulder_mean) > thresh_low) & (True in (shoulder_diff>0)) & (True in (shoulder_diff<0)): #& (1.25*abs(shoulder_diff[0]) >= abs(x[1]) >= 0.75*abs(shoulder_diff[0])):
+                
+                print('Low spike found')
+                
+                #set corresponding QC value to...
+                ncVarOut[i] = flag_low   
+                
+    nc.variables[current_var  + "_quality_control"][:] = np.maximum(nc.variables[current_var  + "_quality_control_spk"][:],nc.variables[current_var  + "_quality_control"][:])
+
+    # update the history attribute
+    try:
+        hist = nc.history + "\n"
+    except AttributeError:
+        hist = ""
+
+    nc.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ' :spike_test performed on [' + str(target_vars) + '], with spikes greater than '+str(thresh_high)+' flagged as '+str(flag_high)+' and spikes greater than '+str(thresh_low)+' flagged as '+str(flag_low))
+
+    nc.close()
+    
+if __name__ == "__main__":
+    # usage is <file_name> <variable_name> <window> <flag value>
+    spike_test_files(target_files=[sys.argv[1]], target_vars_in=[sys.argv[2]], thresh_low=float(sys.argv[3]), thresh_high=float(sys.argv[4]), flag_low= float(sys.argv[5]), flag_high= float(sys.argv[6]))
+
+    
+
+
+
+
+
diff --git a/ocean_dp/qc/temp_diff_hist_extra.py b/ocean_dp/qc/temp_diff_hist_extra.py
new file mode 100755
index 0000000..14dc6cf
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_hist_extra.py
@@ -0,0 +1,83 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+import glob
+from netCDF4 import num2date
+from dateutil import parser
+import datetime
+
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    for fname in files:
+      
+        if fname.endswith('.nc') and 'FV01' in fname:
+        
+            print(fname)  #Here, the wanted file name is printed
+
+            ds = Dataset(os.path.join(root,fname), 'a')
+        
+            vars = ds.variables
+        
+            to_add = []
+            for v in vars:
+                #print (vars[v].dimensions)
+                if v != 'TIME':
+                    to_add.append(v)
+        
+            time_var = vars["TIME"]
+            time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+        
+            time_deploy = parser.parse(ds.time_deployment_start, ignoretz=True)
+            time_recovery = parser.parse(ds.time_deployment_end, ignoretz=True)
+        
+            print(time_deploy)
+        
+            print(to_add)
+            for v in to_add:
+                if "TIME" in vars[v].dimensions:
+        
+                    if v.endswith("_quality_control"):
+        
+                        print("QC time dim ", v)
+        
+                        ncVarOut = vars[v]
+                        mask = (time <= time_deploy) | (time >= time_recovery)
+                        ncVarOut[mask] = np.ones(vars[v].shape)[mask] * 7
+        
+        
+            ds.file_version = "Level 1 - Quality Controlled Data"
+            
+            # update the history attribute
+            try:
+                hist = ds.history + "\n"
+                
+            except AttributeError:
+                hist = ""
+            
+            ds.setncattr('history', hist + datetime.utcnow().strftime("%Y-%m-%d") + ': in water test performed, with out of water data flagged at QC=7')        
+                    
+
+            ds.close()
\ No newline at end of file
diff --git a/ocean_dp/qc/temp_diff_hist_glob b/ocean_dp/qc/temp_diff_hist_glob
new file mode 100755
index 0000000..a87ca72
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_hist_glob
@@ -0,0 +1,121 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+import glob
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    if ('Pulse' in x) or ('SOFS' in x):
+        
+        deployments.append(x)
+        
+
+fv01_files = glob.glob("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data/*/*/*FV01*.nc")
+
+
+fig, ax = plt.subplots(4,4,sharex='all', sharey='all')
+
+ax=ax.flatten()
+
+
+for current_deployment, plt_idx in zip(deployments, range(0,16)):
+    
+    
+    
+    for fname in files:
+      
+        if fname.find(current_deployment) and fname.endswith('.nc') and 'FV01' in fname:
+        
+            print(fname)  #Here, the wanted file name is printed
+
+            nc = Dataset(os.path.join(root,fname), mode = 'r')
+            
+            if 'TEMP_quality_control' in list(nc.variables) and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                
+                # Calculate temperature changes
+                nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+                
+                # Extract the time data
+                nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+            
+                # Convert from days to hours
+                nc_time_hr = nc_time*24
+                
+                # Calculate time changes
+                nc_time_hr_diffs = np.diff(nc_time_hr)
+                
+                # Calculate the rate of change of temperature wrt time
+                nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+                
+                # Add the results for this netcdf to the record for all files
+                #all_dtemp_dtime = np.concatenate((all_dtemp_dtime,nc_dtemp_dtime))
+                
+                #all_dtemp_dtime_deps += ([nc.deployment_code] * len(nc_dtemp_dtime))
+                
+                #netcdffiles.append(fname)
+                
+                #mins.append(np.amin(nc_dtemp_dtime))
+                
+                #maxs.append(np.amax(nc_dtemp_dtime))
+                
+                
+            
+            nc.close()
+            
+    ax[plt_idx].hist(nc_dtemp_dtime,100,log=True)
+    
+    ax[plt_idx].set_ylim(bottom=0.1,top=10E5)
+
+    ax[plt_idx].set_xlim(left=-500, right=500)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/temp_diff_hist_glob.py b/ocean_dp/qc/temp_diff_hist_glob.py
new file mode 100755
index 0000000..1e63d14
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_hist_glob.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+import glob
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    if ('Pulse' in x) or ('SOFS' in x):
+        
+        deployments.append(x)
+        
+
+deployment_dtemp_dtime = np.array([])
+
+fv01_files = glob.glob("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data/*/*/*FV01*.nc")
+
+
+fig, ax = plt.subplots(4,4,sharex='all', sharey='all')
+
+ax=ax.flatten()
+
+
+for current_deployment, plt_idx in zip(deployments, range(0,len(deployments))):
+    
+    print(current_deployment + 'files')
+    
+    for fname in fv01_files:
+      
+        if current_deployment in fname:
+        
+            #print(fname + ' contains ' + current_deployment)  #Here, the wanted file name is printed
+
+            nc = Dataset(fname, mode = 'r')
+            
+            if 'TEMP_quality_control' in list(nc.variables) and np.array(nc.variables['TEMP'][:]).ndim == 1:
+                
+                print(fname)
+                
+                # Calculate temperature changes
+                nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+                
+                # Extract the time data
+                nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+            
+                # Convert from days to hours
+                nc_time_hr = nc_time*24
+                
+                # Calculate time changes
+                nc_time_hr_diffs = np.diff(nc_time_hr)
+                
+                # Calculate the rate of change of temperature wrt time
+                nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+                
+                # Add the results for this netcdf to the record for the deployment
+                deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+                
+            nc.close()
+            
+    print('plotting '+ str(len(deployment_dtemp_dtime)) + ' values')
+            
+    ax[plt_idx].hist(deployment_dtemp_dtime,100)
+    
+    #ax[plt_idx].set_ylim(bottom=0.1,top=10E5)
+
+    #ax[plt_idx].set_xlim(left=-10, right=10)
+    
+    deployment_dtemp_dtime = np.array([])
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/temp_diff_histograms.py b/ocean_dp/qc/temp_diff_histograms.py
new file mode 100755
index 0000000..a3f8420
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_histograms.py
@@ -0,0 +1,335 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    if ('Pulse' in x) or ('SOFS' in x):
+        
+        deployments.append(x)
+
+
+
+# check for in water test in history of netcdf file, if not perform the test
+
+netcdffiles = []
+
+mins=[]
+
+maxs=[]
+
+all_dtemp_dtime = np.array([])
+
+all_dtemp_dtime_deps = []
+
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    for fname in files:
+      
+        if fname.endswith('.nc') and 'FV01' in fname:
+        
+            print(fname)  #Here, the wanted file name is printed
+
+      
+            nc = Dataset(os.path.join(root,fname), mode = 'r')
+            
+            if 'TEMP_quality_control' in list(nc.variables) and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                
+                # Calculate temperature changes
+                nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+                
+                # Extract the time data
+                nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+            
+                # Convert from days to hours
+                nc_time_hr = nc_time*24
+                
+                # Calculate time changes
+                nc_time_hr_diffs = np.diff(nc_time_hr)
+                
+                # Calculate the rate of change of temperature wrt time
+                nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+                
+                # Add the results for this netcdf to the record for all files
+                all_dtemp_dtime = np.concatenate((all_dtemp_dtime,nc_dtemp_dtime))
+                
+                all_dtemp_dtime_deps += ([nc.deployment_code] * len(nc_dtemp_dtime))
+                
+                netcdffiles.append(fname)
+                
+                mins.append(np.amin(nc_dtemp_dtime))
+                
+                maxs.append(np.amax(nc_dtemp_dtime))
+            
+            nc.close()
+
+
+fig, ax = plt.subplots()
+
+bins = np.linspace(-450,450,901)
+
+line_thick = 0.5
+
+counts,bins,bars = ax.hist(all_dtemp_dtime,bins,log=True)                   
+
+ax.axvline(x=3*np.std(all_dtemp_dtime),color='r',linewidth=line_thick) 
+
+ax.axvline(x=-3*np.std(all_dtemp_dtime),color='r',linewidth=line_thick) 
+
+ax.set_title('Hourly temp changes from all FV01 files in SOTS-TEMP-Raw_Data')    
+
+label_coords = (0.01, 0.9)
+
+label_method = 'axes fraction'
+
+ax.annotate('~1.84E7 measurements',xy=label_coords, xycoords=label_method)
+
+
+
+def last_four(entry):
+    
+    output = entry[-4::]
+    
+    return output
+
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    if ('Pulse' in x) or ('SOFS' in x):
+        
+        deployments.append(x)
+
+deployments.sort(key=last_four)
+
+
+#######
+
+
+
+
+all_deployment_dtemp_dtime = [None] * len(deployments)
+
+for current_deployment, plt_idx in zip(deployments, range(0,len(deployments))):
+    
+    print('current deployment is '+current_deployment)
+    
+    deployment_dtemp_dtime = np.array([])
+    
+    for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+        for fname in files:
+          
+            if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+            
+                #print(fname)  #Here, the wanted file name is printed
+
+                nc = Dataset(os.path.join(root,fname), mode = 'r')
+                
+                if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                    
+                    time_var = nc.variables["TIME"]
+                    
+                    time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+                
+                    time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+                    
+                    time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+                    
+                    #print('using '+fname)
+                    
+                    temp_extract = np.array(nc.variables['TEMP'][:][(time >= time_deploy) | (time <= time_recovery)])
+                    
+                    # Calculate temperature changes
+                    nc_temp_diffs = np.diff(temp_extract)
+                    
+                    # Extract the time data
+                    nc_time = np.array(nc.variables['TIME'][:][(time >= time_deploy) | (time <= time_recovery)])
+                
+                    # Convert from days to hours
+                    nc_time_hr = nc_time*24
+                    
+                    # Calculate time changes
+                    nc_time_hr_diffs = np.diff(nc_time_hr)
+                    
+                    # Calculate the rate of change of temperature wrt time
+                    nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+                    
+                    # Add the results for this netcdf to the record for the deployment
+                    deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+                    
+                    all_deployment_dtemp_dtime[plt_idx] = deployment_dtemp_dtime
+                
+                nc.close()
+                
+                
+
+
+
+
+
+
+fig, ax = plt.subplots(4,4)
+
+ax=ax.flatten()             
+
+line_thick = 1  
+
+label_coords = (0.6, 0.6)
+label_method = 'axes fraction' 
+                
+for plt_idx,dep_name in zip(range(0,len(deployments)),deployments):            
+
+    print('plotting '+ str((all_deployment_dtemp_dtime[plt_idx])) + ' values')
+            
+    hist_data = ax[plt_idx].hist(all_deployment_dtemp_dtime[plt_idx],21,log=True)
+    
+    ax[plt_idx].set_title(dep_name,fontsize=10) 
+    
+    #ax[plt_idx].axvline(x=3*np.mean(all_deployment_dtemp_dtime[plt_idx]),color='g',linewidth=line_thick)
+    
+    ax[plt_idx].axvline(x=np.mean(all_deployment_dtemp_dtime[plt_idx])+3*np.std(all_deployment_dtemp_dtime[plt_idx]),color='r',linewidth=line_thick) 
+
+    ax[plt_idx].axvline(x=np.mean(all_deployment_dtemp_dtime[plt_idx])-3*np.std(all_deployment_dtemp_dtime[plt_idx]),color='r',linewidth=line_thick) 
+    
+    anno = 'mean = '+str(round(float(np.mean(all_deployment_dtemp_dtime[plt_idx])),sigfigs=3))
+    
+    anno += '\n3SD = ' + str(round(float(3*np.std(all_deployment_dtemp_dtime[plt_idx])),sigfigs=3))
+    
+    anno += '\nsamples = ' + str(len(all_deployment_dtemp_dtime[plt_idx]))
+    
+    ax[plt_idx].annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
+    
+    #ax[plt_idx].set_ylim(bottom=0,top=np.max(hist_data[0]))
+    
+    #ax[plt_idx].set_xlim(left=-450, right=450)      np.linspace(-450,450,901)
+    
+#ax[-1].axis('off')
+    
+all_data = np.concatenate(all_deployment_dtemp_dtime)    
+    
+hist_data = ax[15].hist(all_data,21,log=True)
+
+ax[15].set_title('All data',fontsize=10) 
+
+#ax[plt_idx].axvline(x=3*np.mean(all_deployment_dtemp_dtime[plt_idx]),color='g',linewidth=line_thick)
+
+ax[15].axvline(x=np.mean(all_data)+3*np.std(all_data),color='r',linewidth=line_thick) 
+
+ax[15].axvline(x=np.mean(all_data)-3*np.std(all_data),color='r',linewidth=line_thick) 
+
+anno = 'mean = '+str(round(float(np.mean(all_data)),sigfigs=3))
+
+anno += '\n3SD = ' + str(round(float(3*np.std(all_data)),sigfigs=3))
+
+anno += '\nsamples = ' + str(len(all_data))
+
+ax[15].annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8)
+
+    
+fig.subplots_adjust(left=0.05,right=0.99,bottom=0.1,top=0.9,wspace=0.15,hspace=0.4)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+files = glob.glob('*.nc')
+
+temp_diffs = np.array([])
+
+for current_file in files:
+    
+    nc = Dataset(current_file,mode='r')
+    
+    temp_diffs = np.concatenate((temp_diffs,np.diff(np.array(nc.variables['TEMP'][:]))))
+
+fig, ax = plt.subplots()
+
+ax.hist(temp_diffs,100,log=True)
+
+
+
+
+# use os.walk??? to run in each netcdf folder?? os.scandir()?
+
+
+
+
+
+# sofs75_60m = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4051-60m_END-20190331_C-20200204.nc',mode='r')
+# sofs75_70m = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4052-70m_END-20190331_C-20200204.nc',mode='r')
+# sofs75_75m = Dataset('IMOS_ABOS-SOTS_T_20180801_SOFS_FV00_SOFS-7.5-2018-Starmon-mini-4053-75m_END-20190331_C-20200204.nc',mode='r')
+
+
+# temp_60 = np.array(sofs75_60m.variables['TEMP'][:])
+# temp_70 = np.array(sofs75_70m.variables['TEMP'][:])
+# temp_75 = np.array(sofs75_75m.variables['TEMP'][:])
+
+# label_coords = (0.01, 0.85)
+# label_method = 'axes fraction'
+
+# fig, axs = plt.subplots(3, 1, sharey=True)
+# axs[0].set_title('Temp sensor comparison SOFS7.5')
+
+# axs[0].hist(np.diff(temp_60),bins=100,log=True, histtype='bar', stacked=True)
+# axs[0].set_ylim(bottom=0.1,top=10E5)
+# axs[0].set_xlim(left=-40, right=40)
+# axs[0].annotate('60m',xy=label_coords, xycoords=label_method)
+# axs[0].tick_params(labelbottom=False)
+
+# axs[1].hist(np.diff(temp_70),bins=100,log=True)
+# axs[1].set_ylim(bottom=0.1,top=10E5)
+# axs[1].set_xlim(left=-40, right=40)
+# axs[1].annotate('70m',xy=label_coords, xycoords=label_method)
+# axs[1].tick_params(labelbottom=False)
+
+# axs[2].hist(np.diff(temp_75),bins=100,log=True)
+# axs[2].set_ylim(bottom=0.1,top=10E5)
+# axs[2].set_xlim(left=-40, right=40)
+# axs[2].annotate('75m',xy=label_coords, xycoords=label_method)
+
+
+
+# fig.savefig('test.pdf')
+
+
+
+
+
+
diff --git a/ocean_dp/qc/temp_diff_timeseries_from_fv00.py b/ocean_dp/qc/temp_diff_timeseries_from_fv00.py
new file mode 100755
index 0000000..9a38ccf
--- /dev/null
+++ b/ocean_dp/qc/temp_diff_timeseries_from_fv00.py
@@ -0,0 +1,267 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+
+def last_four(entry):
+    
+    output = entry[-4::]
+    
+    return output
+
+
+def sp_layout(num_in):
+    
+    sp_nums = np.array([1,2,4,6,9,12,16,20,25,30])
+    
+    sp_dict={1:[1,1],2:[2,1],4:[2,2],6:[3,2],9:[3,3],12:[4,3],16:[4,4],20:[5,4],25:[5,5],30:[6,5]}
+    
+    return sp_dict[sp_nums[np.where(num_in<=sp_nums)[0][0]]]
+
+
+
+cmap = colors.ListedColormap(['blue','orange','red'])
+
+boundaries = [0,20,30,2000]
+
+norm = colors.BoundaryNorm(boundaries, cmap.N, clip=True)
+
+
+
+deployments = []
+
+for x in next(os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"))[1]:
+    
+    if (('Pulse' in x) or ('SOFS' in x)):
+
+        
+        deployments.append(x)
+
+deployments.sort(key=last_four)
+
+
+
+
+
+
+for current_deployment in deployments:
+    
+    acceptable_files = []
+    
+    acceptable_depths = []
+    
+    print('current deployment is '+current_deployment)
+    
+    deployment_dtemp_dtime = np.array([])
+    
+    for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+        for fname in files:
+            
+            print('checking '+fname)
+          
+            if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+                
+                print('opening '+fname)
+                
+                nc = Dataset(os.path.join(root,fname), mode = 'r')
+            
+                if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                
+                    acceptable_files.append(os.path.join(root,fname))
+                    
+                    acceptable_depths.append(nc.instrument_nominal_depth)
+                    
+                    print(fname+' accepted')
+                
+                nc.close()
+                
+    
+    acceptable_files = [x for _,x in sorted(zip(acceptable_depths,acceptable_files))]            
+                
+                    
+    fig, ax = plt.subplots(sp_layout(len(acceptable_files))[0],sp_layout(len(acceptable_files))[1],figsize=((12,8)),constrained_layout=True,sharex=True)
+
+    ax=ax.flatten()
+    
+    fig.suptitle(current_deployment, fontsize=14)
+    
+                
+    for fpath,f_idx in zip(acceptable_files, range(0,len(acceptable_files))):      
+            
+        nc = Dataset(fpath, mode = 'r') #mismatching files and folders?? looking for sofs7.5 file in pulse 7 folder!?!?
+            
+        time_var = nc.variables["TIME"]
+        
+        time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+    
+        time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+        
+        time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+        
+        #print('using '+fname)
+        
+        temp_extract = np.array(nc.variables['TEMP'][:][(time > time_deploy) & (time < time_recovery)])
+        
+        # Calculate temperature changes
+        nc_temp_diffs = np.diff(temp_extract)
+        
+        # Extract the time data
+        nc_time = np.array(nc.variables['TIME'][:][(time > time_deploy) & (time < time_recovery)])
+    
+        # Convert from days to hours
+        nc_time_hr = nc_time*24
+        
+        # Calculate time changes
+        nc_time_hr_diffs = np.diff(nc_time_hr)
+        
+        # Calculate the rate of change of temperature wrt time
+        nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+        
+        im=ax[f_idx].scatter(nc_time,temp_extract,s=0.2,c=np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime))),cmap=cmap,norm=norm)
+        
+        im=ax[f_idx].scatter(nc_time[np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))>20],temp_extract[np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))>20],s=0.5,c=np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))[np.concatenate((np.array([0]),np.abs(nc_dtemp_dtime)))>20],cmap=cmap,norm=norm)
+        
+        ax[f_idx].set_title(str(nc.instrument_nominal_depth)+'m',fontsize=10)
+        
+        # Add the results for this netcdf to the record for the deployment
+        deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+        
+        nc.close()
+        
+        if f_idx==0:
+            
+            fig.colorbar(im)
+            
+    for f_idx in range(len(acceptable_files),len(ax)):       
+            
+        ax[f_idx].set_axis_off()
+
+
+##############################################################################
+
+fig, ax = plt.subplots(sp_layout(len(deployments))[0],sp_layout(len(deployments))[1],figsize=((12,8)),constrained_layout=True,sharex=False)
+
+ax=ax.flatten()
+
+for current_deployment,d_idx in zip(deployments,range(0,len(deployments))):
+    
+    acceptable_files = []
+    
+    acceptable_depths = []
+    
+    print('current deployment is '+current_deployment)
+    
+    deployment_dtemp_dtime = np.array([])
+    
+    for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+        for fname in files:
+            
+            print('checking '+fname)
+          
+            if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+                
+                print('opening '+fname)
+                
+                nc = Dataset(os.path.join(root,fname), mode = 'r')
+            
+                if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                
+                    acceptable_files.append(os.path.join(root,fname))
+                    
+                    acceptable_depths.append(nc.instrument_nominal_depth)
+                    
+                    print(fname+' accepted')
+                
+                nc.close()
+                
+    
+    acceptable_files = [x for _,x in sorted(zip(acceptable_depths,acceptable_files))]            
+                    
+                
+    for fpath in acceptable_files:      
+            
+        nc = Dataset(fpath, mode = 'r') #mismatching files and folders?? looking for sofs7.5 file in pulse 7 folder!?!?
+            
+        time_var = nc.variables["TIME"]
+        
+        time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+    
+        time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+        
+        time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+        
+        #print('using '+fname)
+        
+        temp_extract = np.array(nc.variables['TEMP'][:][(time > time_deploy) & (time < time_recovery)])
+        
+        # Calculate temperature changes
+        nc_temp_diffs = np.diff(temp_extract)
+        
+        # Extract the time data
+        nc_time = np.array(nc.variables['TIME'][:][(time > time_deploy) & (time < time_recovery)])
+    
+        # Convert from days to hours
+        nc_time_hr = nc_time*24
+        
+        # Calculate time changes
+        nc_time_hr_diffs = np.diff(nc_time_hr)
+        
+        # Calculate the rate of change of temperature wrt time
+        nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+        
+        # Add the results for this netcdf to the record for the deployment
+        deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+        
+        nc.close()
+        
+    ax[d_idx].hist(deployment_dtemp_dtime,21,log=True)
+    
+    ax[d_idx].set_title(current_deployment,fontsize=10)
+        
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ocean_dp/qc/temp_stat_plot.py b/ocean_dp/qc/temp_stat_plot.py
new file mode 100755
index 0000000..5b31a82
--- /dev/null
+++ b/ocean_dp/qc/temp_stat_plot.py
@@ -0,0 +1,365 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+import pandas as pd
+
+import warnings
+import scipy.stats as st
+import statsmodels as sm
+import matplotlib
+
+
+############################# Data extraction ################################
+
+# creates an empty array to store the names of the SOTS deployments
+deployments = []
+
+checked_files = []
+
+processed_files = []
+
+# loops through all the folders and files contained in the folder
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    # if the folder/file name contains 'Pulse' or 'SOFS' and doesn't contain '.', append it to deployments
+    if (('Pulse' in x) or ('SOFS' in x)) and ('.p' not in x):
+        
+        deployments.append(x)
+    
+# create a dataframe to store extract information
+sots_temp_ensemble = pd.DataFrame(columns = ["dTemp/dtime","dTemp/dSample","QC","Nominal depth","Deployment"])
+
+# loops through all files in the directory
+for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    for fname in files:
+        
+        # append the filename to the list of checked files
+        checked_files.append(fname)
+        
+        # for each netcdf file labelled as FV01 and containing a deployment in its name
+        if fname.endswith('.nc') and 'FV01' in fname and any(ele in fname for ele in deployments):
+        
+            # print the filename
+            print(fname)  
+            
+            # open the file
+            nc = Dataset(os.path.join(root,fname), mode = 'a')
+            
+            # check file contains temperature data
+            if 'TEMP' in list(nc.variables):
+            
+                # check that the in_out_water test has been run on the file, if not run in_out_water code
+                if not 'TEMP_quality_control_io' in list(nc.variables):
+                
+                    # run in_out_water script - uncommented at this point as just copied and pasted
+                    var_name = 'TEMP'
+                    nc_vars = nc.variables
+                    to_add = []
+                    if var_name:
+                        to_add.append(var_name)
+                    else:
+                        for v in nc_vars:
+                            #print (vars[v].dimensions)
+                            if v != 'TIME':
+                                to_add.append(v)
+                
+                    time_var = nc_vars["TIME"]
+                    time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+                
+                    time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+                    time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+                
+                    print('deployment time', time_deploy)
+                
+                    print(to_add)
+                
+                    # create a mask for the time range
+                    mask = (time <= time_deploy) | (time >= time_recovery)
+                
+                    for v in to_add:
+                        if "TIME" in nc_vars[v].dimensions:
+                            if v.endswith("_quality_control"):
+                                print("QC time dim ", v)
+                
+                                ncVarOut = nc_vars[v]
+                                ncVarOut[mask] = 7
+                            else:
+                                # create a qc variable just for this test flags
+                                if v + "_quality_control_io" in nc.variables:
+                                    ncVarOut = nc.variables[v + "_quality_control_io"]
+                                else:
+                                    ncVarOut = nc.createVariable(v + "_quality_control_io", "i1", nc_vars[v].dimensions, fill_value=99, zlib=True)  # fill_value=0 otherwise defaults to max
+                                ncVarOut[:] = np.zeros(nc_vars[v].shape)
+                                ncVarOut.long_name = "quality flag for " + v
+                                ncVarOut.flag_values = np.array([0, 1, 2, 3, 4, 6, 7, 9], dtype=np.int8)
+                                ncVarOut.flag_meanings = 'unknown good_data probably_good_data probably_bad_data bad_data not_deployed interpolated missing_value'
+                
+                                nc_vars[v].ancillary_variables = nc_vars[v].ancillary_variables + " " + v + "_quality_control_io"
+                                ncVarOut[mask] = 7
+                        
+                        nc.variables[v + "_quality_control"][:] = np.maximum(nc.variables[v + "_quality_control_io"][:],nc.variables[v + "_quality_control"][:])
+                
+                    nc.file_version = "Level 1 - Quality Controlled Data"
+                
+                
+                
+                # check that the file has a single dimension temperature vector, and that the time format is correct
+                if np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                    
+                    # calculate temperature changes for in water data
+                    nc_temp_diffs = np.diff(np.array(nc.variables['TEMP'][np.array(nc.variables['TEMP_quality_control'][:])!=7]))
+                
+                    # extract the time data
+                    nc_time = np.array(nc.variables['TIME'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+            
+                    # Convert from days to hours
+                    nc_time_hr = nc_time*24
+                    
+                    # Calculate time changes in hours
+                    nc_time_hr_diffs = np.diff(nc_time_hr)
+                    
+                    # calculate the rate of change of temperature wrt time (degrees °C per hour)
+                    nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+                    
+                    
+                    
+                    # extract temp_qc data
+                    nc_temp_qc = np.array(nc.variables['TEMP_quality_control'][np.array(nc.variables['TEMP_quality_control'][:])!=7])
+                    
+                    # calculate qc values for each nc_dtemp_dtime by taking the maximum of the qc values of the two contributing temps
+                    nc_dtemp_dtime_qc = pd.Series(nc_temp_qc).rolling(2).max().dropna().to_numpy()
+                    
+                    
+                    
+                    # extract sensor nominal depth
+                    nc_nom_depth = np.array(nc.variables['NOMINAL_DEPTH'])
+                    
+                    # create a vector the same size as nc_dtemp_dtime with the nominal depth
+                    nc_nom_depth_vector = np.repeat(nc_nom_depth,len(nc_dtemp_dtime))
+                    
+                    
+                    
+                    # extract deployment name
+                    nc_deployment = nc.deployment_code
+                    
+                    # create a list the same size as nc_dtemp_dtime with the deployment name
+                    nc_deployment_list = [nc_deployment] * len(nc_dtemp_dtime)
+                    
+                    
+                    
+                    # combine information into an length x 4 dataframe
+                    nc_temp_ensemble = pd.DataFrame({"dTemp/dtime":nc_dtemp_dtime,"dTemp/dSample":nc_temp_diffs,"QC":nc_dtemp_dtime_qc,"Nominal depth":nc_nom_depth_vector,"Deployment":nc_deployment_list})
+                    
+                    # append the current netcdf's dataframe to the sots_temp_ensemble
+                    sots_temp_ensemble = sots_temp_ensemble.append(nc_temp_ensemble)
+                    
+                    # append the filename to the list of processed files
+                    processed_files.append(fname)
+                    
+                    
+            nc.close()
+                    
+                    
+############################# Data processing ################################
+            
+# creates a new dataframe containing only data with QC < 3
+sots_temp_ensemble_qc210 = sots_temp_ensemble[sots_temp_ensemble["QC"]<3]
+
+# calculates overall standard deviation
+std_time_total = np.std(sots_temp_ensemble_qc210["dTemp/dtime"])
+
+
+
+
+# creates an emply list to store data deployment by deployment
+std_by_deployment_data = []
+
+# creates a dict of deployment names and standard deviations
+for i in sots_temp_ensemble_qc210.Deployment.unique():
+    std_by_deployment_data.append(
+        {
+            'Deployment': i,
+            'STD time': np.std(sots_temp_ensemble_qc210["dTemp/dtime"][sots_temp_ensemble_qc210["Deployment"]==i]),
+            'STD sample': np.std(sots_temp_ensemble_qc210["dTemp/dSample"][sots_temp_ensemble_qc210["Deployment"]==i])
+        }
+    )
+
+# creates a Dataframe from the dict
+std_by_deployment = pd.DataFrame(std_by_deployment_data)
+
+
+
+
+
+# =============================================================================
+# std_by_depth: this function takes two compulsary arguments (top: the shallowest 
+# depth(m)), bottom: the deepest depth(m)) and one option argument (deployment_in: 
+# the deployment from which data will be taken). The function will return the standard
+# deviation of the d(Temp)/d(Time) data from sensors with nominal depths at and 
+# between the two depths, and from only the deployment_in if specified.
+#
+# sample call: std_by_depth(500,10000,'SOFS-7.5-2018')
+#
+# this will give the std of all d(Temp)/d(Time) data from SOFS-7.5-2018 from 
+# sensors with 500m <= nominal depth <= 10000m
+# =============================================================================
+
+def std_by_depth_temp(top,bottom,deployment_in=None,rate='time'):
+    
+    selection = ''
+    
+    if rate == 'time':
+        
+        selection = "dTemp/dtime"
+        
+    elif rate == 'sample':
+        
+        selection = "dTemp/dSample"
+        
+    else:
+        
+        return "incorrect rate specification"
+    
+    
+    
+    # if user incorrectly inputs depths, swap them and run the code
+    if top > bottom:
+        
+        top, bottom = bottom, top
+    
+    if deployment_in == None:
+    
+        # subsamples sots_temp_ensemble_qc210 based on depth
+        target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom)]
+        
+    elif isinstance(deployment_in, list):
+        
+        # subsamples sots_temp_ensemble_qc210 based on depth
+        target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom) & (sots_temp_ensemble_qc210.Deployment.isin(deployment_in))]
+        
+        
+    else:   
+    
+        # subsamples sots_temp_ensemble_qc210 based on depth
+        target_ensemble = sots_temp_ensemble_qc210[(sots_temp_ensemble_qc210["Nominal depth"]>=top) & (sots_temp_ensemble_qc210["Nominal depth"]<=bottom) & (sots_temp_ensemble_qc210["Deployment"]==deployment_in)]
+    
+    # if not data is available with the given choices, end the function
+    if len(target_ensemble)==0:
+        
+        return 'No data available for those choices'
+        
+        
+    # calculates the mean of the subsample
+    target_mean = np.mean(target_ensemble[selection])
+            
+    # calculates the standard deviation of the subsample
+    target_std = np.std(target_ensemble[selection])
+        
+    # sets line thickness for plot
+    line_thick = 1
+    
+    # creates axes for histogram
+    ax_hist=plt.axes()
+    
+    # plots a histogram of the data selected
+    target_ensemble.hist(column=selection,bins=100,log=True,ax=ax_hist)
+    
+    # draws lines at the mean +- 3 STD on the histogram
+    ax_hist.axvline(x=target_mean+3*target_std,color='r',linewidth=line_thick) 
+
+    ax_hist.axvline(x=target_mean-3*target_std,color='r',linewidth=line_thick) 
+    
+    # sets the x label
+    if rate == 'time':
+        
+         ax_hist.set_xlabel('°C/hr')
+        
+    elif rate == 'sample':
+        
+        ax_hist.set_xlabel('°C/sample')
+    
+    
+    label_coords = (0.70, 0.99)
+    label_method = 'axes fraction' 
+    
+    anno = 'mean = '+str(round(float(target_mean),sigfigs=3))
+    
+    anno += '\n3 STD = ' + str(round(float(3*target_std),sigfigs=3))
+    
+    anno += '\nno. samples = ' + str(len(target_ensemble))
+    
+    anno += '\n'+str(top)+'m <= depth <= '+str(bottom)+'m'
+    
+    if deployment_in == None:
+        
+        anno += '\nall available data'
+        
+    elif isinstance(deployment_in, list):
+        
+        anno += '\n'
+        
+        anno += '\n'.join(deployment_in)
+        
+    else:
+        
+        anno += '\n'+deployment_in
+            
+    ax_hist.annotate(anno,xy=label_coords, xycoords=label_method,fontsize=8,va = "top", ha="left")
+    
+    # returns the standard deviation of the subsample
+    return target_std
+    
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+            
+            
+            
\ No newline at end of file
diff --git a/ocean_dp/qc/temp_time_diff_plots.py b/ocean_dp/qc/temp_time_diff_plots.py
new file mode 100755
index 0000000..cf42a13
--- /dev/null
+++ b/ocean_dp/qc/temp_time_diff_plots.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy.ma as ma
+import sys
+from netCDF4 import Dataset, num2date
+from dateutil import parser
+import numpy as np
+import argparse
+import glob
+import pytz
+import os
+import matplotlib.pyplot as plt
+from matplotlib import colors
+from matplotlib.ticker import PercentFormatter
+from sigfig import round
+
+
+
+def last_four(entry):
+    
+    output = entry[-4::]
+    
+    return output
+
+
+deployments = []
+
+for x in os.listdir("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+    if ('Pulse' in x) or ('SOFS' in x):
+        
+        deployments.append(x)
+
+deployments.sort(key=last_four)
+
+
+fig, ax = plt.subplots(4,4)
+
+ax=ax.flatten()  
+
+
+all_deployment_dtemp_dtime = [None] * len(deployments)
+
+for current_deployment, plt_idx in zip(deployments, range(0,len(deployments))):
+    
+    print('current deployment is '+current_deployment)
+    
+    deployment_dtemp_dtime = np.array([])
+    
+    for root, dirs, files in os.walk("/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data"):
+    
+        for fname in files:
+          
+            if current_deployment in fname and fname.endswith('.nc') and 'FV00' in fname:
+            
+                #print(fname)  #Here, the wanted file name is printed
+
+                nc = Dataset(os.path.join(root,fname), mode = 'r')
+                
+                if 'TEMP' in nc.variables and np.array(nc.variables['TEMP'][:]).ndim == 1 and nc.variables['TIME'].getncattr('units') =='days since 1950-01-01 00:00:00 UTC':
+                    
+                    time_var = nc.variables["TIME"]
+                    
+                    time = num2date(time_var[:], units=time_var.units, calendar=time_var.calendar)
+                
+                    time_deploy = parser.parse(nc.time_deployment_start, ignoretz=True)
+                    
+                    time_recovery = parser.parse(nc.time_deployment_end, ignoretz=True)
+                    
+                    #print('using '+fname)
+                    
+                    temp_extract = np.array(nc.variables['TEMP'][:][(time >= time_deploy) | (time <= time_recovery)])
+                    
+                    # Calculate temperature changes
+                    nc_temp_diffs = np.diff(temp_extract)
+                    
+                    # Extract the time data
+                    nc_time = np.array(nc.variables['TIME'][:][(time >= time_deploy) | (time <= time_recovery)])
+                
+                    # Convert from days to hours
+                    nc_time_hr = nc_time*24
+                    
+                    ax[plt_idx].plot(nc_time,temp_extract)
+                    
+                    # Calculate time changes
+                    nc_time_hr_diffs = np.diff(nc_time_hr)
+                    
+                    # Calculate the rate of change of temperature wrt time
+                    nc_dtemp_dtime = np.divide(nc_temp_diffs,nc_time_hr_diffs)
+                    
+                    # Add the results for this netcdf to the record for the deployment
+                    deployment_dtemp_dtime = np.concatenate((deployment_dtemp_dtime,nc_dtemp_dtime))
+                    
+                    all_deployment_dtemp_dtime[plt_idx] = deployment_dtemp_dtime
+                
+                nc.close()
+                
+                
+                
+
+
+
+
+
diff --git a/ocean_dp/sots_processing_runthrough.py b/ocean_dp/sots_processing_runthrough.py
new file mode 100755
index 0000000..f9aee09
--- /dev/null
+++ b/ocean_dp/sots_processing_runthrough.py
@@ -0,0 +1,94 @@
+# Copyright (C) 2020 Ben Weeding
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Initial package import
+import sys
+import glob
+import fnmatch
+import os
+import time
+
+# Addition of folder containing user defined packages/modules
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/qc')
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/aggregation')
+sys.path.append('/Users/tru050/Documents/GitHub/imos-tools/ocean_dp/processing')
+
+# Import user defined packages
+import add_qc_flags
+import in_out_water
+import copyDataset
+import pressure_interpolator
+
+import global_range
+import rate_of_change_test
+import spike_test
+import flatline_test
+import qc_checker
+
+start = time.time()
+
+# Set the working directory
+#os.chdir('‎/Users/tru050/Desktop/cloudstor/Shared/SOTS-Temp-Raw-Data/SOFS-5-2015')
+
+# Make a list of FV00 filenames
+fv00_files = glob.glob('*IMOS_ABOS-SOTS*FV00*.nc')
+
+# Run add_qc_flags.py and collect FV01 filenames
+fv01_files = add_qc_flags.add_qc(fv00_files)
+
+# Run in_out_water.py
+for ncfile in fv01_files:
+    
+    in_out_water.in_out_water(ncfile,var_name='TEMP')
+
+# Select pressure files using matching = fnmatch.filter(sofs75filesfv01,'*SOTS*P*_2*.nc')
+pres_files = fnmatch.filter(fv01_files,'*IMOS_ABOS-SOTS*P*_2*FV01*.nc')
+
+# Run copyDataset.py
+copyDataset.aggregate(pres_files,'PRES')
+
+# Run pressure_interpolator.py
+fv01_pres_interp_files = pressure_interpolator.pressure_interpolator(netCDFfiles=fv01_files,agg=glob.glob('*IMOS_ABOS-SOTS*Aggregate*.nc')[0])
+
+# delete the defunct FV01 files
+for ncfile in fv01_files:
+    
+    os.remove(ncfile)
+
+# Global range test
+for ncfile in fv01_pres_interp_files:
+    
+    print(ncfile)
+    
+    global_range.global_range(ncfile,'TEMP',40,-2)
+
+# Rate of change
+rate_of_change_test.roc_test_files(fv01_pres_interp_files,'TEMP',3.36)
+
+# Spike
+spike_test.spike_test_files(fv01_pres_interp_files,target_vars_in=['TEMP'])
+
+# Flatline
+flatline_test.flatline_test_files(fv01_pres_interp_files,['TEMP'],window=20)
+
+# Check qc process has worked
+fv01_qc_checked = qc_checker.qc_checker_files(fv01_pres_interp_files,['TEMP'])
+
+end = time.time()
+
+print('time elapsed: '+str(end-start))
+
+
+