diff --git a/metadata/manual_corrections.csv b/metadata/manual_corrections.csv
new file mode 100644
index 0000000..c6449e4
--- /dev/null
+++ b/metadata/manual_corrections.csv
@@ -0,0 +1,62 @@
+start,end,columns,action,comment
+"2015-01-01","2015-01-18","sun_diffuse;sun_total","remove"
+"2015-01-01","2015-02-18","DNI;DHI","remove","30.1.15 er configfilen med de rigtige korrektionskonstanter til pyranometre sat i drift. Programmet læser ikke configfilen. Det problem er rettet af Lars Kokholm 18/2 2015"
+"2015-01-01","2015-03-11 15:04","GHI","remove","11/3-15 kl 15:04:50: korrektionsfaktorerne for globalstrålingen f(CM11) og diffus stråling på vandret med skyggering (CM11) er ført ind i config-filen og cRIO er genstartet."
+"2015-05-07 14:30","2015-05-13 19:20","GHI","remove","GHI was taken down for calibration (corresponds to Elsa's log file)"
+"2017-09-06 08:45","2017-09-06 10:15","GHI","remove","GHI - Elsa's log states - 6/9 2017. CM11 til globalstråling er flyttet op på trackeren"
+"2018-02-07 06:27","2018-02-07 16:00","DHI","remove","DHI pyranometer shows odd behavior (no diffuse on cloudy day which is clearly wrong)"
+"2018-02-20 15:30","2018-02-20 16:30","DHI","remove","DHI went to zero"
+"2018-03-08 00:00","2018-03-21 23:59","DHI","remove","DHI pyranometer stopped working (seems to be continuously decreasing starting March 8th 2018)"
+"2018-03-21","2018-06-21 16:35","DHI","factor:8.34/8.59","DHI pyranometer 128764 (broken) was replaced by 128762 (sensitivity was first updated later)"
+"2015-03-11 00:00","2015-05-13 17:20","GHI","factor:4.98/5.065","Then the new calibration coefficient (4.98) for the GHI pyranometer was put in place on May 13 2015, the closure equation was much better. Therefore, this coefficient is backfilled for the first five months of the year."
+"2018-06-21 12:20","2018-06-21 15:10","DHI","remove","DHI pyranometer (128767) was recalibrated on June 21st 2018"
+"2018-05-03 00:00","2018-06-07 11:00","DNI","remove","DNI was out for calibration in May and early June 2018"
+"2018-07-20 18:30","2018-07-20 18:45","DNI","remove","Adjustment of the DNI sensor"
+"2019-03-25 00:00","2019-05-27 09:00","sun_total;sun_diffuse","remove","Logbook notes that there was dew inside the SPN1."
+"2019-05-27 09:00","2025-03-31","sun_total;sun_diffuse","remove","SPN1 was taken down"
+"2017-12-21 00:00","2018-01-11 09","sun_total;sun_diffuse","remove","Stuck at high value"
+"2018-05-03 00:00","2018-05-23","weather","remove","The weather station was down for calibration."
+"2018-11-06 09:30","2018-11-06 12:00","weather","remove","Weather station down for check of rain sensor - rain sensor 'enabled' - Elsa"
+"2018-05-23","2018-05-23 07:30","weather","remove","Weather station returned from calibration and had weird values"
+"2018-08-13","2018-11-06","precipitation","remove","Rain sensor had been disabled in its configuration"
+"2021-01-04 11:48","2021-01-27 16:15","DNI","remove","Instrument calibration"
+"2021-01-04 12:46","2021-01-28 11:34","DHI","remove","Instrument calibration"
+"2021-01-28 11:25","2021-02-22 17:25","GHI","remove","Instrument calibration"
+"2023-01-25 14:56","2023-02-03 11:13","LWD","remove","Pyrgeometer was taken down for maintenance, specifically replacement of the bubble level. Dates correspond to log book entries"
+"2023-03-28 04:00","2023-03-28 08:38","GHI","remove","Strange behavior of GHI pyranometer (measured much too low on clearsky day)"
+"2024-03-07 13:47","2024-03-07 14:40","DNI","remove","DNI issue - check log book for other instruments?"
+"2024-04-24 08:53","2024-04-24 14:24","GHI;DHI;DNI;LWD","remove","Change tracker location"
+"2024-05-11 00:00","2024-05-16 23:59","GHI","remove","Humidity issues in GHI sensors"
+"2024-08-17 05:00","2024-08-17 06:40","GHI;DHI;DNI","remove","Unknown deviation between GHI/GHI_calc - probably one sensor has dew"
+"2024-09-19 05:00","2024-09-19 07:00","GHI;DHI;DNI","remove","DNI measurement too low - probably dew - clear-sky day"
+"2024-09-21 05:00","2024-09-21 07:05","GHI;DHI;DNI","remove","DNI measurement too low - probably dew - clear-sky day"
+"2015-01-01 00:00","2017-09-20 23:59","LWD","remove","Pyrgeometer measuring LWD was first correctly installed in September 2017"
+"2017-11-06 18:00","2017-11-07 06:00","LWD","remove","sharp drop to negative values"
+"2017-11-13 18:00","2017-11-14 06:00","LWD","remove","sharp drop to negative values"
+"2017-10-31 05:36","2017-10-31 05:36","LWD","remove","sharp drop to negative values"
+"2018-03-21 13:15","2018-03-21 13:32","LWD","remove","sharp increase"
+"2018-11-07 00:00","2018-11-15 13:32","LWD","remove","sharp increase"
+"2021-01-04 00:00","2021-02-23 00:00","LWD","remove","Out of service due to calibration"
+"2025-03-27 12:00","2025-03-31 23:59","LWD","remove","Sensor was disconnected"
+"2024-01-31 13:36","2024-01-31 13:37","LWD","remove","sharp increase"
+"2024-03-07 14:37","2024-03-07 14:40","LWD","remove","sharp increase"
+"2024-06-17 11:10","2024-06-17 11:26","LWD","remove","sharp increase"
+"2024-03-29 17:43","2024-05-22 13:00","LWD","remove","Disconnected sensor cable to the LWD sensor"
+"2025-03-27 13:15","2025-03-31 23:59","GHI;DHI;DNI;LWD","remove","Disconnected sensors, start of new time series."
+"2021-10-04 18:00","2021-10-16 00:00","DNI","remove","DNI is zero when it should not be"
+"2016-04-14 06:15","2016-04-14 07:26","DHI","remove","Unreasonable low DHI value, could be due to dew or other obstacle"
+"2018-01-07 07:40","2018-01-07 12:00","DHI","remove","Believed to be icing or snow cover of the diffuse sensor"
+"2018-01-08 07:40","2018-01-08 12:00","DHI","remove","Believed to be icing or snow cover of the diffuse sensor"
+"2017-09-14 11:24","2017-09-14 12:22","DHI","remove","DHI sensor output dropped to zero for about an hour."
+"2017-09-15 07:25","2017-09-15 12:40","DHI","remove","DHI sensor output dropped to zero for about an hour."
+"2017-09-16 00:00","2017-09-16 13:15","DHI","remove","DHI sensor output is too low"
+"2024-02-01 07:49","2024-03-07 15:30","GHI","replace","Ventilator heater was on but the ventilator itself was not powered"
+"2024-01-19 00:00","2025-03-31 23:59","Azimuth;Azimuth_adjust;Zenith;Zenith_adjust","remove","Angles returned from the Solys2 were zero or missing for this period."
+"2017-02-24 00:00","2017-02-24 12:00","GHI","remove","Likely snow or icing of the pyranometer. GHI has strange signal and is lower than calculated GHI."
+"2018-03-20 00:00","2018-03-20 10:00","GHI","remove","Kn is higher than Kt. Perfect clear sky day, so issue must be with GHI which also has a strange profile."
+"2018-03-21 00:00","2018-03-21 14:30","DHI","remove","DHI was near zero, unrealistic. Perhaps a loose wire."
+"2020-11-27 07:30","2020-11-27 10:00","DNI","remove","GHI_calc is lower than GHI and DNI seems to deviate from clear sky profile."
+"2020-11-28 08:00","2020-11-28 11:00","GHI","remove","GHI does not follow clear sky profile whereas DHI and DNI does."
+"2021-12-24 12:00","2021-12-25 23:59","GHI","remove","GHI is too low for a clear-sky day. Perhaps covered in layer of snow or ice."
+"2022-12-16 00:00","2022-12-16 23:59","GHI","remove","GHI is too low for a clear-sky day. Perhaps covered in layer of snow or ice."
+"2023-03-02 06:00","2023-03-02 09:00","GHI","remove","GHI has dew strikes." diff --git a/scripts/parse_dtu_climate_station_data.py b/scripts/parse_dtu_climate_station_data.py new file mode 100644 index 0000000..f18542a --- /dev/null +++ b/scripts/parse_dtu_climate_station_data.py @@ -0,0 +1,313 @@ + +import glob +import pandas as pd +import pvlib +import pvanalytics +import os +import numpy as np +import warnings +import matplotlib.pyplot as plt + +data_path = 'C:/Users/arajen/OneDrive - Danmarks Tekniske Universitet/Skrivebord/Climate station data/' + +# %% To do +# When were the partly shaded pyranometers taken down? Make a list +# What is LWD really? Is it the upwelling or? + +# %% + +parameter_dict = { + 'Mod2Ch1': 'LWD', + 'Mod2Ch2': 'DHI_shadow_band', + 'Mod2Ch3': 'DHI', + 'Mod2Ch5': 'DNI', + 'Mod2Ch6': 'GHI', + 'Mod2Ch7': 'GTI_270_90', + 'Mod2Ch8': 'GTI_0_90', + 'Mod2Ch9': 'GTI_90_90', + 'Mod2Ch10': 'GTI_180_90', + 'Mod2Ch11': 'GTI_180_45', + 'RTD0_Pyrheliometer': 'DNI_temperature', + 'RTD2': 'LWD_temperature', +} + +weather_parameters = [ + 'wind_dir_min', 'wind_dir_avg', 'wind_dir_max', + 'wind_speed_min', 'wind_speed_avg', 'wind_speed_max', + 'air_temperature', 'relative_humidity', 'air_pressure', + 'rain_accumulation', 'rain_duration', 'rain_intensity', + 'hail_accumulation', 'hail_duration', 'hail_intensity', +] + + +precipitation_parameters = [ + 'rain_duration', 'rain_intensity', 'rain_accumulation', + 'hail_accumulation', 'hail_duration', 'hail_intensity', +] + +channels_not_in_use = [ + 'Mod2Ch4', # Not in use according to report + 'Mod3Ch1', 'Mod3Ch2', 'Mod3Ch3', 'Mod3Ch4', 'Mod3Ch5', 'Mod3Ch6', 'Mod3Ch7', 'Mod3Ch8', + 'Mod3Ch9', 'Mod3Ch10', 'Mod3Ch11', 'Mod3Ch12', 'Mod3Ch13', 'Mod3Ch14', 'Mod3Ch15', 'Mod3Ch16', + 'Mod2Ch12', 'Mod2Ch13', 'Mod2Ch14', 'Mod2Ch15', 'Mod2Ch16', +] + +half_dome_sensors = [f"Mod1Ch{i}" for i in range(1, 17)] + +irradiance_cols = ['GHI', 'DHI', 'DNI', 'sun_diffuse', 'sun_total'] + +gti_columns = ['GTI_270_90', 'GTI_0_90', 
'GTI_90_90', 'GTI_180_90', 'GTI_180_45'] + +# %% Timestamp parsing + + +def date_parser(date_str): + """Parse timestamps in mixed formats.""" + if isinstance(date_str, (int, float)): + return pd.NaT + if date_str[2] == '-': # e.g., 04-01-2014 23:59 + format = '%m-%d-%Y %H:%M' + elif date_str[2] == ':': # e.g., 00:00:01 08/26/2014 + format = '%H:%M:%S %m/%d/%Y' + else: + raise UserWarning(f"Timestamp {date_str} does not match any format") + return pd.to_datetime(date_str, format=format) + + +# %% Sort filenames according to date + +def get_file_date(filename): + return pd.to_datetime(filename[-12:-4], format='%m-%d-%y') + + +filenames = glob.glob(data_path + '*') +filenames = [f for f in filenames if get_file_date(f).year != 2014] +file_dates = sorted([get_file_date(f) for f in filenames]) +filenames = sorted(filenames, key=lambda s: file_dates.index(get_file_date(s))) + +complete_dates = pd.date_range(min(file_dates), max(file_dates), freq='1d') +missing_dates = sorted(set(complete_dates).difference(file_dates)) + +print("Missing days\n", pd.Series(missing_dates).dt.strftime('%Y-%m').value_counts()) + +# %% Read in data +dfs = [] +for f in filenames: + basename = os.path.basename(f) + file_date = get_file_date(f) + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. 
+ warnings.simplefilter("always") + + dfi = pd.read_csv(f, sep='\t', skip_blank_lines=True, + on_bad_lines='warn', na_values=[-9999]) + + dfi['file_date'] = file_date + + if len(w) == 1: # pd.read_csv engine choice specific + warning_messages = str(w[0].message).strip().split('\n') + warning_lines = [int(warning_messages[0].split()[2].strip(':')) + for m in warning_messages] + print(f"{basename.rstrip('.txt')} Warning lines: {len(warning_lines)}") + if len(w) > 1: + raise ValueError("Multiple warning types") + + dfs.append(dfi) + +# %% +df = pd.concat(dfs, axis=0) +df['Time(utc)'] = df['Time(utc)'].apply(lambda s: date_parser(s)) +df = df.set_index('Time(utc)') +df = df[df.index.notna()] +df = df.rename(columns=parameter_dict) +df.index = df.index.round('min') +# drop 2 instances of duplicated indexes (due to rounding) +df = df[~df.index.duplicated()] +# remove few incorrect time stamps (7 instances found in 2015 files) +df = df[df.index != '1904-01-01'] +# Ensure consistent frequency (add missing timestamps) +df = df.asfreq('1min') + + +# Note from Elsa's logbook: +# 11-5-2018: P1-P16 samt kupler er pillet ned. Dog sidder pyranometeret Mod1Ch7 stadig og måler +# med en helt lukket kuppel. 
# Blank the half-dome sensors from the day they were dismantled (2018-05-11).
df.loc['2018-05-11':, half_dome_sensors] = np.nan

# %% Drop unused/unnecessary columns
df = df.drop(columns=channels_not_in_use)
df = df.drop(columns=half_dome_sensors)
df = df.drop(columns=gti_columns)

columns_drop = [
    'wind_dir_min.1',  # only available for part of January 2015
    'Mod1ch16',  # incorrect naming for January 2015
    # SOLYS 2 / sun sensor parameters
    'Q1_Sun_intensity', 'Q2_Sun_intensity', 'Q3_Sun_intensity', 'Q4_Sun_intensity',
    'Solys_instrument_status', 'Total_Sun_intensity', 'Function',
    # Temperature parameters
    'RTD1',  # seems to be just noise
    'RTD3',  # value always stuck at 310.8
    'RTD3_Cabinet',  # values seem to be incorrect
]

df = df.drop(columns=columns_drop)

# %% Alternative NAN values

# -1290 appears to be a sentinel for invalid readings on these channels
# (TODO confirm against logger documentation).
nan_1290_parameters = ['sun_total', 'sun_diffuse', 'DNI_temperature', 'LWD_temperature']

for p in nan_1290_parameters:
    df[p] = df[p].replace(-1290, np.nan)

# %% Calculate solar position
# NOTE(review): coordinates here are slightly less precise than the meta
# dict used for the Task 16 export below — confirm they should agree.
location = pvlib.location.Location(latitude=55.7906, longitude=12.5251)
solpos = location.get_solarposition(df.index)

# Closure equation: GHI = DHI + DNI * cos(zenith), clipping negatives.
df['GHI_calc'] = (
    df['DHI'].clip(lower=0)
    + (df['DNI'] * np.cos(np.deg2rad(solpos['apparent_zenith']))).clip(lower=0))

df['dni_extra'] = pvlib.irradiance.get_extra_radiation(df.index)
df['ghi_extra'] = df['dni_extra'] * (np.cos(np.deg2rad(solpos['apparent_zenith']))).clip(lower=0)

# %% Copy dataframe

df_qc = df.copy()

# %% Apply manual corrections

# Handle offset
# Calculate missing component

qc_path = 'C:/github/dtu_solar_station/metadata/'
qc_filename = 'manual_corrections.csv'


def read_qc_file(filename):
    """Read manual corrections file.

    Returns a DataFrame with 'start'/'end' parsed to datetimes; lines
    starting with '#' are skipped as comments.
    """
    qci = pd.read_csv(filename, comment='#')
    qci['start'] = pd.to_datetime(qci['start'], format='mixed')
    qci['end'] = pd.to_datetime(qci['end'], format='mixed')
    return qci


filename = qc_path + qc_filename
qc = read_qc_file(filename)

for _, row in qc.iterrows():
    # Fix: validate the raw string before splitting, so comma-separated
    # (instead of semicolon-separated) entries are rejected up front.
    if ',' in row['columns']:
        raise ValueError(f"Incorrectly formatted 'columns': {row['columns']}")
    if row['start'] > row['end']:
        raise ValueError(f"Start ({row['start']}) cannot be after end ({row['end']})")
    columns = row['columns'].split(';')
    # Apply same action to the pyrheliometer body temperature as the output signal
    columns = columns + ['DNI_temperature'] if 'DNI' in columns else columns
    columns = columns + ['LWD_temperature'] if 'LWD' in columns else columns

    if row['action'] == 'remove':
        df_qc.loc[row['start']: row['end'], columns] = np.nan
    elif str(row['action']).startswith('factor:'):
        # Scaling corrections are applied explicitly below.
        pass
    else:
        # Fix: previously any other action (e.g. 'replace') was silently
        # ignored; make the skip visible to the operator.
        warnings.warn(f"Unhandled action '{row['action']}' for columns {row['columns']}")


# 'factor:' corrections from manual_corrections.csv, applied with their
# documented calibration-coefficient ratios:
df_qc.loc['2018-03-21 00:00': '2018-06-21 16:35', 'DHI'] = 8.34/8.59 * \
    df_qc.loc['2018-03-21 00:00': '2018-06-21 16:35', 'DHI']

df_qc.loc['2015-03-11 00:00': '2015-05-13 17:20', 'GHI'] = 4.98/5.065 * \
    df_qc.loc['2015-03-11 00:00': '2015-05-13 17:20', 'GHI']


# Important to recalculate GHI_calc after the scaling corrections above.
df_qc['GHI_calc'] = (
    df_qc['DHI'].clip(lower=0)
    + (df_qc['DNI'] * np.cos(np.deg2rad(solpos['apparent_zenith']))).clip(lower=0))

# df_qc.to_pickle('quality_controlled_dtu_data.pkl')

# %% Export to Task 16 format

meta = {
    'station_code': 'LYN',
    'latitude': '55.79065',
    'longitude': '12.52509',
    'altitude': '40',
    'timezone': '1',
}

df_t16 = df_qc.copy()

df_t16 = df_t16.rename(columns={'DHI': 'DIF'})

# Pad the index out to whole calendar years (Jan 1 00:00 - Dec 31 23:59).
full_index = pd.date_range(
    start=df_t16.index.min().round('min').replace(month=1, day=1),
    end=df_t16.index.max().round('min').replace(month=12, day=31, hour=23, minute=59),
    freq='1min')

df_t16 = df_t16.reindex(full_index)

# %%
# create individual columns containing the year, month, day, hour, and minute
# based on the dataframe index
df_t16['Year'] = df_t16.index.year
df_t16['Month'] = df_t16.index.month
df_t16['Day'] = df_t16.index.day
df_t16['Hour'] = df_t16.index.hour
df_t16['Minute'] = df_t16.index.minute


# Columns required by the Task 16 format that this script does not compute.
empty_cols = [
    'GHIcalc', 'Elev', 'Azim', 'Kc', 'usable', 'flagPPLDIF', 'flagERLDIF',
    'flagPPLDNI', 'flagERLDNI', 'flagPPLGHI', 'flagERLGHI', 'flag3highSZA',
    'flag3lowSZA', 'flagKt', 'flagKKt', 'flagKn', 'flagKnKt', 'flagKhighSZA',
    'flagKlowSZA', 'flagTracker', 'flagManual']

df_t16[empty_cols] = np.nan

columns_in_specific_order = [
    'Year', 'Month', 'Day', 'Hour', 'Minute',
    'GHI', 'DNI', 'DIF', 'GHIcalc',
    'Elev', 'Azim', 'Kc', 'usable',
    'flagPPLDIF', 'flagERLDIF', 'flagPPLDNI', 'flagERLDNI',
    'flagPPLGHI', 'flagERLGHI', 'flag3highSZA', 'flag3lowSZA',
    'flagKt', 'flagKKt', 'flagKn', 'flagKnKt',
    'flagKhighSZA', 'flagKlowSZA', 'flagTracker', 'flagManual'
]

df_t16 = df_t16[columns_in_specific_order]

# %% Generate header lines

header = [
    f"# stationcode {meta['station_code']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# latitude deg N {meta['latitude']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# longitude deg E {meta['longitude']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# altitude in m amsl {meta['altitude']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# timezone offset from UTC in hours {meta['timezone']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# time stamp (Year Month Day Hour Minute) reference is in UTC and refers to the end of the period,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# missing data points in GHI DNI and DIF are noted with -999,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# GHI is the global horizontal irradiance in W/m2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# DNI is the direct normal irradiance in W/m2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# DIF is the diffuse horizontal irradiance in W/m2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# GHIcalc is the calculated GHI from DNI and DIF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# Elev is the solar elevation in deg,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# Azim is the solar azimuth angle in deg N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# Kc is the clearsky index calculated with CAMS mcclear with GHIcalc/GHI McClear,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# usable is the validity of a data point with 1 being valid and 0 being not usable,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# flag values from various tests: 1 means the data failed the test. 0 means the test was passed. -999 means the test domain was not met,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
]

header = '\n'.join(header) + '\n'


# %% Save the data in files based on the operation year

output_path = 'C:/users/arajen/downloads/'

for year, group in df_t16.groupby('Year'):
    # Open the file for writing
    with open(output_path + f"{meta['station_code']}_{year}.csv", "w", newline='') as f:
        # Write the header lines at the top of the CSV file
        f.write(header)
        # Write the DataFrame content to the CSV file.
        # Fix: write missing values as -999 as promised in the header above
        # ("missing data points ... are noted with -999"); previously NaN
        # was written as an empty string.
        group.to_csv(f, index=False, header=True, na_rep='-999')
    print(f"Saved {meta['station_code']}_{year}.csv")