Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions metadata/manual_corrections.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
start,end,columns,action,comment
"2015-01-01","2015-01-18","sun_diffuse;sun_total","remove"
"2015-01-01","2015-02-18","DNI;DHI","remove","30.1.15 er configfilen med de rigtige korrektionskonstanter til pyranometre sat i drift. Programmet læser ikke configfilen. Det problem er rettet af Lars Kokholm 18/2 2015"
"2015-01-01","2015-03-11 15:04","GHI","remove","11/3-15 kl 15:04:50: korrektionsfaktorerne for globalstrålingen f(CM11) og diffus stråling på vandret med skyggering (CM11) er ført ind i config-filen og cRIO er genstartet."
"2015-05-07 14:30","2015-05-13 19:20","GHI","remove","GHI was taken down for calibration (corresponds to Elsa's log file)"
"2017-09-06 08:45","2017-09-06 10:15","GHI","remove","GHI - Elsa's log states - 6/9 2017. CM11 til globalstråling er flyttet op på trackeren"
"2018-02-07 06:27","2018-02-07 16:00","DHI","remove","DHI pyranometer shows odd behavior (no diffuse on cloudy day which is clearly wrong)"
"2018-02-20 15:30","2018-02-20 16:30","DHI","remove","DHI went to zero"
"2018-03-08 00:00","2018-03-21 23:59","DHI","remove","DHI pyranometer stopped working (seems to be continuously decreasing starting March 8th 2018)"
"2018-03-21","2018-06-21 16:35","DHI","factor:8.34/8.59","DHI pyranometer 128764 (broken) was replaced by 128762 (sensitivity was first updated later)"
"'2015-03-11 00:00","2015-05-13 17:20","GHI","factor:4.98/5.065","Then the new calibration coefficient (4.98) for the GHI pyranometer was put in place on May 13 2015, teh closure equation was much better. Therefore, this coefficient is backfilled for the first five months of the year."
"2018-06-21 12:20","2018-06-21 15:10","DHI","remove","DHI pyranometer (128767) was recalibrated on June 21st 2018"
"2018-05-03 00:00","2018-06-07 11:00","DNI","remove","DNI was out for calibration in May and early June 2018"
"2018-07-20 18:30","2018-07-20 18:45","DNI","remove","Adjustment of the DNI sensor"
"2019-03-25 00:00","2019-05-27 09:00","sun_total;sun_diffuse","remove","Logbook notes that there was dew inside the SPN1."
"2019-05-27 09:00","2025-03-31","sun_total;sun_diffuse","remove","SPN1 was taken down"
"2017-12-21 00:00","2018-01-11 09","sun_total;sun_diffuse","remove","Stuck at high value"
"2018-05-03 00:00","2018-05-23","weather","remove","The weather station was down for calibration."
"2018-11-06 09:30","2018-11-06 12:00","weather","remove","Weather station down for check of rain sensor - rain sensor 'enabled' - Elsa"
"2018-05-23","2018-05-23 07:30","weather","remove","Weather station returned from calibration and had weird values"
"2018-08-13","2018-11-06","precipitation","remove","Rain sensor had been disabled in its configuration"
"2021-01-04 11:48","2021-01-27 16:15","DNI","remove","Instrument calibration"
"2021-01-04 12:46","2021-01-28 11:34","DHI","remove","Instrument calibration"
"2021-01-28 11:25","2021-02-22 17:25","GHI","remove","Instrument calibration"
"2023-01-25 14:56","2023-02-03 11:13","LWD","remove","Pyrgeometer was taken down for maitenance, specifically replacement of the bubble level. Dates correspond to log book entries"
"2023-03-28 04:00","2023-03-28 08:38","GHI","remove","Strange behavior of GHI pyranometer (measured much too low on clearsky day)"
"2024-03-07 13:47","2024-03-07 14:40","DNI","remove","DNI issue - check log book for other instruments?"
"2024-04-24 08:53","2024-04-24 14:24","GHI;DHI;DNI;LWD","remove","Change tracker location"
"2024-05-11 00:00","2024-05-16 23:59","GHI","remove","Humidity issues in GHI sensors"
"2024-08-17 05:00","2024-08-17 06:40","GHI;DHI;DNI","remove","Unknown deviation between GHI/GHI_calc - probably one sensor has dew"
"2024-09-19 05:00","2024-09-19 07:00","GHI;DHI;DNI","remove","DNI measurement too low - probably dew - clear-sky day"
"2024-09-21 05:00","2024-09-21 07:05","GHI;DHI;DNI","remove","DNI measurement too low - probably dew - clear-sky day"
"2015-01-01 00:00","2017-09-20 23:59","LWD","remove","Pyrgeometer measuring LWD was first correctly installed in September 2017"
"2017-11-06 18:00","2017-11-07 06:00","LWD","remove","sharp drop to negative valeus"
"2017-11-13 18:00","2017-11-14 06:00","LWD","remove","sharp drop to negative valeus"
"2017-10-31 05:36","2017-10-31 05:36","LWD","remove","sharp drop to negative valeus"
"2018-03-21 13:15","2018-03-21 13:32","LWD","remove","sharp increase"
"2018-11-07 00:00","2018-11-15 13:32","LWD","remove","sharp increase"
"2021-01-04 00:00","2021-02-23 00:00","LWD","remove","Out of service due to calibration"
"2025-03-27 12:00","2025-03-31 23:59","LWD","remove","Sensor was disconnected"
"2024-01-31 13:36","2024-01-31 13:37","LWD","remove","sharp increase"
"2024-03-07 14:37","2024-03-07 14:40","LWD","remove","sharp increase"
"2024-06-17 11:10","2024-06-17 11:26","LWD","remove","sharp increase"
"2024-03-29 17:43","2024-05-22 13:00","LWD","remove","Disconnected sensor cable to the LWD sensor"
"2025-03-27 13:15","2025-03-31 23:59","GHI;DHI;DNI;LWD","remove","Disconnected sensors, start of new time series."
"2021-10-04 18:00","2021-10-16 00:00","DNI","remove","DNI is zero when it should not be"
"2016-04-14 06:15","2016-04-14 07:26","DHI","remove","Unresonable low DHI value, could be due or other obstacle"
"2018-01-07 07:40","2018-01-07 12:00","DHI","remove","Believed to be icing or snow cover of the diffuse sensor"
"2018-01-08 07:40","2018-01-08 12:00","DHI","remove","Believed to be icing or snow cover of the diffuse sensor"
"2017-09-14 11:24","2017-09-14 12:22","DHI","remove","DHI sensor output dropped to zero for about an hour."
"2017-09-15 07:25","2017-09-15 12:40","DHI","remove","DHI sensor output dropped to zero for about an hour."
"2017-09-16 00:00","2017-09-16 13:15","DHI","remove","DHI sensor output is too low"
"2024-02-01 07:49","2024-03-07 15:30","GHI","replace","Ventilator heater was on but the ventilator itself was not powered"
"2024-01-19 00:00","2025-03-31 23:59","Azimuth;Azimuth_adjust;Zenith;Zenith_adjust","remove","Angles returned from the Solys2 were zero or missing for this period."
"2017-02-24 00:00","2017-02-24 12:00","GHI","remove","Likely snow or icing of the pyranometer. GHI has strange signal and is lower than calculated GHI."
"2018-03-20 00:00","2018-03-20 10:00","GHI","remove","Kn is higher than Kt. Perfect clear sky day, so issue must be with GHI which also has a strange profile."
"2018-03-21 00:00","2018-03-21 14:30","DHI","remove","DHI was near zero, unrealistic. Perhas a loose wire."
"2020-11-27 07:30","2020-11-27 10:00","DNI","remove","GHI_calc is lower than GHI and DNI seems to deviate from clear sky profile."
"2020-11-28 08:00","2020-11-28 11:00","GHI","remove","GHI does not follow clear sky profile whereas DHI and DNI does."
"2021-12-24 12:00","2021-12-25 23:59","GHI","remove","GHI is too low for a clear-sky day. Perhaps covered in layer of snow or ice."
"2022-12-16 00:00","2022-12-16 23:59","GHI","remove","GHI is too low for a clear-sky day. Perhaps covered in layer of snow or ice."
"2023-03-02 06:00","2023-03-02 09:00","GHI","remove","GHI has dew strikes."
313 changes: 313 additions & 0 deletions scripts/parse_dtu_climate_station_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@

import glob
import pandas as pd
import pvlib
import pvanalytics
import os
import numpy as np
import warnings
import matplotlib.pyplot as plt

# Local folder holding the raw daily climate-station export files
# (one tab-separated .txt file per day, named with an mm-dd-yy date).
data_path = 'C:/Users/arajen/OneDrive - Danmarks Tekniske Universitet/Skrivebord/Climate station data/'

# %% To do
# When were the partly shaded pyranometers taken down? Make a list
# What is LWD really? Is it the upwelling or?

# %%

# Map raw data-logger channel names to descriptive parameter names.
# GTI_<azimuth>_<tilt> presumably denotes global tilted irradiance for that
# plane orientation -- confirm against the station documentation.
parameter_dict = {
    'Mod2Ch1': 'LWD',
    'Mod2Ch2': 'DHI_shadow_band',
    'Mod2Ch3': 'DHI',
    'Mod2Ch5': 'DNI',
    'Mod2Ch6': 'GHI',
    'Mod2Ch7': 'GTI_270_90',
    'Mod2Ch8': 'GTI_0_90',
    'Mod2Ch9': 'GTI_90_90',
    'Mod2Ch10': 'GTI_180_90',
    'Mod2Ch11': 'GTI_180_45',
    'RTD0_Pyrheliometer': 'DNI_temperature',
    'RTD2': 'LWD_temperature',
}

# Columns reported by the weather station (wind, temperature, precipitation).
weather_parameters = [
    'wind_dir_min', 'wind_dir_avg', 'wind_dir_max',
    'wind_speed_min', 'wind_speed_avg', 'wind_speed_max',
    'air_temperature', 'relative_humidity', 'air_pressure',
    'rain_accumulation', 'rain_duration', 'rain_intensity',
    'hail_accumulation', 'hail_duration', 'hail_intensity',
]


# Subset of the weather columns targeted by 'precipitation' corrections.
precipitation_parameters = [
    'rain_duration', 'rain_intensity', 'rain_accumulation',
    'hail_accumulation', 'hail_duration', 'hail_intensity',
]

# Logger channels carrying no usable signal; dropped later in the script.
channels_not_in_use = [
    'Mod2Ch4',  # Not in use according to report
    'Mod3Ch1', 'Mod3Ch2', 'Mod3Ch3', 'Mod3Ch4', 'Mod3Ch5', 'Mod3Ch6', 'Mod3Ch7', 'Mod3Ch8',
    'Mod3Ch9', 'Mod3Ch10', 'Mod3Ch11', 'Mod3Ch12', 'Mod3Ch13', 'Mod3Ch14', 'Mod3Ch15', 'Mod3Ch16',
    'Mod2Ch12', 'Mod2Ch13', 'Mod2Ch14', 'Mod2Ch15', 'Mod2Ch16',
]

# The 16 half-dome sensor channels (taken down in May 2018 per the logbook
# note further down; blanked from that date and eventually dropped).
half_dome_sensors = [f"Mod1Ch{i}" for i in range(1, 17)]

# Irradiance columns subject to quality control.
irradiance_cols = ['GHI', 'DHI', 'DNI', 'sun_diffuse', 'sun_total']

# Tilted-irradiance columns (dropped from the final dataset).
gti_columns = ['GTI_270_90', 'GTI_0_90', 'GTI_90_90', 'GTI_180_90', 'GTI_180_45']

# %% Timestamp parsing


def date_parser(date_str):
    """Parse a raw logger timestamp given in one of two known formats.

    Parameters
    ----------
    date_str : str or numeric
        Timestamp text, e.g. '04-01-2014 23:59' or '00:00:01 08/26/2014'.
        Numeric values (NaN cells from malformed rows) yield NaT.

    Returns
    -------
    pandas.Timestamp or pandas.NaT

    Raises
    ------
    ValueError
        If the string matches neither known format.
    """
    # Numeric cells (e.g. float NaN from a malformed row) carry no date.
    if isinstance(date_str, (int, float)):
        return pd.NaT
    # Guard: probing date_str[2] on a too-short string would IndexError.
    if len(date_str) < 3:
        raise ValueError(f"Timestamp {date_str} does not match any format")
    if date_str[2] == '-':  # e.g., 04-01-2014 23:59
        fmt = '%m-%d-%Y %H:%M'  # 'fmt', not 'format': avoid shadowing builtin
    elif date_str[2] == ':':  # e.g., 00:00:01 08/26/2014
        fmt = '%H:%M:%S %m/%d/%Y'
    else:
        # ValueError instead of raising a Warning subclass as an exception.
        raise ValueError(f"Timestamp {date_str} does not match any format")
    return pd.to_datetime(date_str, format=fmt)


# %% Sort filenames according to date

def get_file_date(filename):
    """Return the file's date, encoded as mm-dd-yy just before the extension."""
    date_token = filename[-12:-4]  # eight characters preceding '.txt'
    return pd.to_datetime(date_token, format='%m-%d-%y')


filenames = glob.glob(data_path + '*')
# 2014 files are excluded.  NOTE(review): reason not stated in the code --
# presumably a different layout or incomplete data; confirm.
filenames = [f for f in filenames if get_file_date(f).year != 2014]
file_dates = sorted([get_file_date(f) for f in filenames])
# Sort chronologically by the embedded date.  Equivalent ordering to the
# original list.index() lookup (both sorts are stable), but O(n log n)
# instead of O(n^2).
filenames = sorted(filenames, key=get_file_date)

# Report which days between the first and last file have no data file.
complete_dates = pd.date_range(min(file_dates), max(file_dates), freq='1d')
missing_dates = sorted(set(complete_dates).difference(file_dates))

print("Missing days\n", pd.Series(missing_dates).dt.strftime('%Y-%m').value_counts())

# %% Read in data
# Read every daily file, tagging rows with the file's date; capture
# pandas' bad-line warnings to report how many rows were skipped per file.
dfs = []
for f in filenames:
    basename = os.path.basename(f)
    file_date = get_file_date(f)
    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")

        # Malformed rows are skipped with a warning rather than aborting;
        # -9999 is the logger's missing-value marker.
        dfi = pd.read_csv(f, sep='\t', skip_blank_lines=True,
                          on_bad_lines='warn', na_values=[-9999])

        dfi['file_date'] = file_date

        if len(w) == 1:  # pd.read_csv engine choice specific
            warning_messages = str(w[0].message).strip().split('\n')
            # Bug fix: extract the line number from each message `m`; the
            # original read warning_messages[0] on every iteration.
            warning_lines = [int(m.split()[2].strip(':'))
                             for m in warning_messages]
            # Bug fix: splitext removes the extension; rstrip('.txt') would
            # also strip any trailing 't'/'x'/'.' characters of the name.
            print(f"{os.path.splitext(basename)[0]} Warning lines: {len(warning_lines)}")
        if len(w) > 1:
            raise ValueError("Multiple warning types")

    dfs.append(dfi)

# %%
# Assemble all daily files into one frame and normalize the time index.
df = pd.concat(dfs, axis=0)
df['Time(utc)'] = df['Time(utc)'].apply(lambda s: date_parser(s))
df = df.set_index('Time(utc)')
# Drop rows whose timestamp could not be parsed (date_parser returned NaT).
df = df[df.index.notna()]
# Rename raw channel names to descriptive parameter names.
df = df.rename(columns=parameter_dict)
df.index = df.index.round('min')
# drop 2 instances of duplicated indexes (due to rounding)
df = df[~df.index.duplicated()]
# remove few incorrect time stamps (7 instances found in 2015 files)
df = df[df.index != '1904-01-01']
# Ensure consistent frequency (add missing timestamps)
df = df.asfreq('1min')


# Note from Elsa's logbook (translated from Danish):
# 11-5-2018: P1-P16 and the domes were taken down.  However, pyranometer
# Mod1Ch7 is still mounted and measuring with a completely closed dome.
df.loc['2018-05-11':, half_dome_sensors] = np.nan

# %% Drop unused/unnecessary columns
df = df.drop(columns=channels_not_in_use)
df = df.drop(columns=half_dome_sensors)
df = df.drop(columns=gti_columns)

columns_drop = [
    'wind_dir_min.1',  # only available for part of January 2015
    'Mod1ch16',  # incorrect naming for January 2015
    # SOLYS 2 / sun sensor parameters
    'Q1_Sun_intensity', 'Q2_Sun_intensity', 'Q3_Sun_intensity', 'Q4_Sun_intensity',
    'Solys_instrument_status', 'Total_Sun_intensity', 'Function',
    # Temperature parameters
    'RTD1',  # seems to be just noise
    'RTD3',  # value always stuck at 310.8
    'RTD3_Cabinet',  # values seem to be incorrect
]

df = df.drop(columns=columns_drop)

# %% Alternative NAN values
# These channels use -1290 (instead of -9999) as their missing-value marker.
nan_1290_parameters = ['sun_total', 'sun_diffuse', 'DNI_temperature', 'LWD_temperature']

for p in nan_1290_parameters:
    df[p] = df[p].replace(-1290, np.nan)

# %% Calculate solar position
# Station coordinates used for the solar-position calculation.
location = pvlib.location.Location(latitude=55.7906, longitude=12.5251)
solpos = location.get_solarposition(df.index)

# Closure equation: reconstruct GHI from the diffuse and direct components
# (DHI + DNI * cos(zenith)), clipping negative contributions to zero.
df['GHI_calc'] = (
    df['DHI'].clip(lower=0)
    + (df['DNI'] * np.cos(np.deg2rad(solpos['apparent_zenith']))).clip(lower=0))

# Extraterrestrial direct and horizontal irradiance (reference quantities).
df['dni_extra'] = pvlib.irradiance.get_extra_radiation(df.index)
df['ghi_extra'] = df['dni_extra'] * (np.cos(np.deg2rad(solpos['apparent_zenith']))).clip(lower=0)

# %% Copy dataframe
# df_qc receives the quality-controlled data; df keeps the raw parse.
df_qc = df.copy()

# %% Apply manual corrections

# TODO: handle 'offset' actions
# TODO: calculate missing component

# Location of the manually curated corrections table
# (metadata/manual_corrections.csv in the repository).
qc_path = 'C:/github/dtu_solar_station/metadata/'
qc_filename = 'manual_corrections.csv'


def read_qc_file(filename):
    """Load the manual-corrections table and parse its start/end dates."""
    corrections = pd.read_csv(filename, comment='#')
    for date_col in ['start', 'end']:
        corrections[date_col] = pd.to_datetime(corrections[date_col], format='mixed')
    return corrections


filename = qc_path + qc_filename
qc = read_qc_file(filename)

for _, row in qc.iterrows():
    # 'columns' holds semicolon-separated column names.
    columns = row['columns'].split(';')
    # A comma indicates a mis-formatted field (separator must be ';').
    if ',' in row['columns']:
        raise ValueError(f"Incorrectly formatted 'columns': {row['columns']}")
    if row['start'] > row['end']:
        raise ValueError(f"Start ({row['start']}) cannot be after end ({row['end']})")
    # Apply same action to the pyrheliometer body temperature as the output signal
    columns = columns + ['DNI_temperature'] if 'DNI' in columns else columns
    columns = columns + ['LWD_temperature'] if 'LWD' in columns else columns

    # NOTE(review): only the 'remove' action is applied here.  Rows with
    # 'factor:...' actions are applied by the hard-coded statements below;
    # 'replace' rows are currently not handled at all -- confirm intent.
    # Also note: an 'end' value without a time parses to midnight, so such
    # ranges stop at 00:00 of the end day, not at end of day.
    if row['action'] == 'remove':
        df_qc.loc[row['start']: row['end'], columns] = np.nan


# The two 'factor' corrections from the CSV, applied manually (keep in sync
# with the CSV): DHI sensor replacement (sensitivity 8.34 vs 8.59) and the
# backfilled GHI calibration coefficient (4.98 vs 5.065).
df_qc.loc['2018-03-21 00:00': '2018-06-21 16:35', 'DHI'] = 8.34/8.59 * \
    df_qc.loc['2018-03-21 00:00': '2018-06-21 16:35', 'DHI']

df_qc.loc['2015-03-11 00:00': '2015-05-13 17:20', 'GHI'] = 4.98/5.065 * \
    df_qc.loc['2015-03-11 00:00': '2015-05-13 17:20', 'GHI']


# Important to recalculate
df_qc['GHI_calc'] = (
df_qc['DHI'].clip(lower=0)
+ (df_qc['DNI'] * np.cos(np.deg2rad(solpos['apparent_zenith']))).clip(lower=0))

# df_qc.to_pickle('quality_controlled_dtu_data.pkl')

# %% Export to Task 16 format
# Reshape the quality-controlled data into the fixed Task 16 column layout.

# Station metadata written into each yearly file's header.
meta = {
    'station_code': 'LYN',
    'latitude': '55.79065',
    'longitude': '12.52509',
    'altitude': '40',
    'timezone': '1',
}

df_t16 = df_qc.copy()

# The Task 16 format names diffuse horizontal irradiance 'DIF'.
df_t16 = df_t16.rename(columns={'DHI': 'DIF'})

# Pad the index out to full calendar years so each yearly file is complete.
full_index = pd.date_range(
    start=df_t16.index.min().round('min').replace(month=1, day=1),
    end=df_t16.index.max().round('min').replace(month=12, day=31, hour=23, minute=59),
    freq='1min')

df_t16 = df_t16.reindex(full_index)

# %%
# create individual columns containing the year, month, day, hour, and minute
# based on the dataframe index
df_t16['Year'] = df_t16.index.year
df_t16['Month'] = df_t16.index.month
df_t16['Day'] = df_t16.index.day
df_t16['Hour'] = df_t16.index.hour
df_t16['Minute'] = df_t16.index.minute


# Columns required by the format but not (yet) computed by this script.
empty_cols = [
    'GHIcalc', 'Elev', 'Azim', 'Kc', 'usable', 'flagPPLDIF', 'flagERLDIF',
    'flagPPLDNI', 'flagERLDNI', 'flagPPLGHI', 'flagERLGHI', 'flag3highSZA',
    'flag3lowSZA', 'flagKt', 'flagKKt', 'flagKn', 'flagKnKt', 'flagKhighSZA',
    'flagKlowSZA', 'flagTracker', 'flagManual']

df_t16[empty_cols] = np.nan

# Fixed column order required by the Task 16 file format.
columns_in_specific_order = [
    'Year', 'Month', 'Day', 'Hour', 'Minute',
    'GHI', 'DNI', 'DIF', 'GHIcalc',
    'Elev', 'Azim', 'Kc', 'usable',
    'flagPPLDIF', 'flagERLDIF', 'flagPPLDNI', 'flagERLDNI',
    'flagPPLGHI', 'flagERLGHI', 'flag3highSZA', 'flag3lowSZA',
    'flagKt', 'flagKKt', 'flagKn', 'flagKnKt',
    'flagKhighSZA', 'flagKlowSZA', 'flagTracker', 'flagManual'
]

df_t16 = df_t16[columns_in_specific_order]

# %% Generate header lines
# Commented metadata lines prepended to each yearly CSV.  The trailing commas
# presumably pad each header line to the data's column count so spreadsheet
# tools keep the columns aligned -- verify against the Task 16 spec.

header = [
    f"# stationcode {meta['station_code']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# latitude deg N {meta['latitude']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# longitude deg E {meta['longitude']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# altitude in m amsl {meta['altitude']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    f"# timezone offset from UTC in hours {meta['timezone']},,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# time stamp (Year Month Day Hour Minute) reference is in UTC and refers to the end of the period,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# missing data points in GHI DNI and DIF are noted with -999,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# GHI is the global horizontal irradiance in W/m2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# DNI is the direct normal irradiance in W/m2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# DIF is the diffuse horizontal irradiance in W/m2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# GHIcalc is the calculated GHI from DNI and DIF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# Elev is the solar elevation in deg,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# Azim is the solar azimuth angle in deg N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# Kc is the clearsky index calculated with CAMS mcclear with GHIcalc/GHI McClear,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# usable is the validity of a data point with 1 being valid and 0 being not usable,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
    "# flag values from various tests: 1 means the data failed the test. 0 means the test was passed. -999 means the test domain was not met,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
]

# Join into one writable string (one trailing newline before the data rows).
header = '\n'.join(header) + '\n'


# %% Save the data in files based on the operation year

output_path = 'C:/users/arajen/downloads/'

for year, group in df_t16.groupby('Year'):
    # Open the file for writing
    with open(output_path + f"{meta['station_code']}_{year}.csv", "w", newline='') as f:
        # Write the header lines at the top of the CSV file
        f.write(header)
        # Write the DataFrame content to the CSV file.  na_rep='-999' makes
        # missing values appear as -999, matching the header's statement that
        # missing data points are noted with -999 (pandas would otherwise
        # write empty fields).
        group.to_csv(f, index=False, header=True, na_rep='-999')
    print(f"Saved {meta['station_code']}_{year}.csv")