Skip to content

Commit 589828a

Browse files
committed
copy tidal function from pydsm to vtools
1 parent fa1598d commit 589828a

1 file changed

Lines changed: 305 additions & 0 deletions

File tree

vtools/functions/tidalhl.py

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
import numpy as np
2+
import pandas as pd
3+
import numba
4+
from vtools.functions.filter import cosine_lanczos
5+
6+
__all__ = ["get_tidal_hl", "get_tidal_amplitude", "get_tidal_hl_zerocrossing", "get_tidal_phase_diff"]
7+
8+
def get_smoothed_resampled(
9+
df, cutoff_period="2h", resample_period="1min", interpolate_method="pchip"
10+
):
11+
"""Resample the dataframe (indexed by time) to the regular period of resample_period using the interpolate method
12+
13+
Furthermore the cosine lanczos filter is used with a cutoff_period to smooth the signal to remove high frequency noise
14+
15+
Args:
16+
17+
df (DataFrame): A single column dataframe indexed by datetime
18+
19+
cutoff_period (str, optional): cutoff period for cosine lanczos filter. Defaults to '2h'.
20+
21+
resample_period (str, optional): Resample to regular period. Defaults to '1min'.
22+
23+
interpolate_method (str, optional): interpolation for resampling. Defaults to 'pchip'.
24+
25+
Returns:
26+
27+
DataFrame: smoothed and resampled dataframe indexed by datetime
28+
"""
29+
dfb = df.resample(resample_period).bfill()
30+
df = df.resample(resample_period).interpolate(method=interpolate_method)
31+
df[dfb.iloc[:, 0].isna()] = np.nan
32+
return cosine_lanczos(df, cutoff_period)
33+
34+
35+
@numba.jit(nopython=True)
36+
def lmax(arr):
37+
"""Local maximum: Returns value only when centered on maximum"""
38+
idx = np.argmax(arr)
39+
if idx == len(arr) / 2:
40+
return arr[idx]
41+
else:
42+
return np.NaN
43+
44+
45+
@numba.jit(nopython=True)
46+
def lmin(arr):
47+
"""Local minimum: Returns value only when centered on minimum"""
48+
idx = np.argmin(arr)
49+
if idx == len(arr) / 2:
50+
return arr[idx]
51+
else:
52+
return np.NaN
53+
54+
55+
def periods_per_window(moving_window_size: str, period_str: str) -> int:
56+
"""Number of period size in moving window
57+
58+
Args:
59+
60+
moving_window_size (str): moving window size as a string e.g 7H for 7 hour
61+
62+
period_str (str): period as str e.g. 1T for 1 min
63+
64+
Returns:
65+
66+
int: number of periods in the moving window rounded to an integer
67+
"""
68+
69+
return int(
70+
pd.Timedelta(moving_window_size)
71+
/ pd.to_timedelta(pd.tseries.frequencies.to_offset(period_str))
72+
)
73+
74+
75+
def tidal_highs(df, moving_window_size="7h"):
76+
"""Tidal highs (could be upto two highs in a 25 hr period)
77+
78+
Args:
79+
80+
df (DataFrame): a time series with a regular frequency
81+
82+
moving_window_size (str, optional): moving window size to look for highs within. Defaults to '7h'.
83+
84+
Returns:
85+
86+
DataFrame: an irregular time series with highs at resolution of df.index
87+
"""
88+
period_str = df.index.freqstr
89+
periods = periods_per_window(moving_window_size, period_str)
90+
dfmax = df.rolling(moving_window_size, min_periods=periods).apply(lmax, raw=True)
91+
dfmax = dfmax.shift(periods=-(periods // 2 - 1))
92+
dfmax = dfmax.dropna()
93+
dfmax.columns = ["max"]
94+
return dfmax
95+
96+
97+
def tidal_lows(df, moving_window_size="7h"):
98+
"""Tidal lows (could be upto two lows in a 25 hr period)
99+
100+
Args:
101+
102+
df (DataFrame): a time series with a regular frequency
103+
104+
moving_window_size (str, optional): moving window size to look for lows within. Defaults to '7h'.
105+
106+
Returns:
107+
108+
DataFrame: an irregular time series with lows at resolution of df.index
109+
"""
110+
period_str = df.index.freqstr
111+
periods = periods_per_window(moving_window_size, period_str)
112+
dfmin = df.rolling(moving_window_size, min_periods=periods).apply(lmin, raw=True)
113+
dfmin = dfmin.shift(periods=-(periods // 2 - 1))
114+
dfmin = dfmin.dropna()
115+
dfmin.columns = ["min"]
116+
return dfmin
117+
118+
119+
def get_tidal_hl(
120+
df,
121+
cutoff_period="2h",
122+
resample_period="1min",
123+
interpolate_method="pchip",
124+
moving_window_size="7h",
125+
):
126+
"""Get Tidal highs and lows
127+
128+
Args:
129+
130+
df (DataFrame): A single column dataframe indexed by datetime
131+
132+
cutoff_period (str, optional): cutoff period for cosine lanczos filter. Defaults to '2h'.
133+
134+
resample_period (str, optional): Resample to regular period. Defaults to '1min'.
135+
136+
interpolate_method (str, optional): interpolation for resampling. Defaults to 'pchip'.
137+
138+
moving_window_size (str, optional): moving window size to look for lows within. Defaults to '7h'.
139+
140+
Returns:
141+
142+
tuple of DataFrame: Tidal high and tidal low time series
143+
"""
144+
dfs = get_smoothed_resampled(df, cutoff_period, resample_period, interpolate_method)
145+
return tidal_highs(dfs), tidal_lows(dfs)
146+
147+
148+
get_tidal_hl_rolling = get_tidal_hl # for older refs. #FIXME
149+
150+
151+
def get_tidal_amplitude(dfh, dfl):
152+
"""Tidal amplitude given tidal highs and lows
153+
154+
Args:
155+
156+
dfh (DataFrame): Tidal highs time series
157+
158+
dfl (DataFrame): Tidal lows time series
159+
160+
Returns:
161+
162+
DataFrame: Amplitude timeseries, at the times of the low following the high being used for amplitude calculation
163+
"""
164+
dfamp = pd.concat([dfh, dfl], axis=1)
165+
dfamp = dfamp[["min"]].dropna().join(dfamp[["max"]].ffill())
166+
return pd.DataFrame(dfamp["max"] - dfamp["min"], columns=["amplitude"])
167+
168+
169+
def get_tidal_amplitude_diff(dfamp1, dfamp2, percent_diff=False, tolerance="4h"):
170+
"""Get the difference of values within +/- 4H of values in the two amplitude arrays
171+
172+
Args:
173+
174+
dfamp1 (DataFrame): Amplitude time series
175+
176+
dfamp2 (DataFrame): Amplitude time series
177+
178+
percent_diff (bool, optional): If true do percent diff. Defaults to False.
179+
180+
Returns:
181+
182+
DataFrame: Difference dfamp1-dfamp2 or % Difference (dfamp1-dfamp2)/dfamp2*100 for values within +/- 4H of each other
183+
"""
184+
dfamp = pd.merge_asof(
185+
dfamp1,
186+
dfamp2,
187+
left_index=True,
188+
right_index=True,
189+
direction="nearest",
190+
tolerance=pd.Timedelta(tolerance),
191+
)
192+
if percent_diff:
193+
dfdiff = 100.0 * (dfamp.iloc[:, 0] - dfamp.iloc[:, 1]) / dfamp.iloc[:, 1]
194+
else:
195+
dfdiff = dfamp.iloc[:, 0] - dfamp.iloc[:, 1]
196+
return pd.DataFrame(dfdiff, columns=["amplitude_diff"])
197+
198+
199+
def get_phase_diff(df1, df2, tolerance="4h"):
200+
df1["time"] = df1.index
201+
df2["time"] = df2.index
202+
df21 = pd.merge_asof(
203+
df2,
204+
df1,
205+
left_index=True,
206+
right_index=True,
207+
direction="nearest",
208+
tolerance=pd.Timedelta(tolerance),
209+
)
210+
return (df21["time_x"] - df21["time_y"]).apply(lambda x: x.total_seconds() / 60)
211+
212+
213+
def get_tidal_phase_diff(dfh2, dfl2, dfh1, dfl1, tolerance="4h"):
214+
"""Calculates the phase difference between df2 and df1 tidal highs and lows
215+
216+
Scans +/- 4 hours in df1 to get the highs and lows in that windows for df2 to
217+
get the tidal highs and lows at the times of df1
218+
219+
220+
Args:
221+
222+
dfh2 (DataFrame): Timeseries of tidal highs
223+
224+
dfl2 (DataFrame): Timeseries of tidal lows
225+
226+
dfh1 (DataFrame): Timeseries of tidal highs
227+
228+
dfl1 (DataFRame): Timeseries of tidal lows
229+
230+
Returns:
231+
232+
DataFrame: Phase difference (dfh2-dfh1) and (dfl2-dfl1) in minutes
233+
"""
234+
high_phase_diff = get_phase_diff(dfh2, dfh1, tolerance)
235+
low_phase_diff = get_phase_diff(dfl2, dfl1, tolerance)
236+
merged_diff = pd.merge(
237+
pd.DataFrame(high_phase_diff, index=dfh1.index),
238+
pd.DataFrame(low_phase_diff, index=dfl1.index),
239+
how="outer",
240+
left_index=True,
241+
right_index=True,
242+
)
243+
return merged_diff.iloc[:, 0].fillna(merged_diff.iloc[:, 1])
244+
245+
246+
def get_tidal_hl_zerocrossing(df, round_to="1min"):
247+
"""
248+
Finds the tidal high and low times using zero crossings of the first derivative.
249+
250+
This works for all situations but is not robust in the face of noise and perturbations in the signal
251+
"""
252+
zc, zi = zerocross(df)
253+
if round_to:
254+
zc = pd.to_datetime(zc).round(round_to)
255+
return zc
256+
257+
258+
def zerocross(df):
259+
"""
260+
Calculates the gradient of the time series and identifies locations where gradient changes sign
261+
Returns the time rounded to nearest minute where the zero crossing happens (based on linear derivative assumption)
262+
"""
263+
diffdfv = pd.Series(np.gradient(df[df.columns[0]].values), index=df.index)
264+
indi = np.where((np.diff(np.sign(diffdfv))) & (diffdfv[1:] != 0))[0]
265+
# Find the zero crossing by linear interpolation
266+
zdb = diffdfv[indi].index
267+
zda = diffdfv[indi + 1].index
268+
x = diffdfv.index
269+
y = diffdfv.values
270+
dx = x[indi + 1] - x[indi]
271+
dy = y[indi + 1] - y[indi]
272+
zc = -y[indi] * (dx / dy) + x[indi]
273+
return zc, indi
274+
275+
276+
##---- FUNCTIONS CACHED BELOW THIS LINE PERHAPS TO USE LATER? ---#
277+
278+
279+
def where_changed(df):
280+
""" """
281+
diff = np.diff(df[df.columns[0]].values)
282+
wdiff = np.where(diff != 0)[0]
283+
wdiff = np.insert(wdiff, 0, 0) # insert the first value i.e. zero index
284+
return df.iloc[wdiff + 1, :]
285+
286+
287+
def where_same(dfg, df):
288+
"""
289+
return dfg only where its value is the same as df for the same time stamps
290+
i.e. the interesection locations with df
291+
"""
292+
dfall = pd.concat([dfg, df], axis=1)
293+
return dfall[dfall.iloc[:, 0] == dfall.iloc[:, 1]].iloc[:, 0]
294+
295+
296+
def limit_to_indices(df, si, ei):
297+
return df[(df.index > si) & (df.index < ei)]
298+
299+
300+
def filter_where_na(df, dfb):
301+
"""
302+
remove values in df where dfb has na values
303+
"""
304+
dfx = dfb.loc[df.index]
305+
return df.loc[dfx.dropna().index, :]

0 commit comments

Comments
 (0)