Skip to content

Commit c1fe325

Browse files
authored
Merge pull request #190 from florisvb/fix/issue-173-polydiff-variable-step-nan
Fix polydiff for variable step size and missing data (issue #173)
2 parents 49631ee + 21db7f4 commit c1fe325

5 files changed

Lines changed: 60 additions & 41 deletions

File tree

pynumdiff/kalman_smooth.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ def rtsdiff(x, dt_or_t, order, log_qr_ratio, forwardbackward):
111111
"""Perform Rauch-Tung-Striebel smoothing with a naive constant derivative model. Makes use of :code:`kalman_filter`
112112
and :code:`rts_smooth`, which are made public. :code:`constant_X` methods in this module call this function.
113113
114-
:param np.array[float] x: data series to differentiate
114+
:param np.array[float] x: data series to differentiate. May contain NaN values (missing data); NaNs are excluded from
115+
fitting and imputed by dynamical model evolution.
115116
:param float or array[float] dt_or_t: This function supports variable step size. This parameter is either the constant
116117
step size if given as a single float, or data locations if given as an array of same length as :code:`x`.
117118
:param int order: which derivative to stabilize in the constant-derivative model

pynumdiff/polynomial_fit.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,14 @@ def splinediff(x, dt_or_t, params=None, options=None, degree=3, s=None, num_iter
4747
return x_hat, dxdt_hat
4848

4949

50-
def polydiff(x, dt, params=None, options=None, degree=None, window_size=None, step_size=1,
50+
def polydiff(x, dt_or_t, params=None, options=None, degree=None, window_size=None, step_size=1,
5151
kernel='friedrichs'):
5252
"""Fit polynomials to the data, and differentiate the polynomials.
5353
54-
:param np.array[float] x: data to differentiate
55-
:param float dt: step size
54+
:param np.array[float] x: data to differentiate. May contain NaN values (missing data); NaNs are excluded from
55+
fitting and imputed by polynomial interpolation.
56+
:param float or array[float] dt_or_t: This function supports variable step size. This parameter is either the constant
57+
:math:`\\Delta t` if given as a single float, or data locations if given as an array of same length as :code:`x`.
5658
:param list[int] params: (**deprecated**, prefer :code:`degree` and :code:`window_size`)
5759
:param dict options: (**deprecated**, prefer :code:`step_size` and :code:`kernel`)
5860
a dictionary consisting of {'sliding': (bool), 'step_size': (int), 'kernel_name': (str)}
@@ -82,22 +84,23 @@ def polydiff(x, dt, params=None, options=None, degree=None, window_size=None, st
8284
window_size += 1
8385
warn("Kernel window size should be odd. Added 1 to length.")
8486

85-
def _polydiff(x, dt, degree, weights=None):
86-
t = np.arange(len(x))*dt
87+
def _polydiff(x, dt_or_t, degree, weights=None):
88+
t = dt_or_t if not np.isscalar(dt_or_t) else np.arange(len(x)) * dt_or_t # sample locations
89+
mask = ~np.isnan(x) # Filter out any NaN values so polyfit doesn't lose its mind in the event of missing data
90+
if not np.any(mask): warn("Window of all NaNs encountered. `polyfit` will fail. Choose a wider `window_size`?")
8791

88-
r = np.polyfit(t, x, degree, w=weights) # polyfit returns highest order first
92+
r = np.polyfit(t[mask], x[mask], degree, w=weights[mask] if weights is not None else None) # polyfit returns highest order first
8993
dr = np.polyder(r) # power rule already implemented for us
9094

9195
dxdt_hat = np.polyval(dr, t) # evaluate the derivative and original polynomials at points t
9296
x_hat = np.polyval(r, t) # smoothed x
9397

9498
return x_hat, dxdt_hat
9599

96-
if not window_size:
97-
return _polydiff(x, dt, degree)
100+
if not window_size: return _polydiff(x, dt_or_t, degree)
98101

99102
kernel = {'gaussian':utility.gaussian_kernel, 'friedrichs':utility.friedrichs_kernel}[kernel](window_size)
100-
return utility.slide_function(_polydiff, x, dt, kernel, degree, stride=step_size, pass_weights=True)
103+
return utility.slide_function(_polydiff, x, dt_or_t, kernel, degree, stride=step_size, pass_weights=True)
101104

102105

103106
def savgoldiff(x, dt, params=None, options=None, degree=None, window_size=None, smoothing_win=None, axis=0):

pynumdiff/tests/test_diff_methods.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
def iterated_second_order(*args, **kwargs): return second_order(*args, **kwargs)
1414
def iterated_fourth_order(*args, **kwargs): return fourth_order(*args, **kwargs)
1515
def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
16+
def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs)
17+
irreg_list = [spline_irreg_step, polydiff_irreg_step, rbfdiff, rtsdiff] # methods to test with irregular time steps
1618

1719
dt = 0.1
1820
t = np.linspace(0, 3, 31) # sample locations, including the endpoint
@@ -42,6 +44,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
4244
(first_order, {}), (second_order, {}), (fourth_order, {}), # empty dictionary for the case of no parameters
4345
(iterated_second_order, {'num_iterations':5}), (iterated_fourth_order, {'num_iterations':10}),
4446
(polydiff, {'degree':2, 'window_size':3}), (polydiff, [2, 3]),
47+
(polydiff_irreg_step, {'degree':2, 'window_size':3}),
4548
(savgoldiff, {'degree':2, 'window_size':5, 'smoothing_win':5}), (savgoldiff, [2, 5, 5]),
4649
(splinediff, {'degree':5, 's':2}), (splinediff, [5, 2]),
4750
(spline_irreg_step, {'degree':5, 's':2}),
@@ -132,6 +135,12 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
132135
[(-2, -2), (0, 0), (0, -1), (1, 1)],
133136
[(0, 0), (1, 1), (0, -1), (1, 1)],
134137
[(0, 0), (3, 3), (0, 0), (3, 3)]],
138+
polydiff_irreg_step: [[(-14, -15), (-14, -14), (0, -1), (1, 1)],
139+
[(-14, -14), (-13, -13), (0, -1), (1, 1)],
140+
[(-14, -14), (-13, -13), (0, -1), (1, 1)],
141+
[(-2, -2), (0, 0), (0, -1), (1, 1)],
142+
[(0, 0), (1, 1), (0, 0), (1, 1)],
143+
[(0, 0), (3, 3), (0, 0), (3, 3)]],
135144
savgoldiff: [[(-13, -14), (-13, -14), (0, -1), (0, 0)],
136145
[(-13, -13), (-13, -13), (0, -1), (0, 0)],
137146
[(-2, -2), (-1, -1), (0, -1), (0, 0)],
@@ -242,9 +251,9 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
242251
i, latex_name, f, df = test_func_and_deriv
243252

244253
# sample the true function and true derivative, and make noisy samples
245-
x = f(t) if diff_method not in [spline_irreg_step, rbfdiff, rtsdiff] else f(t_irreg)
246-
dxdt = df(t) if diff_method not in [spline_irreg_step, rbfdiff, rtsdiff] else df(t_irreg)
247-
_t = dt if diff_method not in [spline_irreg_step, rbfdiff, rtsdiff] else t_irreg
254+
x = f(t) if diff_method not in irreg_list else f(t_irreg)
255+
dxdt = df(t) if diff_method not in irreg_list else df(t_irreg)
256+
_t = dt if diff_method not in irreg_list else t_irreg
248257
x_noisy = x + noise
249258

250259
# differentiate without and with noise, accounting for new and old styles of calling functions
@@ -258,7 +267,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
258267
# plotting code
259268
if request.config.getoption("--plot") and not isinstance(params, list): # Get the plot flag from pytest configuration
260269
fig, axes = request.config.plots[diff_method] # get the appropriate plot, set up by the store_plots fixture in conftest.py
261-
t_ = t_irreg if diff_method in [spline_irreg_step, rtsdiff, rbfdiff] else t
270+
t_ = t_irreg if diff_method in irreg_list else t
262271
axes[i, 0].plot(t_, f(t_))
263272
axes[i, 0].plot(t_, x, 'C0+')
264273
axes[i, 0].plot(t_, x_hat, 'C2.', ms=4)

pynumdiff/tests/test_utils.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from pynumdiff.utils import utility, evaluate
66
from pynumdiff.utils.simulate import sine, triangle, pop_dyn, linear_autonomous, pi_cruise_control, lorenz_x
7-
np.random.seed(42) # The answer to life, the universe, and everything
7+
np.random.seed(7)
88

99

1010
def test_integrate_dxdt_hat():
@@ -56,7 +56,8 @@ def test_convolutional_smoother():
5656
def test_peakdet(request):
5757
"""Verify peakdet finds peaks and valleys"""
5858
t = np.arange(0, 10, 0.001)
59-
x = 0.3*np.sin(t) + np.sin(1.3*t) + 0.9*np.sin(4.2*t) + 0.02*np.random.randn(10000)
59+
x = 0.3*np.sin(t) + np.sin(1.3*t) + 0.9*np.sin(4.2*t) + \
60+
0.02*np.random.RandomState(42).randn(10000) # isolated source of randomness so test order doesn't affect results
6061
maxtab, mintab = utility.peakdet(x, 0.5, t)
6162

6263
if request.config.getoption("--plot"):
@@ -66,30 +67,33 @@ def test_peakdet(request):
6667
pyplot.title('peakdet validataion')
6768
pyplot.show()
6869

69-
assert np.allclose(maxtab, [[0.475, 1.58696894], # these numbers validated by eye with --plot
70-
[1.813, 1.91418201],
71-
[3.311, -0.02749755],
72-
[4.971, 0.74687989],
73-
[6.333, 1.89776084],
74-
[7.76, 0.57366611],
75-
[9.397, 0.59379866]])
76-
assert np.allclose(mintab, [[1.134, 0.31086976],
77-
[2.747, -1.13032479],
78-
[4.093, -2.00466846],
79-
[5.502, -0.31428495],
80-
[7.206, -0.5993835],
81-
[8.607,-1.71266074]])
70+
assert np.allclose(maxtab, [[0.478, 1.59725055], # these numbers validated by eye with --plot
71+
[1.8, 1.91003085],
72+
[3.319, -0.04597348],
73+
[4.997, 0.74477798],
74+
[6.35, 1.89578662],
75+
[7.783, 0.57274039],
76+
[9.429, 0.58636224]])
77+
assert np.allclose(mintab, [[1.101, 0.30335672],
78+
[2.744, -1.12418367],
79+
[4.077, -2.00297377],
80+
[5.587, -0.31253041],
81+
[7.14, -0.58622913],
82+
[8.608, -1.71228973]])
8283

8384
def test_slide_function():
8485
"""Verify the slide function's weighting scheme calculates as expected"""
85-
def identity(x, dt): return x, 0 # should come back the same
86+
def identity(x, dt_or_t): return x, 0 # should come back the same
8687

87-
x = np.arange(100)
88+
x = np.arange(100, dtype=float)
8889
kernel = utility.gaussian_kernel(9)
8990

90-
x_hat, dxdt_hat = utility.slide_function(identity, x, 0.1, kernel, stride=2)
91+
x_hat_dt, _ = utility.slide_function(identity, x, 0.1, kernel, stride=2)
92+
assert np.allclose(x, x_hat_dt)
9193

92-
assert np.allclose(x, x_hat)
94+
# time array: func receives a kernel-length slice of times instead of scalar dt; identity still returns x unchanged
95+
x_hat_t, _ = utility.slide_function(identity, x, np.linspace(0, 10, 100, endpoint=False), kernel, stride=2)
96+
assert np.allclose(x, x_hat_t)
9397

9498

9599
def test_simulations(request):

pynumdiff/utils/utility.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,24 +114,26 @@ def convolutional_smoother(x, kernel, num_iterations=1, axis=0):
114114
return x_hat
115115

116116

117-
def slide_function(func, x, dt, kernel, *args, stride=1, pass_weights=False, **kwargs):
117+
def slide_function(func, x, dt_or_t, kernel, *args, stride=1, pass_weights=False, **kwargs):
118118
"""Slide a smoothing derivative function across a timeseries with specified window size, and
119119
combine the results according to kernel weights.
120120
121121
:param callable func: name of the function to slide
122122
:param np.array[float] x: data to differentiate
123-
:param float dt: step size
123+
:param float or np.array[float] dt_or_t: constant step size (scalar) or array of sample locations (same length as x).
124+
When given as an array, the actual time values for each window are passed to :code:`func`, enabling nonuniform spacing.
124125
:param np.array[float] kernel: values to weight the sliding window
125-
:param list args: passed to func
126-
:param int stride: step size for slide (e.g. 1 means slide by 1 step)
126+
:param list args: passed to :code:`func`
127+
:param int stride: step size for slide (e.g. 1 means slide by 1 index location)
127128
:param bool pass_weights: whether weights should be passed to func via update to kwargs
128-
:param dict kwargs: passed to func
129+
:param dict kwargs: passed to :code:`func`
129130
130131
:return: - **x_hat** -- estimated (smoothed) x
131132
- **dxdt_hat** -- estimated derivative of x
132133
"""
133134
if len(kernel) % 2 == 0: raise ValueError("Kernel window size should be odd.")
134135
half_window_size = (len(kernel) - 1)//2 # int because len(kernel) is always odd
136+
equispaced = np.isscalar(dt_or_t)
135137

136138
x_hat = np.zeros(x.shape)
137139
dxdt_hat = np.zeros(x.shape)
@@ -141,7 +143,7 @@ def slide_function(func, x, dt, kernel, *args, stride=1, pass_weights=False, **k
141143
# find where to index data and kernel, taking care at edges
142144
start = max(0, midpoint - half_window_size)
143145
end = min(len(x), midpoint + half_window_size + 1) # +1 because slicing is exclusive of end
144-
window = slice(start, end)
146+
window = slice(start, end) # This is in terms of indices, not true time in the event of nonuniform spacing
145147

146148
kstart = max(0, half_window_size - midpoint)
147149
kend = kstart + (end - start)
@@ -150,8 +152,8 @@ def slide_function(func, x, dt, kernel, *args, stride=1, pass_weights=False, **k
150152
w = kernel if (end-start) == len(kernel) else kernel[kslice]/np.sum(kernel[kslice])
151153
if pass_weights: kwargs['weights'] = w
152154

153-
# run the function on the window and add weighted results to cumulative answers
154-
x_window_hat, dxdt_window_hat = func(x[window], dt, *args, **kwargs)
155+
# Run the function on the window and add weighted results to cumulative answers. If not equispaced, pass times for window.
156+
x_window_hat, dxdt_window_hat = func(x[window], dt_or_t if equispaced else dt_or_t[window], *args, **kwargs)
155157
x_hat[window] += w * x_window_hat
156158
dxdt_hat[window] += w * dxdt_window_hat
157159
weight_sum[window] += w # save sum of weights for normalization at the end

0 commit comments

Comments
 (0)