Merge pull request #190 from florisvb/fix/issue-173-polydiff-variable-step-nan

pavelkomarov · web-flow · commit c1fe325c83c0 · 2026-03-06T14:37:50.000-08:00
Fix polydiff for variable step size and missing data (issue #173)
diff --git a/pynumdiff/kalman_smooth.py b/pynumdiff/kalman_smooth.py
@@ -111,7 +111,8 @@ def rtsdiff(x, dt_or_t, order, log_qr_ratio, forwardbackward):
     """Perform Rauch-Tung-Striebel smoothing with a naive constant derivative model. Makes use of :code:`kalman_filter`
     and :code:`rts_smooth`, which are made public. :code:`constant_X` methods in this module call this function.
 
-    :param np.array[float] x: data series to differentiate
+    :param np.array[float] x: data series to differentiate. May contain NaN values (missing data); NaNs are excluded from
+        fitting and imputed by dynamical model evolution.
     :param float or array[float] dt_or_t: This function supports variable step size. This parameter is either the constant
         step size if given as a single float, or data locations if given as an array of same length as :code:`x`.
     :param int order: which derivative to stabilize in the constant-derivative model
diff --git a/pynumdiff/polynomial_fit.py b/pynumdiff/polynomial_fit.py
@@ -47,12 +47,14 @@ def splinediff(x, dt_or_t, params=None, options=None, degree=3, s=None, num_iter
     return x_hat, dxdt_hat
 
 
-def polydiff(x, dt, params=None, options=None, degree=None, window_size=None, step_size=1,
+def polydiff(x, dt_or_t, params=None, options=None, degree=None, window_size=None, step_size=1,
     kernel='friedrichs'):
     """Fit polynomials to the data, and differentiate the polynomials.
 
-    :param np.array[float] x: data to differentiate
-    :param float dt: step size
+    :param np.array[float] x: data to differentiate. May contain NaN values (missing data); NaNs are excluded from
+        fitting and imputed by polynomial interpolation.
+    :param float or array[float] dt_or_t: This function supports variable step size. This parameter is either the constant
+        :math:`\\Delta t` if given as a single float, or data locations if given as an array of same length as :code:`x`.
     :param list[int] params: (**deprecated**, prefer :code:`degree` and :code:`window_size`)
     :param dict options: (**deprecated**, prefer :code:`step_size` and :code:`kernel`)
             a dictionary consisting of {'sliding': (bool), 'step_size': (int), 'kernel_name': (str)}
@@ -82,22 +84,23 @@ def polydiff(x, dt, params=None, options=None, degree=None, window_size=None, st
         window_size += 1
         warn("Kernel window size should be odd. Added 1 to length.")
 
-    def _polydiff(x, dt, degree, weights=None):
-        t = np.arange(len(x))*dt
+    def _polydiff(x, dt_or_t, degree, weights=None):
+        t = dt_or_t if not np.isscalar(dt_or_t) else np.arange(len(x)) * dt_or_t # sample locations
+        mask = ~np.isnan(x) # Filter out any NaN values so polyfit doesn't lose its mind in the event of missing data
+        if not np.any(mask): warn("Window of all NaNs encountered. `polyfit` will fail. Choose a wider `window_size`?")
 
-        r = np.polyfit(t, x, degree, w=weights) # polyfit returns highest order first
+        r = np.polyfit(t[mask], x[mask], degree, w=weights[mask] if weights is not None else None) # polyfit returns highest order first
         dr = np.polyder(r) # power rule already implemented for us
 
         dxdt_hat = np.polyval(dr, t) # evaluate the derivative and original polynomials at points t
         x_hat = np.polyval(r, t) # smoothed x
 
         return x_hat, dxdt_hat
 
-    if not window_size:
-        return _polydiff(x, dt, degree)
+    if not window_size: return _polydiff(x, dt_or_t, degree)
 
     kernel = {'gaussian':utility.gaussian_kernel, 'friedrichs':utility.friedrichs_kernel}[kernel](window_size)
-    return utility.slide_function(_polydiff, x, dt, kernel, degree, stride=step_size, pass_weights=True)
+    return utility.slide_function(_polydiff, x, dt_or_t, kernel, degree, stride=step_size, pass_weights=True)
 
 
 def savgoldiff(x, dt, params=None, options=None, degree=None, window_size=None, smoothing_win=None, axis=0):
diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
@@ -13,6 +13,8 @@
 def iterated_second_order(*args, **kwargs): return second_order(*args, **kwargs)
 def iterated_fourth_order(*args, **kwargs): return fourth_order(*args, **kwargs)
 def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
+def polydiff_irreg_step(*args, **kwargs): return polydiff(*args, **kwargs)
+irreg_list = [spline_irreg_step, polydiff_irreg_step, rbfdiff, rtsdiff] # methods to test with irregular time steps
 
 dt = 0.1
 t = np.linspace(0, 3, 31) # sample locations, including the endpoint
@@ -42,6 +44,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
     (first_order, {}), (second_order, {}), (fourth_order, {}), # empty dictionary for the case of no parameters
     (iterated_second_order, {'num_iterations':5}), (iterated_fourth_order, {'num_iterations':10}),
     (polydiff, {'degree':2, 'window_size':3}), (polydiff, [2, 3]),
+    (polydiff_irreg_step, {'degree':2, 'window_size':3}),
     (savgoldiff, {'degree':2, 'window_size':5, 'smoothing_win':5}), (savgoldiff, [2, 5, 5]),
     (splinediff, {'degree':5, 's':2}), (splinediff, [5, 2]),
     (spline_irreg_step, {'degree':5, 's':2}),
@@ -132,6 +135,12 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                [(-2, -2), (0, 0), (0, -1), (1, 1)],
                [(0, 0), (1, 1), (0, -1), (1, 1)],
                [(0, 0), (3, 3), (0, 0), (3, 3)]],
+    polydiff_irreg_step: [[(-14, -15), (-14, -14), (0, -1), (1, 1)],
+                          [(-14, -14), (-13, -13), (0, -1), (1, 1)],
+                          [(-14, -14), (-13, -13), (0, -1), (1, 1)],
+                          [(-2, -2), (0, 0), (0, -1), (1, 1)],
+                          [(0, 0), (1, 1), (0, 0), (1, 1)],
+                          [(0, 0), (3, 3), (0, 0), (3, 3)]],
     savgoldiff: [[(-13, -14), (-13, -14), (0, -1), (0, 0)],
                  [(-13, -13), (-13, -13), (0, -1), (0, 0)],
                  [(-2, -2), (-1, -1), (0, -1), (0, 0)],
@@ -242,9 +251,9 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     i, latex_name, f, df = test_func_and_deriv
 
     # sample the true function and true derivative, and make noisy samples
-    x = f(t) if diff_method not in [spline_irreg_step, rbfdiff, rtsdiff] else f(t_irreg)
-    dxdt = df(t) if diff_method not in [spline_irreg_step, rbfdiff, rtsdiff] else df(t_irreg)
-    _t = dt if diff_method not in [spline_irreg_step, rbfdiff, rtsdiff] else t_irreg
+    x = f(t) if diff_method not in irreg_list else f(t_irreg)
+    dxdt = df(t) if diff_method not in irreg_list else df(t_irreg)
+    _t = dt if diff_method not in irreg_list else t_irreg
     x_noisy = x + noise
 
     # differentiate without and with noise, accounting for new and old styles of calling functions
@@ -258,7 +267,7 @@ def test_diff_method(diff_method_and_params, test_func_and_deriv, request): # re
     # plotting code
     if request.config.getoption("--plot") and not isinstance(params, list): # Get the plot flag from pytest configuration
         fig, axes = request.config.plots[diff_method] # get the appropriate plot, set up by the store_plots fixture in conftest.py
-        t_ = t_irreg if diff_method in [spline_irreg_step, rtsdiff, rbfdiff] else t
+        t_ = t_irreg if diff_method in irreg_list else t
         axes[i, 0].plot(t_, f(t_))
         axes[i, 0].plot(t_, x, 'C0+')
         axes[i, 0].plot(t_, x_hat, 'C2.', ms=4)
diff --git a/pynumdiff/tests/test_utils.py b/pynumdiff/tests/test_utils.py
@@ -4,7 +4,7 @@
 
 from pynumdiff.utils import utility, evaluate
 from pynumdiff.utils.simulate import sine, triangle, pop_dyn, linear_autonomous, pi_cruise_control, lorenz_x
-np.random.seed(42) # The answer to life, the universe, and everything
+np.random.seed(7)
 
 
 def test_integrate_dxdt_hat():
@@ -56,7 +56,8 @@ def test_convolutional_smoother():
 def test_peakdet(request):
     """Verify peakdet finds peaks and valleys"""
     t = np.arange(0, 10, 0.001)
-    x = 0.3*np.sin(t) + np.sin(1.3*t) + 0.9*np.sin(4.2*t) + 0.02*np.random.randn(10000)
+    x = 0.3*np.sin(t) + np.sin(1.3*t) + 0.9*np.sin(4.2*t) + \
+        0.02*np.random.RandomState(42).randn(10000) # isolated source of randomness so test order doesn't affect results
     maxtab, mintab = utility.peakdet(x, 0.5, t)
 
     if request.config.getoption("--plot"):
@@ -66,30 +67,33 @@ def test_peakdet(request):
         pyplot.title('peakdet validataion')
         pyplot.show()
 
-    assert np.allclose(maxtab, [[0.475, 1.58696894], # these numbers validated by eye with --plot
-                                [1.813, 1.91418201],
-                                [3.311, -0.02749755],
-                                [4.971, 0.74687989],
-                                [6.333, 1.89776084],
-                                [7.76, 0.57366611],
-                                [9.397, 0.59379866]])
-    assert np.allclose(mintab, [[1.134, 0.31086976],
-                                [2.747, -1.13032479],
-                                [4.093, -2.00466846],
-                                [5.502, -0.31428495],
-                                [7.206, -0.5993835],
-                                [8.607,-1.71266074]])
+    assert np.allclose(maxtab, [[0.478, 1.59725055], # these numbers validated by eye with --plot
+                                [1.8, 1.91003085],
+                                [3.319, -0.04597348],
+                                [4.997, 0.74477798],
+                                [6.35, 1.89578662],
+                                [7.783, 0.57274039],
+                                [9.429, 0.58636224]])
+    assert np.allclose(mintab, [[1.101, 0.30335672],
+                                [2.744, -1.12418367],
+                                [4.077, -2.00297377],
+                                [5.587, -0.31253041],
+                                [7.14, -0.58622913],
+                                [8.608, -1.71228973]])
 
 def test_slide_function():
     """Verify the slide function's weighting scheme calculates as expected"""
-    def identity(x, dt): return x, 0 # should come back the same
+    def identity(x, dt_or_t): return x, 0 # should come back the same
 
-    x = np.arange(100)
+    x = np.arange(100, dtype=float)
     kernel = utility.gaussian_kernel(9)
 
-    x_hat, dxdt_hat = utility.slide_function(identity, x, 0.1, kernel, stride=2)
+    x_hat_dt, _ = utility.slide_function(identity, x, 0.1, kernel, stride=2)
+    assert np.allclose(x, x_hat_dt)
 
-    assert np.allclose(x, x_hat)
+    # time array: func receives a kernel-length slice of times instead of scalar dt; identity still returns x unchanged
+    x_hat_t, _ = utility.slide_function(identity, x, np.linspace(0, 10, 100, endpoint=False), kernel, stride=2)
+    assert np.allclose(x, x_hat_t)
 
 
 def test_simulations(request):
diff --git a/pynumdiff/utils/utility.py b/pynumdiff/utils/utility.py
@@ -114,24 +114,26 @@ def convolutional_smoother(x, kernel, num_iterations=1, axis=0):
     return x_hat
 
 
-def slide_function(func, x, dt, kernel, *args, stride=1, pass_weights=False, **kwargs):
+def slide_function(func, x, dt_or_t, kernel, *args, stride=1, pass_weights=False, **kwargs):
     """Slide a smoothing derivative function across a timeseries with specified window size, and
     combine the results according to kernel weights.
 
     :param callable func: name of the function to slide
     :param np.array[float] x: data to differentiate
-    :param float dt: step size
+    :param float or np.array[float] dt_or_t: constant step size (scalar) or array of sample locations (same length as x).
+        When given as an array, the actual time values for each window are passed to :code:`func`, enabling nonuniform spacing.
     :param np.array[float] kernel: values to weight the sliding window
-    :param list args: passed to func
-    :param int stride: step size for slide (e.g. 1 means slide by 1 step)
+    :param list args: passed to :code:`func`
+    :param int stride: step size for slide (e.g. 1 means slide by 1 index location)
     :param bool pass_weights: whether weights should be passed to func via update to kwargs
-    :param dict kwargs: passed to func
+    :param dict kwargs: passed to :code:`func`
 
     :return: - **x_hat** -- estimated (smoothed) x
              - **dxdt_hat** -- estimated derivative of x
     """
     if len(kernel) % 2 == 0: raise ValueError("Kernel window size should be odd.")
     half_window_size = (len(kernel) - 1)//2 # int because len(kernel) is always odd
+    equispaced = np.isscalar(dt_or_t)
 
     x_hat = np.zeros(x.shape)
     dxdt_hat = np.zeros(x.shape)
@@ -141,7 +143,7 @@ def slide_function(func, x, dt, kernel, *args, stride=1, pass_weights=False, **k
         # find where to index data and kernel, taking care at edges
         start = max(0, midpoint - half_window_size)
         end = min(len(x), midpoint + half_window_size + 1) # +1 because slicing is exclusive of end
-        window = slice(start, end)
+        window = slice(start, end) # This is in terms of indices, not true time in the event of nonuniform spacing
 
         kstart = max(0, half_window_size - midpoint)
         kend = kstart + (end - start)
@@ -150,8 +152,8 @@ def slide_function(func, x, dt, kernel, *args, stride=1, pass_weights=False, **k
         w = kernel if (end-start) == len(kernel) else kernel[kslice]/np.sum(kernel[kslice])
         if pass_weights: kwargs['weights'] = w
 
-        # run the function on the window and add weighted results to cumulative answers
-        x_window_hat, dxdt_window_hat = func(x[window], dt, *args, **kwargs)
+        # Run the function on the window and add weighted results to cumulative answers. If not equispaced, pass times for window.
+        x_window_hat, dxdt_window_hat = func(x[window], dt_or_t if equispaced else dt_or_t[window], *args, **kwargs)
         x_hat[window] += w * x_window_hat
         dxdt_hat[window] += w * dxdt_window_hat
         weight_sum[window] += w # save sum of weights for normalization at the end