robustified TVR and in the process discovered some weirdness with finding integration constants

pavelkomarov · pavelkomarov · commit 03b963eec9f7 · 2025-11-21T14:53:03.000-08:00
diff --git a/pynumdiff/kalman_smooth/_kalman_smooth.py b/pynumdiff/kalman_smooth/_kalman_smooth.py
@@ -1,7 +1,6 @@
 import numpy as np
 from warnings import warn
 from scipy.linalg import expm, sqrtm
-from scipy.stats import norm
 from time import time
 try: import cvxpy
 except ImportError: pass
diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
@@ -57,7 +57,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
     (jerk, {'gamma':10}), (jerk, [10]),
     (iterative_velocity, {'num_iterations':5, 'gamma':0.05}), (iterative_velocity, [5, 0.05]),
     (smooth_acceleration, {'gamma':2, 'window_size':5}), (smooth_acceleration, [2, 5]),
-    (lineardiff, {'order':3, 'gamma':5, 'window_size':11, 'solver':'CLARABEL'}), (lineardiff, [3, 5, 11], {'solver':'CLARABEL'})
+    (lineardiff, {'order':3, 'gamma':0.01, 'window_size':11, 'solver':'CLARABEL'}), (lineardiff, [3, 0.01, 11], {'solver':'CLARABEL'})
     ]
 
 # All the testing methodology follows the exact same pattern; the only thing that changes is the
@@ -108,8 +108,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                    [(-25, -25), (0, -1), (0, 0), (1, 1)],
                    [(-25, -25), (1, 1), (0, 0), (1, 1)],
                    [(-25, -25), (3, 3), (0, 0), (3, 3)]],
-    iterated_second_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)],
-                           [(-7, -8), (-14, -14), (0, -1), (0, 0)],
+    iterated_second_order: [[(-25, -25), (-25, -25), (0, -1), (0, 0)],
+                           [(-14, -14), (-14, -14), (0, -1), (0, 0)],
                            [(-1, -1), (0, 0), (0, -1), (0, 0)],
                            [(0, 0), (1, 0), (0, 0), (1, 0)],
                            [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -120,8 +120,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                    [(-25, -25), (-2, -2), (0, 0), (1, 1)],
                    [(-25, -25), (1, 0), (0, 0), (1, 1)],
                    [(-25, -25), (2, 2), (0, 0), (2, 2)]],
-    iterated_fourth_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)],
-                            [(-7, -8), (-13, -13), (0, -1), (0, 0)],
+    iterated_fourth_order: [[(-25, -25), (-25, -25), (0, -1), (0, 0)],
+                            [(-14, -14), (-13, -13), (0, -1), (0, 0)],
                             [(-1, -1), (0, 0), (-1, -1), (0, 0)],
                             [(0, -1), (1, 1), (0, 0), (1, 1)],
                             [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -132,8 +132,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                [(-2, -2), (0, 0), (0, -1), (1, 1)],
                [(0, 0), (1, 1), (0, -1), (1, 1)],
                [(0, 0), (3, 3), (0, 0), (3, 3)]],
-    savgoldiff: [[(-7, -7), (-13, -14), (0, -1), (0, 0)],
-                 [(-7, -7), (-13, -13), (0, -1), (0, 0)],
+    savgoldiff: [[(-13, -14), (-13, -14), (0, -1), (0, 0)],
+                 [(-13, -13), (-13, -13), (0, -1), (0, 0)],
                  [(-2, -2), (-1, -1), (0, -1), (0, 0)],
                  [(0, -1), (0, 0), (0, 0), (1, 0)],
                  [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -164,16 +164,16 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
               [(1, 1), (3, 3), (1, 1), (3, 3)]],
     velocity: [[(-25, -25), (-18, -19), (0, -1), (1, 0)],
                [(-12, -12), (-11, -12), (-1, -1), (-1, -2)],
-               [(0, 0), (1, 0), (0, 0), (1, 0)],
+               [(0, -1), (1, 0), (0, -1), (1, 0)],
                [(0, -1), (1, 1), (0, 0), (1, 0)],
-               [(1, 1), (2, 2), (1, 1), (2, 2)],
-               [(1, 0), (3, 3), (1, 0), (3, 3)]],
-    acceleration: [[(-25, -25), (-18, -18), (0, -1), (0, 0)],
+               [(1, 0), (2, 2), (1, 0), (2, 2)],
+               [(0, 0), (3, 3), (0, 0), (3, 3)]],
+    acceleration: [[(-25, -25), (-18, -18), (0, -1), (1, 0)],
                    [(-10, -10), (-9, -9), (-1, -1), (0, -1)],
                    [(-10, -10), (-9, -10), (-1, -1), (0, -1)],
                    [(0, -1), (1, 0), (0, -1), (1, 0)],
-                   [(1, 1), (2, 2), (1, 1), (2, 2)],
-                   [(1, 1), (3, 3), (1, 1), (3, 3)]],
+                   [(1, 0), (2, 2), (1, 0), (2, 2)],
+                   [(0, 0), (3, 3), (0, 0), (3, 3)]],
     jerk: [[(-25, -25), (-18, -18), (-1, -1), (0, 0)],
            [(-9, -10), (-9, -9), (-1, -1), (0, 0)],
            [(-10, -10), (-9, -10), (-1, -1), (0, 0)],
@@ -186,8 +186,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                          [(1, 0), (1, 1), (1, 0), (1, 1)],
                          [(2, 1), (2, 2), (2, 1), (2, 2)],
                          [(1, 1), (3, 3), (1, 1), (3, 3)]],
-    smooth_acceleration: [[(-7, -8), (-18, -18), (0, -1), (0, 0)],
-                          [(-7, -7), (-10, -10), (-1, -1), (-1, -1)],
+    smooth_acceleration: [[(-25, -25), (-21, -21), (0, -1), (0, 0)],
+                          [(-10, -11), (-10, -10), (-1, -1), (-1, -1)],
                           [(-2, -2), (-1, -1), (-1, -1), (0, -1)],
                           [(0, 0), (1, 0), (0, -1), (1, 0)],
                           [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -222,12 +222,12 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                  [(-7, -7), (-2, -2), (0, -1), (1, 1)],
                  [(0, 0), (2, 2), (0, 0), (2, 2)],
                  [(1, 1), (3, 3), (1, 1), (3, 3)]],
-    lineardiff: [[(-7, -8), (-14, -14), (0, -1), (0, 0)],
+    lineardiff: [[(-3, -4), (-3, -3), (0, -1), (1, 0)],
+                 [(-1, -2), (0, 0), (0, -1), (1, 0)],
+                 [(-1, -1), (0, 0), (0, -1), (1, 1)],
+                 [(-1, -2), (0, 0), (0, -1), (1, 1)],
                  [(0, 0), (2, 1), (0, 0), (2, 1)],
-                 [(1, 0), (2, 2), (1, 0), (2, 2)],
-                 [(1, 0), (2, 1), (1, 0), (2, 1)],
-                 [(1, 1), (2, 2), (1, 1), (2, 2)],
-                 [(1, 1), (3, 3), (1, 1), (3, 3)]]
+                 [(0, -1), (3, 3), (0, 0), (3, 3)]]
 }
 
 # Essentially run the cartesian product of [diff methods] x [test functions] through this one test
diff --git a/pynumdiff/total_variation_regularization/_total_variation_regularization.py b/pynumdiff/total_variation_regularization/_total_variation_regularization.py
@@ -1,5 +1,6 @@
 import numpy as np
 from warnings import warn
+from scipy.stats import median_abs_deviation
 
 from pynumdiff.total_variation_regularization import _chartrand_tvregdiff
 from pynumdiff.utils import utility
@@ -53,25 +54,28 @@ def iterative_velocity(x, dt, params=None, options=None, num_iterations=None, ga
     return x_hat, dxdt_hat
 
 
-def tvrdiff(x, dt, order, gamma, solver=None):
+def tvrdiff(x, dt, order, gamma, huberM=float('inf'), solver=None):
     """Generalized total variation regularized derivatives. Use convex optimization (cvxpy) to solve for a
     total variation regularized derivative. Other convex-solver-based methods in this module call this function.
 
     :param np.array[float] x: data to differentiate
     :param float dt: step size
     :param int order: 1, 2, or 3, the derivative to regularize
     :param float gamma: regularization parameter
+    :param float huberM: Huber loss parameter, in units of scaled median absolute deviation of input data, :code:`x`.
+                    :math:`M = \\infty` reduces the first (fidelity) cost term to squared :math:`\\ell_2` loss, and
+                    :math:`M = 0` reduces it to :math:`\\ell_1` loss.
     :param str solver: Solver to use. Solver options include: 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS'.
-                    In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default.
+                    If not given, fall back to CVXPY's default.
 
     :return: - **x_hat** (np.array) -- estimated (smoothed) x
              - **dxdt_hat** (np.array) -- estimated derivative of x
     """
     # Normalize for numerical consistency with convex solver
-    mean = np.mean(x)
-    std = np.std(x)
-    if std == 0: std = 1 # safety guard
-    x = (x-mean)/std
+    mu = np.mean(x)
+    sigma = median_abs_deviation(x, scale='normal') # robust alternative to std()
+    if sigma == 0: sigma = 1 # safety guard
+    x = (x-mu)/sigma
 
     # Define the variables for the highest order derivative and the integration constants
     deriv_values = cvxpy.Variable(len(x)) # values of the order^th derivative, in which we're penalizing variation
@@ -84,10 +88,13 @@ def tvrdiff(x, dt, order, gamma, solver=None):
     for i in range(order):
         y = cvxpy.cumsum(y) + integration_constants[i]
 
+    # Compare the recursively integrated position to the noisy position. \ell_2 doesn't get scaled by 1/2 here,
+    # so cvxpy Huber is already the right scale, and \ell_1 should be scaled by 2\sqrt{2} to match.
+    fidelity_cost = cvxpy.sum_squares(y - x) if huberM == float('inf') \
+            else np.sqrt(8)*cvxpy.norm(y - x, 1) if huberM == 0 \
+            else utility.huber_const(huberM)*cvxpy.sum(cvxpy.huber(y - x, huberM*sigma))
     # Set up and solve the optimization problem
-    prob = cvxpy.Problem(cvxpy.Minimize(
-        # Compare the recursively integrated position to the noisy position, and add TVR penalty
-        cvxpy.sum_squares(y - x) + gamma*cvxpy.sum(cvxpy.tv(deriv_values)) ))
+    prob = cvxpy.Problem(cvxpy.Minimize(fidelity_cost + gamma*cvxpy.sum(cvxpy.tv(deriv_values)) ))
     prob.solve(solver=solver)
 
     # Recursively integrate the final derivative values to get back to the function and derivative values
@@ -102,7 +109,7 @@ def tvrdiff(x, dt, order, gamma, solver=None):
     dxdt_hat = (dxdt_hat[:-1] + dxdt_hat[1:])/2
     dxdt_hat = np.hstack((dxdt_hat, 2*dxdt_hat[-1] - dxdt_hat[-2])) # last value = penultimate value [-1] + diff between [-1] and [-2]
 
-    return x_hat*std+mean, dxdt_hat*std # derivative is linear, so scale derivative by std
+    return x_hat*sigma+mu, dxdt_hat*sigma # derivative is linear, so scale derivative by scatter
 
 
 def velocity(x, dt, params=None, options=None, gamma=None, solver=None):
diff --git a/pynumdiff/utils/evaluate.py b/pynumdiff/utils/evaluate.py
@@ -86,8 +86,8 @@ def robust_rme(u, v, padding=0, M=6):
     :param np.array[float] v: e.g. estimated smoothed signal, reconstructed from derivative
     :param int padding: number of snapshots on either side of the array to ignore when calculating
         the metric. If :code:`'auto'`, defaults to 2.5% of the size of inputs
-    :param float M: Huber loss parameter in units of ~1.4*mean absolute deviation, intended to approximate
-        standard deviation robustly.
+    :param float M: Huber loss parameter in units of ~1.4*median absolute deviation of the population of residual
+        errors, intended to approximate standard deviation robustly.
 
     :return: (float) -- Robust root mean error between u and v
     """
diff --git a/pynumdiff/utils/utility.py b/pynumdiff/utils/utility.py
@@ -2,7 +2,7 @@
 import numpy as np
 from scipy.integrate import cumulative_trapezoid
 from scipy.optimize import minimize
-from scipy.stats import median_abs_deviation
+from scipy.stats import median_abs_deviation, norm
 
 
 def hankel_matrix(x, num_delays, pad=False): # fixed delay step of 1
@@ -130,19 +130,19 @@ def estimate_integration_constant(x, x_hat, M=6):
 
     :param np.array[float] x: timeseries of measurements
     :param np.array[float] x_hat: smoothed estimate of x
-    :param float M: robustifies constant estimation using Huber loss. The default is intended to capture the idea
-        of "six sigma": Assuming Gaussian inliers and M in units of standard deviation, the portion of inliers
-        beyond the Huber loss' transition is only about 1.97e-9. M here is in units of scaled mean absolute deviation,
-        so scatter can be calculated and used to normalize data without being thrown off by outliers.
+    :param float M: constant estimation is robustified with the Huber loss. The default is intended to capture the idea
+        of "six sigma": Assuming Gaussian :code:`x - x_hat` errors and :code:`M` in units of standard deviation, the
+        portion of inliers beyond the Huber loss' transition is only about 1.97e-9. :code:`M` here is in units of scaled
+        median absolute deviation, so scatter can be calculated and used to normalize without being thrown off by outliers.
 
     :return: **integration constant** (float) -- initial condition that best aligns x_hat with x
     """
-    if M == float('inf'): # calculates the constant to be mean(diff(x, x_hat)), equivalent to argmin_{x0} ||x_hat + x0 - x||_2^2
-        return np.mean(x - x_hat) # Solves the L2 distance minimization
-    elif M < 0.1: # small M looks like L1 loss, and Huber gets too flat to work well
+    sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric
+    if M == float('inf') or sigma < 1e-6: # If no scatter, then no outliers, so use L2
+        return np.mean(x - x_hat) # Solves the L2 distance minimization, argmin_{x0} ||x_hat + x0 - x||_2^2
+    elif M < 1e-2: # small M looks like L1 loss, and Huber gets too flat to work well
         return np.median(x - x_hat) # Solves the L1 distance minimization
     else:
-        sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric
         return minimize(lambda x0: np.sum(huber(x - (x_hat+x0), M*sigma)), # fn to minimize in 1st argument
             0, method='SLSQP').x[0] # result is a vector, even if initial guess is just a scalar