11import numpy as np
22from warnings import warn
33from scipy .linalg import expm , sqrtm
4+ from scipy .stats import norm
5+ from time import time
46
57try : import cvxpy
68except ImportError : pass
@@ -107,15 +109,15 @@ def rts_smooth(_t, A, xhat_pre, xhat_post, P_pre, P_post, compute_P_smooth=True)
107109 return xhat_smooth if not compute_P_smooth else (xhat_smooth , P_smooth )
108110
109111
110- def rtsdiff (x , _t , order , qr_ratio , forwardbackward ):
112+ def rtsdiff (x , _t , order , log_qr_ratio , forwardbackward ):
111113 """Perform Rauch-Tung-Striebel smoothing with a naive constant derivative model. Makes use of :code:`kalman_filter`
112114 and :code:`rts_smooth`, which are made public. :code:`constant_X` methods in this module call this function.
113115
114116 :param np.array[float] x: data series to differentiate
115117 :param float or array[float] _t: This function supports variable step size. This parameter is either the constant
116118 step size if given as a single float, or data locations if given as an array of same length as :code:`x`.
117119 :param int order: which derivative to stabilize in the constant-derivative model
118- :param qr_ratio : the process noise level of the divided by the measurement noise level, because,
120+ :param log_qr_ratio : the log of the process noise level divided by the measurement noise level, because,
119121 per `our analysis <https://github.com/florisvb/PyNumDiff/issues/139>`_, the mean result is
120122 dependent on the relative rather than absolute size of :math:`q` and :math:`r`.
121123 :param bool forwardbackward: indicates whether to run smoother forwards and backwards
@@ -129,8 +131,8 @@ def rtsdiff(x, _t, order, qr_ratio, forwardbackward):
129131 raise ValueError ("If `_t` is given as array-like, must have same length as `x`." )
130132 x = np .asarray (x ) # to flexibly allow array-like inputs
131133
132- q = 10 ** int (np . log10 ( qr_ratio ) / 2 ) # even-ish split of the powers across 0
133- r = q / qr_ratio
134+ q = 10 ** int (log_qr_ratio / 2 ) # even-ish split of the powers across 0
135+ r = q / ( 10 ** log_qr_ratio )
134136
135137 A = np .diag (np .ones (order ), 1 ) # continuous-time A just has 1s on the first diagonal (where 0th is main diagonal)
136138 Q = np .zeros (A .shape ); Q [- 1 ,- 1 ] = q # continuous-time uncertainty around the last derivative
@@ -262,24 +264,30 @@ def constant_jerk(x, dt, params=None, options=None, r=None, q=None, forwardbackw
262264 return rtsdiff (x , dt , 3 , q / r , forwardbackward )
263265
264266
265- def robustdiff (x , dt , order , q , r , proc_huberM = 6 , meas_huberM = 1.345 ):
267+ def robustdiff (x , dt , order , log_q , log_r , proc_huberM = 6 , meas_huberM = 1.345 ):
266268 """Perform outlier-robust differentiation by solving the Maximum A Priori optimization problem:
267269 :math:`\\ min_{\\ {x_n\\ }} \\ sum_{n=0}^{N-1} V(R^{-1/2}(y_n - C x_n)) + \\ sum_{n=1}^{N-1} J(Q^{-1/2}(x_n - A x_{n-1}))`,
268270 where :math:`A,Q,C,R` come from an assumed constant derivative model and :math:`V,J` are the :math:`\\ ell_1` norm or Huber
269271 loss rather than the :math:`\\ ell_2` norm optimized by RTS smoothing. This problem is convex, so this method calls
270272 :code:`convex_smooth`.
271273
272- Note that for Huber losses, :code:`M` is the radius where the Huber loss function turns from quadratic to linear, in units of
273- standard deviation, because all inputs to Huber are normalized by noise levels, :code:`q` and :code:`r`. This choice affects
274- which portion of inputs fall beyond the transition: Assuming Gaussian inliers, :code:`M = scipy.norm.ppf(1 - outlier_portion/2)`.
275- :code:`M=0` reduces to the :math:`\\ ell_1` norm, and :code:`M=6` is essentially the :math:`\\ ell_2` norm, becaue the portion of
276- examples beyond :math:`6\\ sigma` is miniscule, about :code:`1.97e-9`.
274+ Note that for Huber losses, :code:`M` is the radius where the Huber loss function turns from quadratic to linear. Because
275+ all inputs to Huber are normalized by noise level, :code:`q` or :code:`r`, :code:`M` is in units of standard deviation.
276+ In other words, this choice affects which portion of inputs are treated as outliers. For example, assuming Gaussian inliers,
277+ the portion beyond :math:`M\\ sigma` is :code:`outlier_portion = 2*(1 - scipy.stats.norm.cdf(M))`. The inverse of this is
278+ :code:`M = scipy.stats.norm.ppf(1 - outlier_portion/2)`. By :code:`M=6`, Huber is essentially indistinguishable from the
279+ 1/2-sum-of-squares case, :math:`\\ frac{1}{2}\\ |\\ cdot\\ |_2^2`, because the portion of examples beyond :math:`6\\ sigma` is
280+ miniscule, about :code:`1.97e-9`, and the normalization constant of the Huber loss approaches 1 as :math:`M` increases.
281+ (See :math:`c_2` `in section 6 <https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf>`_, missing a :math:`\\ sqrt{\\ cdot}`
282+ term there, see p2700). Similarly, as :code:`M` approaches 0, Huber reduces to the :math:`\\ ell_1` norm case, because
283+ :math:`c_2` approaches :math:`\\ frac{\\ sqrt{2}}{M}`, cancelling the :math:`M` multiplying :math:`|\\ cdot|` and leaving
284+ behind :math:`\\ sqrt{2}`, the proper normalization.
277285
278286 :param np.array[float] x: data series to differentiate
279287 :param float dt: step size
280288 :param int order: which derivative to stabilize in the constant-derivative model (1=velocity, 2=acceleration, 3=jerk)
281- :param float q: the process noise level
282- :param float r: measurement noise level
289+ :param float log_q: base 10 logarithm of the process noise level, so :code:`q = 10**log_q`
290+ :param float log_r: base 10 logarithm of the measurement noise level, so :code:`r = 10**log_r`
283291 :param float proc_huberM: quadratic-to-linear transition point for process loss
284292 :param float meas_huberM: quadratic-to-linear transition point for measurement loss
285293
@@ -288,9 +296,9 @@ def robustdiff(x, dt, order, q, r, proc_huberM=6, meas_huberM=1.345):
288296 - **dxdt_hat** -- estimated derivative of x
289297 """
290298 A_c = np .diag (np .ones (order ), 1 ) # continuous-time A just has 1s on the first diagonal (where 0th is main diagonal)
291- Q_c = np .zeros (A_c .shape ); Q_c [- 1 ,- 1 ] = q # continuous-time uncertainty around the last derivative
299+ Q_c = np .zeros (A_c .shape ); Q_c [- 1 ,- 1 ] = 10 ** log_q # continuous-time uncertainty around the last derivative
292300 C = np .zeros ((1 , order + 1 )); C [0 ,0 ] = 1 # we measure only y = noisy x
293- R = np .array ([[r ]]) # 1 observed state, so this is 1x1
301+ R = np .array ([[10 ** log_r ]]) # 1 observed state, so this is 1x1
294302
295303 # convert to discrete time using matrix exponential
296304 eM = expm (np .block ([[A_c , Q_c ], [np .zeros (A_c .shape ), - A_c .T ]]) * dt ) # Note this could handle variable dt, similar to rtsdiff
@@ -299,12 +307,13 @@ def robustdiff(x, dt, order, q, r, proc_huberM=6, meas_huberM=1.345):
299307 if np .linalg .cond (Q_d ) > 1e12 : Q_d += np .eye (order + 1 )* 1e-12 # for numerical stability with convex solver. Doesn't change answers appreciably (or at all).
300308
301309 x_states = convex_smooth (x , A_d , Q_d , C , R , proc_huberM , meas_huberM ) # outsource solution of the convex optimization problem
302- return x_states [:, 0 ], x_states [:, 1 ]
310+ return x_states [0 ], x_states [1 ]
303311
304312
305313def convex_smooth (y , A , Q , C , R , proc_huberM = 6 , meas_huberM = 1.345 ):
306314 """Solve the optimization problem for robust smoothing using CVXPY. Note this currently assumes constant dt
307- but could be extended to handle variable step sizes by finding discrete-time A and Q for requisite gaps.
315+ but could be extended to handle variable step sizes by finding discrete-time A and Q for requisite gaps as
316+ in the :code:`kalman_filter` function.
308317
309318 :param np.array[float] y: measurements
310319 :param np.array A: discrete-time state transition matrix
@@ -316,30 +325,36 @@ def convex_smooth(y, A, Q, C, R, proc_huberM=6, meas_huberM=1.345):
316325 :return: np.array -- state estimates (N x state_dim)
317326 """
318327 N = len (y )
319- x_states = cvxpy .Variable ((N , A .shape [0 ])) # each row is [position, velocity, acceleration, ...] at step n
320-
321- Q_sqrt_inv = np .linalg .inv (sqrtm (Q ))
322- R_sqrt_inv = np .linalg .inv (sqrtm (R ))
323- # Process terms: sum of J(Q^(-1/2)(x_n - A x_{n-1}))
324- objective = cvxpy .sum ([cvxpy .norm (Q_sqrt_inv @ (x_states [n ] - A @ x_states [n - 1 ]), 1 ) if proc_huberM < 1e-3 # l1 case
325- else cvxpy .sum_squares (Q_sqrt_inv @ (x_states [n ] - A @ x_states [n - 1 ])) if proc_huberM > (6 - 1e-3 ) # l2 case
326- else cvxpy .sum (cvxpy .huber (Q_sqrt_inv @ (x_states [n ] - A @ x_states [n - 1 ]), proc_huberM )) # proper Huber
327- for n in range (1 , N )])
328- # Measurement terms: sum of g V(R^(-1/2)(y_n - C x_n))
329- objective += cvxpy .sum ([cvxpy .norm (R_sqrt_inv @ (y [n ] - C @ x_states [n ]), 1 ) if meas_huberM < 1e-3
330- else cvxpy .sum_squares (R_sqrt_inv @ (y [n ] - C @ x_states [n ])) if meas_huberM > (6 - 1e-3 )
331- else cvxpy .sum (cvxpy .huber (R_sqrt_inv @ (y [n ] - C @ x_states [n ]), meas_huberM ))
332- for n in range (N )])
328+ x_states = cvxpy .Variable ((A .shape [0 ], N )) # each column is [position, velocity, acceleration, ...] at step n
329+
330+ def huber_const (M ): # from https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf, with correction for missing sqrt
331+ a = 2 * np .exp (- M ** 2 / 2 )/ M
332+ b = np .sqrt (2 * np .pi )* (2 * norm .cdf (M ) - 1 )
333+ return np .sqrt ((2 * a * (1 + M ** 2 )/ M ** 2 + b )/ (a + b ))
334+
335+ # It is extremely important to run time that CVXPY expressions be in vectorized form
336+ proc_resids = np .linalg .inv (sqrtm (Q )) @ (x_states [:,1 :] - A @ x_states [:,:- 1 ]) # all Q^(-1/2)(x_n - A x_{n-1})
337+ meas_resids = np .linalg .inv (sqrtm (R )) @ (y .reshape (1 ,- 1 ) - C @ x_states ) # all R^(-1/2)(y_n - C x_n)
338+ # Process terms: sum of J(proc_resids)
339+ objective = 0.5 * cvxpy .sum_squares (proc_resids ) if proc_huberM > (6 - 1e-3 ) \
340+ else np .sqrt (2 )* cvxpy .sum (cvxpy .abs (proc_resids )) if proc_huberM < 1e-3 \
341+ else huber_const (proc_huberM )* cvxpy .sum (cvxpy .huber (proc_resids , proc_huberM )) # 1/2 l2^2, l1, or Huber
342+ # Measurement terms: sum of V(meas_resids)
343+ objective += 0.5 * cvxpy .sum_squares (meas_resids ) if meas_huberM > (6 - 1e-3 ) \
344+ else np .sqrt (2 )* cvxpy .sum (cvxpy .abs (meas_resids )) if meas_huberM < 1e-3 \
345+ else huber_const (meas_huberM )* cvxpy .sum (cvxpy .huber (meas_resids , meas_huberM )) # CVXPY quirk: norm(, 1) != sum(abs()) for matrices
333346
334347 problem = cvxpy .Problem (cvxpy .Minimize (objective ))
335348 try :
349+ before = time ()
336350 problem .solve (solver = cvxpy .CLARABEL )
337- print ("CLARABEL succeeded" )
338- except cvxpy .error .SolverError :
339- print (f"CLARABEL failed. Retrying with SCS." )
340- problem .solve (solver = cvxpy .SCS ) # SCS is a lot slower but pretty bulletproof even with big condition numbers
341- if x_states .value is None : # There is occasional solver failure with huber as opposed to 1-norm
342- print ("Convex solvers failed with status {problem.status}. Returning NaNs." )
343- return np .full ((N , A .shape [0 ]), np .nan )
351+ print (f"CLARABEL succeeded in { time () - before } s" )
352+ except cvxpy .error .SolverError : pass
353+ #print("CLARABEL failed with status {problem.status}. Returning NaNs.")
354+ #return np.full((A.shape[0], N), np.nan)
355+ # problem.solve(solver=cvxpy.SCS) # Alternatively, SCS is a lot slower but pretty bulletproof even with big condition numbers
356+ if x_states .value is None : # There can be solver failure
357+ #print(f"Convex solvers failed with status {problem.status}. Returning NaNs.")
358+ return np .full ((A .shape [0 ], N ), np .nan )
344359
345360 return x_states .value
0 commit comments