better optimization and comments

pavelkomarov · pavelkomarov · commit 239467681fba · 2025-11-10T18:57:47.000-08:00
diff --git a/pynumdiff/optimize/_optimize.py b/pynumdiff/optimize/_optimize.py
@@ -8,7 +8,7 @@
 from hashlib import sha1
 from tqdm import tqdm
 
-from ..utils import evaluate
+from ..utils import evaluate, utility
 from ..finite_difference import finitediff, first_order, second_order, fourth_order
 from ..smooth_finite_difference import kerneldiff, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff
 from ..polynomial_fit import polydiff, savgoldiff, splinediff
@@ -157,8 +157,12 @@ def _objective_function(point, func, x, dt, singleton_params, categorical_params
         elif metric == 'error_correlation':
             ec = evaluate.error_correlation(dxdt_truth, dxdt_hat, padding=padding)
             cache[key] = ec; return ec
-    else: # then minimize (RMSE(x_hat - x) || sqrt{2*Mean(Huber((x_hat- x)/sigma, M))}*sigma) + gamma*TV(dxdt_hat)
-        # rubust_rme(,inf) = rmse(), so just use the simpler function in that case
+    else: # then minimize L(Phi) = (RMSE(trapz(dxdt_hat) + c - x) || sqrt{2*Mean(Huber((trapz(dxdt_hat) + c - x)/sigma, M))}*sigma) + gamma*TV(dxdt_hat)
+        # It seems like we should be able to use x_hat rather than the trapz integral of dxdt_hat + constant, but the latter is more reliable,
+        # because it accounts for the accuracy of the derivative directly, not through the generating algorithm's smooth signal estimate.
+        rec_x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt)
+        rec_x_hat += utility.estimate_integration_constant(x, rec_x_hat, M=huberM)
+        # robust_rme(..., M=inf) == rmse(), so just use the simpler function if M=inf
         cost = evaluate.rmse(x, x_hat, padding=padding) if huberM == float('inf') else evaluate.robust_rme(x, x_hat, padding=padding, M=huberM)
         cost += tvgamma*evaluate.total_variation(dxdt_hat, padding=padding)
         cache[key] = cost; return cost
diff --git a/pynumdiff/tests/test_optimize.py b/pynumdiff/tests/test_optimize.py
@@ -32,7 +32,7 @@ def test_targeting_rmse_vs_tvgamma_loss():
     x_hat, dxdt_hat = splinediff(x, dt, **params_loss)
     loss_rmse = rmse(dxdt_truth, dxdt_hat)
 
-    assert val_rmse < loss_rmse < 1.1*val_rmse # This exact bound might break if using a different diff method or data series, but the point is they should be close
+    assert val_rmse <= loss_rmse < 1.1*val_rmse # This exact bound might break if using a different diff method or data series, but the point is they should be close
 
 
 def test_search_space_updates_applied():
diff --git a/pynumdiff/utils/utility.py b/pynumdiff/utils/utility.py
@@ -117,7 +117,9 @@ def integrate_dxdt_hat(dxdt_hat, _t):
 
 def estimate_integration_constant(x, x_hat, M=6):
     """Integration leaves an unknown integration constant. This function finds a best fit integration
-    constant given x and x_hat (the integral of dxdt_hat) by optimizing :math:`\\min_c ||x - \\hat{x} + c||_2`.
+    constant to correct the DC offset of :code:`x_hat` (the integral of dxdt_hat) by optimizing
+    :math:`\\min_c J(\\hat{x} + c - x)`, where :math:`J` is the Huber loss function or the :math:`\\ell_1`
+    or :math:`\\ell_2` norm.
 
     :param np.array[float] x: timeseries of measurements
     :param np.array[float] x_hat: smoothed estimate of x