Skip to content

Commit 462f665

Browse files
committed
played with optimization code more and reran the notebooks
1 parent 5f27575 commit 462f665

11 files changed

Lines changed: 276 additions & 895 deletions

docs/source/total_variation_regularization.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ total_variation_regularization
44
.. automodule:: pynumdiff.total_variation_regularization
55
:no-members:
66

7-
.. autofunction:: pynumdiff.total_variation_regularization.tvr
7+
.. autofunction:: pynumdiff.total_variation_regularization.tvrdiff
88
.. autofunction:: pynumdiff.total_variation_regularization.velocity
99
.. autofunction:: pynumdiff.total_variation_regularization.acceleration
1010
.. autofunction:: pynumdiff.total_variation_regularization.jerk

examples/1_basic_tutorial.ipynb

Lines changed: 6 additions & 6 deletions
Large diffs are not rendered by default.

examples/2a_optimizing_parameters_with_dxdt_known.ipynb

Lines changed: 106 additions & 412 deletions
Large diffs are not rendered by default.

examples/2b_optimizing_parameters_with_dxdt_unknown.ipynb

Lines changed: 92 additions & 402 deletions
Large diffs are not rendered by default.

examples/3_automatic_method_suggestion.ipynb

Lines changed: 10 additions & 8 deletions
Large diffs are not rendered by default.

pynumdiff/finite_difference/_finite_difference.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ def finite_difference(x, dt, num_iterations, order):
2727
dxdt_hat[-1] = dxdt_hat[-2] # using stencil -1,0 vs stencil 0,1 you get an expression for the same value
2828
elif order == 2:
2929
dxdt_hat[1:-1] = (x_hat[2:] - x_hat[:-2])/2 # second-order center-difference formula
30-
dxdt_hat[0] = (-3 * x_hat[0] + 4 * x_hat[1] - x_hat[2])/2 # use spaced out stencil to get endpoint formulas
31-
dxdt_hat[-1] = (3 * x_hat[-1] - 4 * x_hat[-2] + x_hat[-3])/2 # that do not amplify noise. See #104
30+
dxdt_hat[0] = x_hat[1] - x_hat[0]
31+
dxdt_hat[-1] = x_hat[-1] - x_hat[-2] # use first-order endpoint formulas so as not to amplify noise. See #104
3232
elif order == 4:
3333
dxdt_hat[2:-2] = (8*(x_hat[3:-1] - x_hat[1:-3]) - x_hat[4:] + x_hat[:-4])/12 # fourth-order center-difference
3434
dxdt_hat[1] = (x_hat[2] - x_hat[0])/2

pynumdiff/optimize/_optimize.py

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@
1111
from ..finite_difference import finite_difference, first_order, second_order, fourth_order
1212
from ..smooth_finite_difference import mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff, splinediff
1313
from ..linear_model import spectraldiff, polydiff, savgoldiff, lineardiff
14-
from ..total_variation_regularization import tvr, velocity, acceleration, jerk, iterative_velocity, smooth_acceleration, jerk_sliding
14+
from ..total_variation_regularization import tvrdiff, velocity, acceleration, jerk, iterative_velocity, smooth_acceleration, jerk_sliding
1515
from ..kalman_smooth import rts_const_deriv, constant_velocity, constant_acceleration, constant_jerk
1616

1717

1818
# Map from method -> (search_space, bounds_low_hi)
1919
method_params_and_bounds = {
20-
spectraldiff: ({'even_extension': [True, False], # give boolean or numerical params in a list to scipy.optimize over them
21-
'pad_to_zero_dxdt': [True, False],
20+
spectraldiff: ({'even_extension': (True, False), # give boolean or numerical params in a list to scipy.optimize over them
21+
'pad_to_zero_dxdt': (True, False),
2222
'high_freq_cutoff': [1e-3, 5e-2, 1e-2, 5e-2, 1e-1]},
2323
{'high_freq_cutoff': (1e-5, 1-1e-5)}),
2424
polydiff: ({'step_size': [1, 2, 5],
@@ -42,30 +42,27 @@
4242
'gamma': (1e-3, 1000),
4343
'window_size': (15, 1000)}),
4444
finite_difference: ({'num_iterations': [5, 10, 30, 50],
45-
'order': (2, 4)}, # order is categorical here, because can't be 3
45+
'order': (2, 4)}, # order is categorical here, because it can't be 3
4646
{'num_iterations': (1, 1000)}),
4747
first_order: ({'num_iterations': [5, 10, 30, 50]},
4848
{'num_iterations': (1, 1000)}),
4949
mediandiff: ({'window_size': [5, 15, 30, 50],
5050
'num_iterations': [1, 5, 10]},
5151
{'window_size': (1, 1e6),
5252
'num_iterations': (1, 100)}),
53-
butterdiff: ({'filter_order': [1, 2, 3, 4, 5, 6, 7],
53+
butterdiff: ({'filter_order': tuple(i for i in range(1,11)), # categorical to save us from doing double work by guessing between orders
5454
'cutoff_freq': [0.0001, 0.001, 0.005, 0.01, 0.1, 0.5],
5555
'num_iterations': [1, 5, 10]},
56-
{'filter_order': (1, 10),
57-
'cutoff_freq': (1e-4, 1-1e-2),
56+
{'cutoff_freq': (1e-4, 1-1e-2),
5857
'num_iterations': (1, 1000)}),
59-
splinediff: ({'order': [3, 5],
58+
splinediff: ({'order': (3, 4, 5), # categorical, because order is whole number, and there aren't many choices
6059
's': [0.5, 0.9, 0.95, 1, 10, 100],
6160
'num_iterations': [1, 5, 10]},
62-
{'order': (3, 5),
63-
's': (1e-2, 1e6),
61+
{'s': (1e-2, 1e6),
6462
'num_iterations': (1, 10)}),
65-
tvr: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
66-
'order': [1, 3]},
67-
{'gamma': (1e-4, 1e7),
68-
'order': (1, 3)}),
63+
tvrdiff: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
64+
'order': (1, 2, 3)}, # categorical, because order is whole number, and there aren't many choices
65+
{'gamma': (1e-4, 1e7)}),
6966
velocity: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000]},
7067
{'gamma': (1e-4, 1e7)}),
7168
iterative_velocity: ({'num_iterations': [1, 5, 10],
@@ -77,12 +74,11 @@
7774
'window_size': [3, 10, 30, 50, 90, 130]},
7875
{'gamma': (1e-4, 1e7),
7976
'window_size': (3, 1000)}),
80-
rts_const_deriv: ({'forwardbackward': [True, False],
81-
'order': [1, 3],
82-
'qr_ratio': [1e-16, 1e-12, 1e-9, 1e-6, 1e-3, 1, 1e3, 1e6, 1e9, 1e12, 1e16]},
83-
{'order': (1, 3),
84-
'qr_ratio': [1e-20, 1e20]}),
85-
constant_velocity: ({'forwardbackward': [True, False],
77+
rts_const_deriv: ({'forwardbackward': (True, False),
78+
'order': (1, 2, 3), # for this few options, the optimization works better if this is categorical
79+
'qr_ratio': [1e-16, 1e-12] + [10**k for k in range(-9, 10, 2)] + [1e12, 1e16]},
80+
{'qr_ratio': [1e-20, 1e20]}),
81+
constant_velocity: ({'forwardbackward': (True, False),
8682
'q': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8],
8783
'r': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8]},
8884
{'q': (1e-10, 1e10),
@@ -113,8 +109,7 @@ def _objective_function(point, func, x, dt, singleton_params, categorical_params
113109
:return: float, cost of this objective at the point
114110
"""
115111
point_params = {k:(v if search_space_types[k] == float else
116-
int(np.round(v)) if search_space_types[k] == int else
117-
v > 0.5) for k,v in zip(search_space_types, point)} # point -> dict
112+
int(np.round(v))) for k,v in zip(search_space_types, point)} # point -> dict
118113
# add back in the singletons we're not searching over
119114
try: x_hat, dxdt_hat = func(x, dt, **point_params, **singleton_params, **categorical_params) # estimate x and dxdt
120115
except np.linalg.LinAlgError: return 1000000000 # some methods can fail numerically
@@ -131,24 +126,25 @@ def _objective_function(point, func, x, dt, singleton_params, categorical_params
131126
return rms_rec_x + tvgamma*evaluate.total_variation(dxdt_hat, padding=padding)
132127

133128

134-
def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, padding=0, metric='rmse',
135-
opt_method='Nelder-Mead', maxiter=10):
129+
def optimize(func, x, dt, dxdt_truth=None, tvgamma=1e-2, search_space_updates={}, metric='rmse',
130+
padding=0, opt_method='Nelder-Mead', maxiter=10):
136131
"""Find the optimal parameters for a given differentiation method.
137132
138133
:param function func: differentiation method to optimize parameters for, e.g. linear_model.savgoldiff
139134
:param np.array[float] x: data to differentiate
140135
:param float dt: step size
141-
:param dict search_space: function parameter settings to use as initial starting points in optimization,
142-
structured as :code:`{param1:[values], param2:[values], param3:value, ...}`. The search space
143-
is the Cartesian product. If left None, a default search space of initial values is used.
144136
:param np.array[float] dxdt_truth: actual time series of the derivative of x, if known
145137
:param float tvgamma: Only used if :code:`dxdt_truth` is given. Regularization value used to select for parameters
146138
that yield a smooth derivative. Larger value results in a smoother derivative.
139+
:param dict search_space_updates: At the top of :code:`_optimize.py`, each method has a search space of parameters
140+
settings structured as :code:`{param1:[values], param2:[values], param3:value, ...}`. The Cartesian
141+
product of values are used as initial starting points in optimization. If left None, the default search
142+
space is used.
143+
:param str metric: either :code:`'rmse'` or :code:`'error_correlation'`, only applies if :code:`dxdt_truth`
144+
is not None, see _objective_function
147145
:param int padding: number of time steps to ignore at the beginning and end of the time series in the
148146
optimization, or :code:`'auto'` to ignore 2.5% at each end. Larger value causes the
149147
optimization to emphasize the accuracy of dxdt in the middle of the time series
150-
:param str metric: either :code:`'rmse'` or :code:`'error_correlation'`, only applies if :code:`dxdt_truth`
151-
is not None, see _objective_function
152148
:param str opt_method: Optimization technique used by :code:`scipy.minimize`, the workhorse
153149
:param int maxiter: passed down to :code:`scipy.minimize`, maximum iterations
154150
@@ -162,7 +158,7 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
162158
raise ValueError('`metric` can only be `error_correlation` if `dxdt_truth` is given.')
163159

164160
params, bounds = method_params_and_bounds[func]
165-
params.update(search_space) # for things not given, use defaults
161+
params.update(search_space_updates) # for things not given, use defaults
166162

167163
# No need to optimize over singletons, just pass them through
168164
singleton_params = {k:v for k,v in params.items() if not isinstance(v, (list, tuple))}
@@ -171,15 +167,14 @@ def optimize(func, x, dt, search_space={}, dxdt_truth=None, tvgamma=1e-2, paddin
171167
categorical_params = {k for k,v in params.items() if isinstance(v, tuple)}
172168
categorical_combos = [dict(zip(categorical_params, combo)) for combo in product(*[params[k] for k in categorical_params])] # ends up [{}] if there are no categorical params
173169

174-
# The Nelder-Mead's search space is the dimensions where multiple numerical (float or castable to float) options are given in a list
170+
# The Nelder-Mead's search space is the dimensions where multiple numerical options are given in a list
175171
search_space_types = {k:type(v[0]) for k,v in params.items() if isinstance(v, list)} # map param name -> type, for converting to and from point
176-
if any(v not in [float, int, bool] for v in search_space_types.values()):
177-
raise ValueError("To optimize over categorical strings, put them in a tuple, not a list.")
178-
# If excluding string type, I can just cast ints and bools to floats, and we're good to go
172+
if any(v not in [float, int] for v in search_space_types.values()):
173+
raise ValueError("To optimize over categorical strings or bools, put them in a tuple, not a list.")
174+
# Cast ints to floats, and we're good to go
179175
starting_points = list(product(*[np.array(params[k]).astype(float) for k in search_space_types]))
180176
# The numerical space should have bounds
181177
bounds = [bounds[k] if k in bounds else # pass these to minimize(). It should respect them.
182-
(0, 1) if v == bool else
183178
None for k,v in search_space_types.items()] # None means no bound on a dimension
184179

185180
results = []
@@ -240,17 +235,17 @@ def suggest_method(x, dt, dxdt_truth=None, cutoff_frequency=None):
240235
raise ValueError('Either dxdt_truth or cutoff_frequency must be provided.')
241236
tvgamma = np.exp(-1.6*np.log(cutoff_frequency) -0.71*np.log(dt) - 5.1) # See https://ieeexplore.ieee.org/document/9241009
242237

243-
methods = [second_order, fourth_order, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff,
244-
splinediff, spectraldiff, polydiff, savgoldiff, constant_velocity, constant_acceleration, constant_jerk]
238+
methods = [finite_difference, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff,
239+
splinediff, spectraldiff, polydiff, savgoldiff, rts_const_deriv]
245240
try: # optionally skip some methods
246241
import cvxpy
247-
methods += [acceleration, jerk, smooth_acceleration]
242+
methods += [tvrdiff, smooth_acceleration]
248243
except ImportError:
249244
warn("CVXPY not installed, skipping acceleration, jerk, and smooth_acceleration")
250245

251246
best_value = float('inf') # core loop
252247
for func in tqdm(methods):
253-
p, v = optimize(func, x, dt, dxdt_truth=dxdt_truth, tvgamma=tvgamma)
248+
p, v = optimize(func, x, dt, dxdt_truth=dxdt_truth, tvgamma=tvgamma, search_space_updates=({'order':(2,3)} if func==tvrdiff else {})) # TVR with order 1 hacks the cost function
254249
if v < best_value:
255250
method = func
256251
best_value = v

pynumdiff/tests/test_diff_methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def iterated_fourth_order(*args, **kwargs): return fourth_order(*args, **kwargs)
7474
[(-25, -25), (3, 3), (0, 0), (3, 3)]],
7575
iterated_second_order: [[(-9, -10), (-25, -25), (0, -1), (0, 0)],
7676
[(-9, -10), (-14, -14), (0, -1), (0, 0)],
77-
[(-9, -10), (-13, -14), (0, -1), (0, 0)],
77+
[(-1, -1), (0, 0), (0, -1), (0, 0)],
7878
[(0, 0), (1, 0), (0, 0), (1, 0)],
7979
[(1, 1), (2, 2), (1, 1), (2, 2)],
8080
[(1, 1), (3, 3), (1, 1), (3, 3)]],

pynumdiff/tests/test_optimize.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,32 +24,32 @@ def test_finite_difference():
2424
assert params2['num_iterations'] == 1
2525

2626
def test_mediandiff():
27-
params1, val1 = optimize(mediandiff, x, dt, search_space={'num_iterations':1}, dxdt_truth=dxdt_truth, padding='auto')
28-
params2, val2 = optimize(mediandiff, x, dt, search_space={'num_iterations':1}, tvgamma=tvgamma, dxdt_truth=None, padding='auto')
27+
params1, val1 = optimize(mediandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto')
28+
params2, val2 = optimize(mediandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto')
2929
assert params1['window_size'] == 5
3030
assert params2['window_size'] == 1
3131

3232
def test_meandiff():
33-
params1, val1 = optimize(meandiff, x, dt, search_space={'num_iterations':1}, dxdt_truth=dxdt_truth, padding='auto')
34-
params2, val2 = optimize(meandiff, x, dt, search_space={'num_iterations':1}, tvgamma=tvgamma, dxdt_truth=None, padding='auto')
33+
params1, val1 = optimize(meandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto')
34+
params2, val2 = optimize(meandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto')
3535
assert params1['window_size'] == 5
3636
assert params2['window_size'] == 1
3737

3838
def test_gaussiandiff():
39-
params1, val1 = optimize(gaussiandiff, x, dt, search_space={'num_iterations':1}, dxdt_truth=dxdt_truth, padding='auto')
40-
params2, val2 = optimize(gaussiandiff, x, dt, search_space={'num_iterations':1}, tvgamma=tvgamma, dxdt_truth=None, padding='auto')
39+
params1, val1 = optimize(gaussiandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto')
40+
params2, val2 = optimize(gaussiandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto')
4141
assert params1['window_size'] == 9
4242
assert params2['window_size'] == 1
4343

4444
def test_friedrichsdiff():
45-
params1, val1 = optimize(friedrichsdiff, x, dt, search_space={'num_iterations':1}, dxdt_truth=dxdt_truth, padding='auto')
46-
params2, val2 = optimize(friedrichsdiff, x, dt, search_space={'num_iterations':1}, tvgamma=tvgamma, dxdt_truth=None, padding='auto')
45+
params1, val1 = optimize(friedrichsdiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto')
46+
params2, val2 = optimize(friedrichsdiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto')
4747
assert params1['window_size'] == 9
4848
assert params2['window_size'] == 1
4949

5050
def test_iterative_velocity():
51-
params1, val1 = optimize(iterative_velocity, x, dt, search_space={'num_iterations':1}, dxdt_truth=dxdt_truth, padding='auto')
52-
params2, val2 = optimize(iterative_velocity, x, dt, search_space={'num_iterations':1}, tvgamma=tvgamma, dxdt_truth=None, padding='auto')
51+
params1, val1 = optimize(iterative_velocity, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto')
52+
params2, val2 = optimize(iterative_velocity, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto')
5353

5454
np.testing.assert_almost_equal(params1['gamma'], 0.0001, decimal=4)
5555
np.testing.assert_almost_equal(params2['gamma'], 0.0001, decimal=4)
@@ -59,7 +59,7 @@ def test_velocity():
5959
except: skip("could not import cvxpy, skipping test_velocity")
6060

6161
params1, val1 = optimize(velocity, x, dt, dxdt_truth=dxdt_truth, padding='auto', maxiter=20)
62-
params2, val2 = optimize(velocity, x, dt, tvgamma=tvgamma, dxdt_truth=None, padding='auto', maxiter=20)
62+
params2, val2 = optimize(velocity, x, dt, tvgamma=tvgamma, padding='auto', maxiter=20)
6363

6464
np.testing.assert_almost_equal(params1['gamma'], 0.0769, decimal=3)
6565
np.testing.assert_almost_equal(params2['gamma'], 0.010, decimal=3)
@@ -76,18 +76,18 @@ def test_acceleration():
7676

7777
def test_savgoldiff():
7878
params1, val1 = optimize(savgoldiff, x, dt, dxdt_truth=dxdt_truth, padding='auto')
79-
params2, val2 = optimize(savgoldiff, x, dt, tvgamma=tvgamma, dxdt_truth=None, padding='auto')
79+
params2, val2 = optimize(savgoldiff, x, dt, tvgamma=tvgamma, padding='auto')
8080
assert (params1['poly_order'], params1['window_size'], params1['smoothing_win']) == (7, 41, 3)
8181
assert (params2['poly_order'], params2['window_size'], params2['smoothing_win']) == (3, 3, 5)
8282

8383
def test_spectraldiff():
8484
params1, val1 = optimize(spectraldiff, x, dt, dxdt_truth=dxdt_truth, padding='auto')
8585
params2, val2 = optimize(spectraldiff, x, dt, tvgamma=tvgamma, padding='auto')
86-
np.testing.assert_almost_equal(params1['high_freq_cutoff'], 0.105, decimal=2)
87-
np.testing.assert_almost_equal(params2['high_freq_cutoff'], 0.105, decimal=2)
86+
np.testing.assert_almost_equal(params1['high_freq_cutoff'], 0.18, decimal=2)
87+
np.testing.assert_almost_equal(params2['high_freq_cutoff'], 0.45, decimal=2)
8888

8989
def test_polydiff():
90-
params1, val1 = optimize(polydiff, x, dt, search_space={'step_size':1}, dxdt_truth=dxdt_truth, padding='auto')
91-
params2, val2 = optimize(polydiff, x, dt, search_space={'step_size':1}, tvgamma=tvgamma, dxdt_truth=None, padding='auto')
90+
params1, val1 = optimize(polydiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'step_size':1}, padding='auto')
91+
params2, val2 = optimize(polydiff, x, dt, tvgamma=tvgamma, search_space_updates={'step_size':1}, padding='auto')
9292
assert (params1['poly_order'], params1['window_size'], params1['kernel']) == (6, 50, 'friedrichs')
9393
assert (params2['poly_order'], params2['window_size'], params2['kernel']) == (3, 10, 'gaussian')

pynumdiff/total_variation_regularization/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"""
33
try:
44
import cvxpy
5-
from ._total_variation_regularization import tvr, velocity, acceleration, jerk, jerk_sliding, smooth_acceleration
5+
from ._total_variation_regularization import tvrdiff, velocity, acceleration, jerk, jerk_sliding, smooth_acceleration
66
except:
77
from warnings import warn
88
warn("Limited Total Variation Regularization Support Detected! CVXPY is not installed. " +

0 commit comments

Comments
 (0)