Skip to content

Commit be8166c

Browse files
committed
optimization now uses sets instead of tuples for categorical params, because that's more applicable notation; changed the way the t vector is calculated in splinediff to be more robust and always be same length as x; added outliers param to simulations
1 parent 16b6c96 commit be8166c

3 files changed

Lines changed: 41 additions & 34 deletions

File tree

pynumdiff/optimize/_optimize.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,18 @@
1717

1818
# Map from method -> (search_space, bounds_low_hi)
1919
method_params_and_bounds = {
20-
spectraldiff: ({'even_extension': (True, False), # give boolean or numerical params in a list to scipy.optimize over them
21-
'pad_to_zero_dxdt': (True, False),
22-
'high_freq_cutoff': [1e-3, 5e-2, 1e-2, 5e-2, 1e-1]},
20+
spectraldiff: ({'even_extension': {True, False}, # give categorical params in a set
21+
'pad_to_zero_dxdt': {True, False},
22+
'high_freq_cutoff': [1e-3, 5e-2, 1e-2, 5e-2, 1e-1]}, # give numerical params in a list to scipy.optimize over them
2323
{'high_freq_cutoff': (1e-5, 1-1e-5)}),
2424
polydiff: ({'step_size': [1, 2, 5],
25-
'kernel': ('friedrichs', 'gaussian'), # use a tuple of multiple options for categoricals
25+
'kernel': {'friedrichs', 'gaussian'}, # categorical
2626
'poly_order': [2, 3, 5, 7],
2727
'window_size': [11, 31, 51, 91, 131]},
2828
{'step_size': (1, 100),
2929
'poly_order': (1, 8),
3030
'window_size': (10, 1000)}),
31-
savgoldiff: ({'poly_order': [2, 3, 5, 7, 9, 11, 13],
31+
savgoldiff: ({'poly_order': [2, 3, 5, 7, 10],
3232
'window_size': [3, 10, 30, 50, 90, 130, 200, 300],
3333
'smoothing_win': [3, 10, 30, 50, 90, 130, 200, 300]},
3434
{'poly_order': (1, 12),
@@ -42,26 +42,26 @@
4242
'gamma': (1e-3, 1000),
4343
'window_size': (15, 1000)}),
4444
finite_difference: ({'num_iterations': [5, 10, 30, 50],
45-
'order': (2, 4)}, # order is categorical here, because it can't be 3
45+
'order': {2, 4}}, # order is categorical here, because it can't be 3
4646
{'num_iterations': (1, 1000)}),
4747
first_order: ({'num_iterations': [5, 10, 30, 50]},
4848
{'num_iterations': (1, 1000)}),
4949
mediandiff: ({'window_size': [5, 15, 30, 50],
5050
'num_iterations': [1, 5, 10]},
5151
{'window_size': (1, 1e6),
5252
'num_iterations': (1, 100)}),
53-
butterdiff: ({'filter_order': tuple(i for i in range(1,11)), # categorical to save us from doing double work by guessing between orders
53+
butterdiff: ({'filter_order': set(i for i in range(1,11)), # categorical to save us from doing double work by guessing between orders
5454
'cutoff_freq': [0.0001, 0.001, 0.005, 0.01, 0.1, 0.5],
5555
'num_iterations': [1, 5, 10]},
5656
{'cutoff_freq': (1e-4, 1-1e-2),
5757
'num_iterations': (1, 1000)}),
58-
splinediff: ({'order': (3, 4, 5), # categorical, because order is whole number, and there aren't many choices
58+
splinediff: ({'order': {3, 4, 5}, # categorical, because order is whole number, and there aren't many choices
5959
's': [0.5, 0.9, 0.95, 1, 10, 100],
6060
'num_iterations': [1, 5, 10]},
6161
{'s': (1e-2, 1e6),
6262
'num_iterations': (1, 10)}),
6363
tvrdiff: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
64-
'order': (1, 2, 3)}, # categorical, because order is whole number, and there aren't many choices
64+
'order': {1, 2, 3}}, # categorical, because order is whole number, and there aren't many choices
6565
{'gamma': (1e-4, 1e7)}),
6666
velocity: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000]},
6767
{'gamma': (1e-4, 1e7)}),
@@ -75,7 +75,7 @@
7575
{'gamma': (1e-4, 1e7),
7676
'window_size': (3, 1000)}),
7777
rts_const_deriv: ({'forwardbackward': (True, False),
78-
'order': (1, 2, 3), # for this few options, the optimization works better if this is categorical
78+
'order': {1, 2, 3}, # for this few options, the optimization works better if this is categorical
7979
'qr_ratio': [1e-16, 1e-12] + [10**k for k in range(-9, 10, 2)] + [1e12, 1e16]},
8080
{'qr_ratio': [1e-20, 1e20]}),
8181
constant_velocity: ({'forwardbackward': (True, False),
@@ -161,10 +161,10 @@ def optimize(func, x, dt, dxdt_truth=None, tvgamma=1e-2, search_space_updates={}
161161
params.update(search_space_updates) # for things not given, use defaults
162162

163163
# No need to optimize over singletons, just pass them through
164-
singleton_params = {k:v for k,v in params.items() if not isinstance(v, (list, tuple))}
164+
singleton_params = {k:v for k,v in params.items() if not isinstance(v, (list, set))}
165165

166166
# To handle categoricals, find their combination, and then pass each set individually
167-
categorical_params = {k for k,v in params.items() if isinstance(v, tuple)}
167+
categorical_params = {k for k,v in params.items() if isinstance(v, set)}
168168
categorical_combos = [dict(zip(categorical_params, combo)) for combo in product(*[params[k] for k in categorical_params])] # ends up [{}] if there are no categorical params
169169

170170
# The Nelder-Mead's search space is the dimensions where multiple numerical options are given in a list

pynumdiff/smooth_finite_difference/_smooth_finite_difference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def splinediff(x, dt, params=None, options={}, order=3, s=None, num_iterations=1
194194
if 'iterate' in options and options['iterate']:
195195
num_iterations = params[2]
196196

197-
t = np.arange(0, len(x)*dt, dt)
197+
t = np.arange(len(x))*dt
198198

199199
x_hat = x
200200
for _ in range(num_iterations):

pynumdiff/utils/simulate.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,30 +10,35 @@
1010

1111

1212
# pylint: disable-msg=too-many-locals, too-many-arguments, no-member
13-
def _add_noise(x, random_seed, noise_type, noise_parameters):
13+
def _add_noise(x, random_seed, noise_type, noise_parameters, outliers=False):
1414
"""Add synthetic noise to data
1515
1616
:param np.array[float] x: data
1717
:param int random_seed: an integer seed used to initialize the random number generator
1818
:param str noise_type: type of noise, compatible with :code:`np.random` functions
1919
(eg. 'normal', 'uniform', 'poisson')
20-
:param noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
20+
:param array-like noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
21+
:param bool outliers: whether to corrupt 1% of the data points with out-of-distribution values
2122
2223
:return: (np.array) -- noisy time series data
2324
"""
2425
np.random.seed(random_seed)
2526
noise = np.random.__getattribute__(noise_type)(*noise_parameters, x.shape[0])
27+
if outliers:
28+
ndxs = np.random.choice(len(noise), len(noise)//100, replace=False) # select 1% of locations
29+
noise[ndxs] = (np.random.choice([-1, 1], size=len(ndxs)) + np.random.uniform(-0.5, 0.5, len(ndxs)))*(np.max(x) - np.min(x))
2630
return x + noise
2731

2832

29-
def sine(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_seed=1,
33+
def sine(duration=4, noise_type='normal', noise_parameters=(0, 0.5), outliers=False, random_seed=1,
3034
dt=0.01, simdt=0.0001, frequencies=(1, 1.7), magnitude=1):
3135
"""Create toy example of time series consisted of sinusoidal modes
3236
3337
:param float duration: governs the length of the series, duration/dt
3438
:param str noise_type: type of noise, compatible with :code:`np.random` functions
3539
(eg. 'normal', 'uniform', 'poisson')
3640
:param noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
41+
:param bool outliers: whether to corrupt 1% of the data points with out-of-distribution values
3742
:param int random_seed: an integer seed used to initialize the random number generator
3843
:param float dt: the step size
3944
:param float simdt: simulation step size used to generate the time series, typically smaller than
@@ -54,19 +59,20 @@ def sine(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_seed
5459
vel += magnitude/len(frequencies)*np.cos(t*2*np.pi*f)*2*np.pi*f
5560

5661
idx = slice(0, len(t), int(dt/simdt)) # downsample so things are dt apart
57-
noisy_pos = _add_noise(pos[idx], random_seed, noise_type, noise_parameters)
62+
noisy_pos = _add_noise(pos[idx], random_seed, noise_type, noise_parameters, outliers)
5863

5964
return noisy_pos, pos[idx], vel[idx]
6065

6166

62-
def triangle(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_seed=1,
67+
def triangle(duration=4, noise_type='normal', noise_parameters=(0, 0.5), outliers=False, random_seed=1,
6368
dt=0.01, simdt=0.0001):
6469
"""Create toy example of sharp-edged triangle wave with increasing frequencies
6570
6671
:param float duration: governs the length of the series, duration/dt
6772
:param str noise_type: type of noise, compatible with :code:`np.random` functions
6873
(eg. 'normal', 'uniform', 'poisson')
6974
:param noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
75+
:param bool outliers: whether to corrupt 1% of the data points with out-of-distribution values
7076
:param int random_seed: an integer seed used to initialize the random number generator
7177
:param float dt: the step size
7278
:param float simdt: simulation step size used to generate the time series, typically smaller than
@@ -103,13 +109,13 @@ def triangle(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_
103109

104110
pos = np.interp(t, reversal_ts, reversal_vals)
105111
_, vel = finite_difference(pos, dt=simdt)
106-
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters)
112+
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters, outliers)
107113

108114
idx = np.arange(0, len(t), int(dt/simdt))
109115
return noisy_pos[idx], pos[idx], vel[idx]
110116

111117

112-
def pop_dyn(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_seed=1,
118+
def pop_dyn(duration=4, noise_type='normal', noise_parameters=(0, 0.5), outliers=False, random_seed=1,
113119
dt=0.01, simdt=0.0001):
114120
"""Create toy example of bounded exponential growth:
115121
http://www.biologydiscussion.com/population/population-growth/population-growth-curves-ecology/51854
@@ -118,6 +124,7 @@ def pop_dyn(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_s
118124
:param str noise_type: type of noise, compatible with :code:`np.random` functions
119125
(eg. 'normal', 'uniform', 'poisson')
120126
:param noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
127+
:param bool outliers: whether to corrupt 1% of the data points with out-of-distribution values
121128
:param int random_seed: an integer seed used to initialize the random number generator
122129
:param float dt: the step size
123130
:param float simdt: simulation step size used to generate the time series, typically smaller than
@@ -141,19 +148,20 @@ def pop_dyn(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_s
141148
idx = slice(0, len(t), int(dt/simdt)) # downsample so things are dt apart
142149
pos = np.array(pos[idx])
143150
vel = np.array(vel[idx])
144-
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters)
151+
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters, outliers)
145152

146153
return noisy_pos, pos, vel
147154

148155

149-
def linear_autonomous(duration=4, noise_type='normal', noise_parameters=(0, 0.5),
156+
def linear_autonomous(duration=4, noise_type='normal', noise_parameters=(0, 0.5), outliers=False,
150157
random_seed=1, dt=0.01, simdt=0.0001):
151158
"""Create toy example of time series from an autonomous linear system
152159
153160
:param float duration: governs the length of the series, duration/dt
154161
:param str noise_type: type of noise, compatible with :code:`np.random` functions
155162
(eg. 'normal', 'uniform', 'poisson')
156163
:param noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
164+
:param bool outliers: whether to corrupt 1% of the data points with out-of-distribution values
157165
:param int random_seed: an integer seed used to initialize the random number generator
158166
:param float dt: the step size
159167
:param float simdt: simulation step size used to generate the time series, typically smaller than
@@ -175,13 +183,13 @@ def linear_autonomous(duration=4, noise_type='normal', noise_parameters=(0, 0.5)
175183
pos = xs[0,:]
176184

177185
smooth_pos, vel = finite_difference(pos, simdt)
178-
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters)
186+
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters, outliers)
179187

180188
idx = slice(0, len(t), int(dt/simdt)) # downsample so things are dt apart
181189
return noisy_pos[1:][idx], smooth_pos[1:][idx], vel[1:][idx]
182190

183191

184-
def pi_cruise_control(duration=4, noise_type='normal', noise_parameters=(0, 0.5),
192+
def pi_cruise_control(duration=4, noise_type='normal', noise_parameters=(0, 0.5), outliers=False,
185193
random_seed=1, dt=0.01):
186194
"""Create a toy example of linear proportional integral controller with nonlinear control inputs.
187195
Simulate proportional integral control of a car attempting to maintain constant velocity while going
@@ -193,6 +201,7 @@ def pi_cruise_control(duration=4, noise_type='normal', noise_parameters=(0, 0.5)
193201
:param str noise_type: type of noise, compatible with :code:`np.random` functions
194202
(eg. 'normal', 'uniform', 'poisson')
195203
:param noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
204+
:param bool outliers: whether to corrupt 1% of the data points with out-of-distribution values
196205
:param int random_seed: an integer seed used to initialize the random number generator
197206
:param float dt: the step size
198207
:param float simdt: simulation step size used to generate the time series, typically smaller than
@@ -242,19 +251,20 @@ def pi_cruise_control(duration=4, noise_type='normal', noise_parameters=(0, 0.5)
242251

243252
pos = states[0, :]
244253
vel = states[1, :]
245-
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters)
254+
noisy_pos = _add_noise(pos, random_seed, noise_type, noise_parameters, outliers)
246255

247256
return noisy_pos, pos, vel
248257

249258

250-
def lorenz_x(duration=4, noise_type='normal', noise_parameters=(0, 0.5),
259+
def lorenz_x(duration=4, noise_type='normal', noise_parameters=(0, 0.5), outliers=False,
251260
random_seed=1, dt=0.01, simdt=0.0001):
252261
"""Create toy example of x component from a lorenz attractor
253262
254263
:param float duration: governs the length of the series, duration/dt
255264
:param str noise_type: type of noise, compatible with :code:`np.random` functions
256265
(eg. 'normal', 'uniform', 'poisson')
257266
:param noise_parameters: parameters of the noise used in :code:`np.random`, leaving off :code:`size`
267+
:param bool outliers: whether to corrupt 1% of the data points with out-of-distribution values
258268
:param int random_seed: an integer seed used to initialize the random number generator
259269
:param float dt: the step size
260270
:param float simdt: simulation step size used to generate the time series, typically smaller than
@@ -269,8 +279,7 @@ def lorenz_x(duration=4, noise_type='normal', noise_parameters=(0, 0.5),
269279
beta = 8/3
270280
rho = 45
271281

272-
def _lorenz_xyz_euler(duration=4, noise_type='normal', noise_parameters=(0, 0.5), random_seed=1,
273-
dt=0.01, simdt=0.0001, x0=(5, 1, 3), normalize=True):
282+
def _lorenz_xyz_euler(x0=(5, 1, 3), normalize=True):
274283
"""Simulation of Lorenz system with Eular method"""
275284
t = np.arange(0, duration, simdt)
276285

@@ -294,11 +303,10 @@ def _lorenz_xyz_euler(duration=4, noise_type='normal', noise_parameters=(0, 0.5)
294303
xyz /= f # because xyz is one longer than xyz_dot, leave off last entry
295304
xyz_dot /= f
296305

297-
noisy_xyz = np.vstack([_add_noise(xyz[i,:], random_seed+i, noise_type, noise_parameters) for i in range(3)])
306+
noisy_xyz = np.vstack([_add_noise(xyz[i,:], random_seed+i, noise_type, noise_parameters, outliers) for i in range(3)])
298307
return noisy_xyz, xyz, xyz_dot
299308

300-
def _lorenz_xyz_odeint(duration=4, noise_type='normal', noise_parameters=(0, 0.5),
301-
random_seed=1, dt=0.01, simdt=None, normalize=True):
309+
def _lorenz_xyz_odeint(normalize=True):
302310
"""Simulate the Lorenz system with scipy's ODE solver"""
303311
t = np.linspace(0, duration, int(duration/dt))
304312

@@ -320,9 +328,8 @@ def dxyz_dt(xyz, t_): # t_ is the time, unused because the Lorenz system is auto
320328
xyz /= 20
321329
xyz_dot /= 20
322330

323-
noisy_xyz = np.vstack([_add_noise(xyz[i,:], random_seed+i, noise_type, noise_parameters) for i in range(3)])
331+
noisy_xyz = np.vstack([_add_noise(xyz[i,:], random_seed+i, noise_type, noise_parameters, outliers) for i in range(3)])
324332
return noisy_xyz, xyz, xyz_dot
325333

326-
noisy_pos, pos, vel = _lorenz_xyz_euler(duration, noise_type, noise_parameters,
327-
random_seed, dt, simdt)
334+
noisy_pos, pos, vel = _lorenz_xyz_euler()
328335
return noisy_pos[0,:], pos[0,:], vel[0,:]

0 commit comments

Comments
 (0)