-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathmodels.py
More file actions
448 lines (364 loc) · 14.4 KB
/
Copy pathmodels.py
File metadata and controls
448 lines (364 loc) · 14.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
"""Statistical Model Classes
Some of these models are implemented and executed using R and its built-in `stats`
package. All of those models are self-starting nonlinear models. The nonlinear
least-squares method is used to estimate the parameters.
The package `rpy2` is an interface to R running embedded in a Python process:
https://rpy2.github.io/
Self-starting means the model makes the initial parameter guess itself. Nonlinear fits
are sensitive to the initial guess. If the initial guess is poor, R can raise the
following error: "singular gradient matrix at initial parameter estimates". In this case
`rpy2` will raise an `RRuntimeError`.
"""
# NOTE: All calls to rpy2 need to be wrapped in
# `with robjects.default_converter.context()`
# because of
# https://github.com/rpy2/rpy2/pull/1076/changes#diff-0f477f4ec1a1057412d1907d470a84b42251161f3581383d2713e7775b6a62bf
import os
from abc import ABC, abstractmethod
import numpy as np
import rpy2.robjects.packages as rpackages
from numpy.typing import ArrayLike, DTypeLike
from rpy2 import robjects
from scipy.optimize import curve_fit
from scipy.stats.distributions import t as t_distribution
class BaseStatModel(ABC):
    """Base class for statistical models fitted to paired x/y observations.

    The constructor only validates and stores the data. Concrete models are
    expected to provide `name`, `function_formula` and `asymptote`, and to fill
    in `coefficients`, `fitted_values` and `asym_conf_int`.

    Args:
        xdata: Values of the independent variable.
        ydata: Values of the dependent variable; must have the same shape as
            `xdata`.

    Raises:
        AssertionError: If `xdata` and `ydata` do not have the same shape.
    """

    def __init__(self, xdata: ArrayLike, ydata: ArrayLike):
        # Raise explicitly rather than through an `assert` statement: assert
        # statements are removed when Python runs with `-O`, which would
        # silently disable this check. The exception type stays the same.
        x_shape = np.shape(xdata)
        y_shape = np.shape(ydata)
        if x_shape != y_shape:
            raise AssertionError(
                "xdata and ydata must have the same shape, "
                f"got {x_shape} and {y_shape}"
            )
        self.xdata = xdata
        self.ydata = ydata
        self.coefficients = {}  # parameter name -> estimated value
        self.fitted_values = None  # model values for xdata, set after fitting
        self.asym_conf_int = None  # confidence interval of the asymptote

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of the statistical model"""
        pass

    @property
    @abstractmethod
    def function_formula(self) -> str:
        """Function formula of the statistical model"""
        pass

    @property
    @abstractmethod
    def asymptote(self) -> float:
        """Asymptote of the fitted curve"""
        pass

    @property
    def mae(self) -> float:
        """Mean absolute error between `ydata` and `fitted_values`"""
        residuals = np.subtract(self.ydata, self.fitted_values)
        return np.mean(np.abs(residuals))

    def as_dict(self) -> dict:
        """Return the model description, data and fit results as a dictionary."""
        exported = (
            "name",
            "function_formula",
            "asymptote",
            "mae",
            "xdata",
            "ydata",
            "coefficients",
            "fitted_values",
        )
        return {key: getattr(self, key) for key in exported}
class Sigmoid(BaseStatModel):
    """Sigmoid model.

    Function formula:
        f(x) = L / (1 + e^(-k * (x - x_0)))

    Function parameters:
        L, the curve's maximum value/asymptote (the plateau);
        k, the logistic growth rate or steepness of the curve;
        x_0, the x value of the sigmoid's midpoint (inflection point);

    Function formula is taken from the Wikipedia article "Logistic function":
    https://en.wikipedia.org/wiki/Logistic_function

    The parameters are estimated with `scipy.optimize.curve_fit` when the
    instance is created.
    """

    name = "Sigmoid model"
    function_formula = "f(x) = L / (1 + e^(-k * (x - x_0)))"

    def __init__(self, xdata, ydata):
        # Coerce to arrays once so that list/tuple input works with the array
        # operations used for the initial guess and the bounds. This is a no-op
        # for inputs that already are ndarrays.
        xdata = np.asarray(xdata)
        ydata = np.asarray(ydata)
        super().__init__(xdata, ydata)
        # curve_fit: Use non-linear least squares to fit a function, f, to data.
        # popt: Optimal values for the parameters as array
        # pcov: The estimated covariance of popt
        popt, pcov = curve_fit(
            self.function,
            xdata=xdata,
            ydata=ydata,
            p0=self.initial_guess(),
            bounds=self.bounds(),
        )
        # Order matches the positional parameters of `function` after `x`.
        self.coefficients = {
            "x_0": popt[0],
            "k": popt[1],
            "L": popt[2],
        }
        self.asym_conf_int = self.confint(popt, pcov)
        self.fitted_values = self.function(xdata, **self.coefficients)

    def function(
        self,
        x: ArrayLike,
        x_0: float,
        k: float,
        L: float,
    ) -> ArrayLike:
        """Sigmoid function."""
        return L / (1 + np.exp(-k * (x - x_0)))

    def initial_guess(self) -> tuple:
        """Get an initial guess on parameters for single Sigmoid function.

        Returns:
            tuple: `(x_0, k, L)` with `x_0` set to half the number of x values,
            `k` set to 0 and `L` set to the maximum of `ydata` (at least 0).
        """
        x_0 = np.size(self.xdata) / 2
        k = 0
        L = np.max(self.ydata, initial=0)
        return x_0, k, L

    def bounds(self) -> tuple:
        """Get lower and upper bounds of the parameters for single Sigmoid function.

        Returns:
            tuple: `(lower, upper)`, each ordered as `(x_0, k, L)`.
        """
        x_0_upper_bound = np.size(self.xdata) * 1.5
        x_0_lower_bound = 0.0
        k_upper_bound = 1.0
        k_lower_bound = -1.0
        # The asymptote is not allowed to exceed the largest observed y value.
        L_upper_bound = np.max(self.ydata, initial=0)
        L_lower_bound = 0.0
        return (
            (x_0_lower_bound, k_lower_bound, L_lower_bound),
            (x_0_upper_bound, k_upper_bound, L_upper_bound),
        )

    @property
    def asymptote(self):
        """Fitted value of `L`."""
        return self.coefficients["L"]

    @property
    def inflection_point(self):
        """Fitted value of `x_0`."""
        return self.coefficients["x_0"]

    def confint(
        self,
        popt: ArrayLike,
        pcov: ArrayLike,
        par_pos: int = 2,
        level: float = 0.95,
    ) -> np.ndarray:
        """Confidence Intervals for a certain Model Parameter

        Args:
            popt: Optimal values for the parameters
            pcov: The estimated covariance of the optimal values for the parameters
            par_pos: Parameter position (index); the default 2 is the position
                of `L` in `popt`
            level: Confidence level

        Returns:
            np.ndarray: lower (first element) and upper (second element) confidence
                limits of the model parameter
        """
        popt = np.asarray(popt)
        alpha = 1.0 - level
        # Number of observations minus number of estimated parameters.
        degrees_of_freedom = len(self.ydata) - popt.size
        tval = t_distribution.ppf(1.0 - alpha / 2.0, degrees_of_freedom)
        perr = np.sqrt(np.diag(pcov))  # standard deviation errors
        lower = popt - perr * tval
        upper = popt + perr * tval
        return np.array([lower[par_pos], upper[par_pos]])
class SSlogis(BaseStatModel):
    """Self-Starting Nls Logistic Model.

    Function Formula:
        asym / (1 + e^((xmid - x) / scal))

    Function Parameters:
        x, numeric vector of values at which the model is evaluated;
        asym, numeric parameter for the asymptote;
        xmid, numeric parameter for the x value at the curve's inflection
            point. At xmid the value of SSlogis is Asym/2;
        scal, numeric scale parameter on the input axis;

    Formula and parameter description come from the R documentation
    "SSlogis: Self-Starting Nls Logistic Model":
    https://rdrr.io/r/stats/SSlogis.html

    The fit is run in R through `rpy2` when the instance is created.
    """

    name = "Nls Logistic Model"
    function_formula = "asym / (1 + e^((xmid - x) / scal))"

    def __init__(self, xdata, ydata):
        super().__init__(xdata, ydata)
        parameter_names = ("Asym", "xmid", "scal")
        with robjects.default_converter.context():
            r_stats = rpackages.importr("stats")
            formula = robjects.Formula("y ~ SSlogis(x, Asym, xmid, scal)")
            # The formula's environment is where R looks up `x` and `y`.
            data_env = formula.environment
            data_env["x"] = robjects.FloatVector(xdata)
            data_env["y"] = robjects.FloatVector(ydata)
            fit = r_stats.nls(formula)
            estimates = np.array(r_stats.coef(fit))
            self.coefficients = {
                label: estimates[position]
                for position, label in enumerate(parameter_names)
            }
            # Confidence interval of the asymptote
            asym_interval = r_stats.confint(fit, "Asym", 0.95)
            self.asym_conf_int = np.array(asym_interval)
            fitted = r_stats.fitted(fit)
            self.fitted_values = np.array(fitted)

    @property
    def asymptote(self):
        """Fitted value of `Asym`."""
        return self.coefficients["Asym"]

    @property
    def inflection_point(self):
        """Fitted value of `xmid`."""
        return self.coefficients["xmid"]
class SSdoubleS(BaseStatModel):
    """Two-Steps-Sigmoidal Model (Tangens Hyperbolicus)

    Function Formula:
        e + (f - e) * 1 / 2 * (np.tanh(k * (x - b)) + 1)
        + (Z - f) * 1 / 2 * (np.tanh(k * (x - c)) + 1)

    Function parameters:
        Z, numeric parameter representing the asymptote;

    The R definition of `SSdoubleS` is read from the file `ssdoubles.R` located
    next to this module, and the fit is run in R through `rpy2` when the
    instance is created.
    """

    name = "Two-Steps-Sigmoidal Model (Tangens Hyperbolicus)"
    function_formula = (
        "e + (f - e) * 1 / 2 * (np.tanh(k * (x - b)) + 1)"
        + "+ (Z - f) * 1 / 2 * (np.tanh(k * (x - c)) + 1)"
    )

    def __init__(self, xdata, ydata):
        super().__init__(xdata, ydata)
        # The base class keeps the unmodified data; only the local copies used
        # for fitting are shifted below.
        xdata = np.asarray(xdata)
        ydata = np.asarray(ydata)
        # `min(initial=0)` is the smaller of 0 and the data minimum, so these
        # tests are true whenever the data contains no negative value.
        if xdata.min(initial=0) == 0:
            xdata = xdata + 1
        # Remember the shift applied to ydata so that exactly this shift is
        # removed from the fitted values again.
        y_offset = 0
        if ydata.min(initial=0) == 0:
            y_offset = 1
            ydata = ydata + y_offset
        with robjects.default_converter.context():
            rstats = rpackages.importr("stats")
            fp = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "ssdoubles.R"
            )
            with open(fp, "r") as f:
                ssdoubles = f.read()
            # Evaluate the script in R so that `SSdoubleS` is available there.
            robjects.r(ssdoubles)
            fmla = robjects.Formula("y ~ SSdoubleS(x, e, f, k, b, Z, c)")
            env = fmla.environment
            env["x"] = robjects.FloatVector(xdata)
            env["y"] = robjects.FloatVector(ydata)
            fm = rstats.nls(fmla)
            coef = np.array(rstats.coef(fm))
            # NOTE(review): coefficients and the confidence interval are taken
            # from the fit on the shifted data and are not adjusted back.
            self.coefficients = {
                "e": coef[0],
                "f": coef[1],
                "k": coef[2],
                "b": coef[3],
                "Z": coef[4],
                "c": coef[5],
            }
            self.asym_conf_int = np.array(rstats.confint(fm, "Z", 0.95))
            # Subtract the offset from the fitted values to undo the shift of
            # ydata (nothing is subtracted when ydata was not shifted)
            self.fitted_values = np.array(rstats.fitted(fm)) - y_offset

    @property
    def asymptote(self):
        """Fitted value of `Z`."""
        return self.coefficients["Z"]

    @property
    def inflection_point(self):
        """Fitted value of `f`."""
        # NOTE(review): in the formula `f` is combined with `e` and `Z` as a
        # y level, while `b` and `c` are subtracted from x - confirm that `f`
        # is the intended value here.
        return self.coefficients["f"]
class SSfpl(BaseStatModel):
    """Self-Starting Nls Four-Parameter Logistic Model.

    Function Formula:
        A + (B - A) / (1 + e^((xmid - x) / scal))

    Function Parameters:
        x, numeric vector of values at which the model is evaluated;
        A, numeric parameter for the horizontal asymptote on the left side
            (very small values of x);
        B, numeric parameter for the horizontal asymptote on the right side
            (very large values of x);
        xmid, numeric parameter for the `x` value at the curve's inflection
            point. At xmid the value of SSfpl is midway between A and B;
        scal, numeric scale parameter on the input axis;

    Formula and parameter description come from the R documentation
    "SSfpl: Self-Starting Nls Four-Parameter Logistic Model":
    https://rdrr.io/r/stats/SSfpl.html

    The fit is run in R through `rpy2` when the instance is created.
    """

    name = "Nls Four-Parameter Logistic Model"
    function_formula = "A + (B - A) / (1 + e^((xmid - x) / scal))"

    def __init__(self, xdata, ydata):
        super().__init__(xdata, ydata)
        parameter_names = ("A", "B", "xmid", "scal")
        with robjects.default_converter.context():
            r_stats = rpackages.importr("stats")
            formula = robjects.Formula("y ~ SSfpl(x, A, B, xmid, scal)")
            # The formula's environment is where R looks up `x` and `y`.
            data_env = formula.environment
            data_env["x"] = robjects.FloatVector(xdata)
            data_env["y"] = robjects.FloatVector(ydata)
            fit = r_stats.nls(formula)
            estimates = np.array(r_stats.coef(fit))
            self.coefficients = {
                label: estimates[position]
                for position, label in enumerate(parameter_names)
            }
            # Confidence interval of the right-hand asymptote `B`
            asym_interval = r_stats.confint(fit, "B", 0.95)
            self.asym_conf_int = np.array(asym_interval)
            fitted = r_stats.fitted(fit)
            self.fitted_values = np.array(fitted)

    @property
    def asymptote(self):
        """Fitted value of `B`."""
        return self.coefficients["B"]

    @property
    def inflection_point(self):
        """Fitted value of `xmid`."""
        return self.coefficients["xmid"]
class SSasymp(BaseStatModel):
    """Self-Starting Nls Asymptotic Regression Model.

    Function Formula:
        asym + (R0 - asym) * e^(-e^(lrc) * x)

    Function Parameters:
        x, numeric vector of values at which the model is evaluated;
        asym, numeric parameter for the horizontal asymptote on the right side
            (very large values of `x`);
        R0, numeric parameter for the response when `x` is zero;
        lrc, numeric parameter for the natural logarithm of the rate constant;

    Formula and parameter description come from the R documentation
    "SSasymp: Self-Starting Nls Asymptotic Regression Model":
    https://rdrr.io/r/stats/SSasymp.html

    The fit is run in R through `rpy2` when the instance is created.
    """

    name = "Nls Asymptotic Regression Model"
    function_formula = "asym + (R0 - asym) * e^(-e^(lrc) * x)"

    def __init__(self, xdata, ydata):
        super().__init__(xdata, ydata)
        parameter_names = ("asym", "R0", "lrc")
        with robjects.default_converter.context():
            r_stats = rpackages.importr("stats")
            formula = robjects.Formula("y ~ SSasymp(x, asym, R0, lrc)")
            # The formula's environment is where R looks up `x` and `y`.
            data_env = formula.environment
            data_env["x"] = robjects.FloatVector(xdata)
            data_env["y"] = robjects.FloatVector(ydata)
            fit = r_stats.nls(formula)
            estimates = np.array(r_stats.coef(fit))
            self.coefficients = {
                label: estimates[position]
                for position, label in enumerate(parameter_names)
            }
            # Confidence interval of the asymptote
            asym_interval = r_stats.confint(fit, "asym", 0.95)
            self.asym_conf_int = np.array(asym_interval)
            fitted = r_stats.fitted(fit)
            self.fitted_values = np.array(fitted)

    @property
    def asymptote(self):
        """Fitted value of `asym`."""
        return self.coefficients["asym"]
class SSmicmen(BaseStatModel):
    """Self-Starting Nls Michaelis-Menten Model

    Function Formula
        Vm * x / (K + x)

    Parameters
        Vm, numeric parameter representing the maximum value of the response
            (the curve's maximum value/asymptote);
        K, numeric parameter representing the input value at which half the maximum
            response is attained (Michaelis constant);

    Function formula and parameter description is taken from the R documentation
    "SSmicmen: Self-Starting Nls Michaelis-Menten Model":
    https://rdrr.io/r/stats/SSmicmen.html

    The fit is run in R through `rpy2` when the instance is created.
    """

    name = "Nls Michaelis-Menten Model"
    function_formula = "Vm * x / (K + x)"

    def __init__(self, xdata, ydata):
        # Model fails when xdata or ydata includes zero
        super().__init__(xdata, ydata)
        # The base class keeps the unmodified data; only the local copies used
        # for fitting are shifted below.
        xdata = np.asarray(xdata)
        ydata = np.asarray(ydata)
        # `min(initial=0)` is the smaller of 0 and the data minimum, so these
        # tests are true whenever the data contains no negative value.
        if xdata.min(initial=0) == 0:
            xdata = xdata + 1
        # Remember the shift applied to ydata so that exactly this shift is
        # removed from the fitted values again.
        y_offset = 0
        if ydata.min(initial=0) == 0:
            y_offset = 1
            ydata = ydata + y_offset
        with robjects.default_converter.context():
            rstats = rpackages.importr("stats")
            fmla = robjects.Formula("y ~ SSmicmen(x, Vm, K)")
            env = fmla.environment
            env["x"] = robjects.FloatVector(xdata)
            env["y"] = robjects.FloatVector(ydata)
            fm = rstats.nls(fmla)
            coef = np.array(rstats.coef(fm))
            # NOTE(review): coefficients and the confidence interval are taken
            # from the fit on the shifted data and are not adjusted back.
            self.coefficients = {
                "Vm": coef[0],
                "K": coef[1],
            }
            self.asym_conf_int = np.array(rstats.confint(fm, "Vm", 0.95))
            # Subtract the offset from the fitted values to undo the shift of
            # ydata (nothing is subtracted when ydata was not shifted)
            self.fitted_values = np.array(rstats.fitted(fm)) - y_offset

    @property
    def asymptote(self):
        """Fitted value of `Vm`."""
        return self.coefficients["Vm"]