-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathfit_to_data_streamlit.py
More file actions
194 lines (137 loc) · 6.83 KB
/
fit_to_data_streamlit.py
File metadata and controls
194 lines (137 loc) · 6.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import pandas as pd
import numpy as np
import scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
import streamlit as st
import platform
import math
# from matplotlib.backends.backend_agg import RendererAgg
# _lock = RendererAgg.lock
# https://stackoverflow.com/questions/55212002/how-do-i-use-scipy-optimize-curve-fit-with-panda-df
def main():
st.subheader("Find formula for personal contribution (*basishuur*, the part of the rent you have to pay yourself) in the calculation of the housing allowance")
st.info("A graph was given. Goal was to find a formula to describe the graph, to implement in some code")
if platform.processor() != "":
file = r"C:\Users\rcxsm\Documents\python_scripts\streamlit_scripts\input\eigen_bijdrage2022.csv"
#file = r"C:\Users\rcxsm\Documents\python_scripts\streamlit_scripts\input\zorgtoeslag.csv"
else:
file = "https://raw.githubusercontent.com/rcsmit/streamlit_scripts/main/input/eigen_bijdrage2022.csv"
data = pd.read_csv(
file,
delimiter=",",
low_memory=False,
)
df = pd.read_csv(file)
st.write (df)
xdata = df['x'] #.as_matrix()
ydata = df['y'] #.as_matrix()
def various_functions():
# included for future reference
pass
# Functions to calculate values a,b and c ##########################
def func_(x, a, b, c):
return (a*np.sin(b*x))+(c * np.exp(x))
def derivate(x, a, b, c):
''' First derivate of the sigmoidal function. Might contain an error'''
return (np.exp(b * (-1 * np.exp(-c * x)) - c * x) * a * b * c ) + BASEVALUE
#return a * b * c * np.exp(-b*np.exp(-c*x))*np.exp(-c*x)
def exponential(x, a, r):
'''Exponential growth function.'''
return (a * ((1+r)**x))
def derivate_of_derivate(x,a,b,c):
return a*b*c*(b*c*exp(-c*x) - c)*exp(-b*exp(-c*x) - c*x)
def gaussian(x, a, b, c):
''' Standard Guassian function. Doesnt give results, Not in use'''
return a * np.exp(-np.power(x - b, 2) / (2 * np.power(c, 2)))
def gaussian_2(x, a, b, c):
''' Another gaussian fuctnion. in use
a = height, b = cen (?), c= width '''
return a * np.exp(-((x - b) ** 2) / c)
def growth(x, a, b):
""" Growth model. a is the value at t=0. b is the so-called R number.
Doesnt work. FIX IT """
return np.power(a * 0.5, (x / (4 * (math.log(0.5) / math.log(b)))))
# https://replit.com/@jsalsman/COVID19USlognormals
def lognormal_c(x, s, mu, h): # x, sigma, mean, height
return h * 0.5 * erfc(- (log(x) - mu) / (s * sqrt(2)))
# https://en.wikipedia.org/wiki/Log-normal_distribution#Cumulative_distribution_function
def normal_c(x, s, mu, h): # x, sigma, mean, height
return h * 0.5 * (1 + erf((x - mu) / (s * sqrt(2))))
def sigmoidal(x, a, b, c):
''' Standard sigmoidal function
a = height, b= halfway point, c = growth rate
https://en.wikipedia.org/wiki/sigmoidal_function '''
return a * np.exp(-b * np.exp(-c * x))
def func_(x, a, b):
return (a + b* x)
def func(x, a, b):
#https://wetten.overheid.nl/BWBR0008659/2022-01-01 art. 19.2
return ((a * x**2) + (b *x))
def func_(x, a, r):
'''Exponential growth function.'''
return (a * ((1+r)**x))
# #####################################################################
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xdata, *parameterTuple)
return np.sum((ydata - val) ** 2.0)
def generate_Initial_Parameters():
parameterBounds = []
parameterBounds.append([0.0, 10.0]) # search bounds for a
parameterBounds.append([0.0, 10.0]) # search bounds for b
#parameterBounds.append([0.0, 10.0]) # search bounds for c
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
return result.x
# by default, differential_evolution completes by calling curve_fit() using parameter bounds
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are aoutside those bounds
#popt, pcov = curve_fit(func, xdata, ydata, geneticParameters)
popt, pcov = curve_fit(func, xdata, ydata)
# poptarray
# Optimal values for the parameters so that the sum of the squared residuals
# of f(xdata, *popt) - ydata is minimized.
# pcov2-D array
# The estimated covariance of popt. The diagonals provide the variance of the parameter estimate.
# To compute one standard deviation errors on the parameters use perr = np.sqrt(np.diag(pcov)).
# modelPredictions = func(xdata, *popt)
modelPredictions = func(xdata, *popt)
absError = modelPredictions - ydata
SE = np.square(absError) # squared errors
MSE = np.mean(SE) # mean squared errors
RMSE = np.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (np.var(absError) / np.var(ydata))
st.write()
#st.write(f'Formula: y = ({popt[0]} * ((1+{popt[1]})**x))') # {popt[0]} * x + {popt[1]}')
st.write(f'Formula: y = (({popt[0]} * x^2) + ({popt[1]} *x))')
#st.write(f'Formula: y = {popt[0]} * x + {popt[1]}') # {popt[0]} * x + {popt[1]}')
st.write(f'Root Mean Squared Error, RMSE: {RMSE}' )
st.write(f'R-squared: {Rsquared}')
st.write()
# factor a en b voor berekenen basishuur
# https://download.belastingdienst.nl/toeslagen/docs/berekening_huurtoeslag_tg0831z21fd.pdf
a_2022 = 5.96879*10**-7
b_2022 = 0.002363459319
a_2021= 6.23385*10**-7
b_2021= 0.002453085056
fig = plt.figure()
plt.plot(xdata, func(xdata, a_2021,b_2021), 'b-',
label='ministeriele regeling 2021' )
plt.plot(xdata, func(xdata, a_2022,b_2022), 'b--',
label='ministeriele regeling 2022' )
plt.plot(xdata, func(xdata, *popt), 'r--',
label='fit: a=%5.3f, b=%5.3f' % tuple(popt)) # c=%5.3f
plt.plot(xdata, ydata, 'g-',
label='values' )
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
st.pyplot(fig)
st.info("Official formula is Basishuur = (factor a x rekeninkomen^2) + (factor b x rekeninkomen) + taakstellingsbedrag. https://download.belastingdienst.nl/toeslagen/docs/berekening_huurtoeslag_tg0831z21fd.pdf")
if __name__ == "__main__":
main()