Skip to content

Commit b50795e

Browse files
committed
regression_models.py: added classification models to all other architectures; minor bugfix for _feature_trans()
1 parent e4d8809 commit b50795e

1 file changed

Lines changed: 104 additions & 39 deletions

File tree

Code-SPA/regression_models.py

Lines changed: 104 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
# from sklearn.metrics import log_loss
99
from torch import Tensor, LongTensor
1010
from torch.nn import CrossEntropyLoss
11-
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
12-
from sklearn.svm import SVR
11+
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
12+
from sklearn.svm import SVR, SVC
1313
import numpy as np
1414
import warnings
1515
warnings.filterwarnings("ignore") # TODO: Want to just ignore the PLS constant residual warnings, but this will do for now
@@ -125,11 +125,11 @@ def EN_fitting(X, y, X_test, y_test, alpha, l1_ratio, max_iter = 10000, tol = 1e
125125
loss_train = _classification_score(y, pred_class_train, class_weight)
126126
pred_class_test = yhat_test.argmax(axis=1)
127127
loss_test = _classification_score(y_test, pred_class_test, class_weight)
128-
EN_params = EN_model.coef_.squeeze()
128+
EN_params = EN_model.coef_
129129
return (EN_model, EN_params, loss_train, loss_test, yhat_train, yhat_test)
130130

131131
def LCEN_fitting(X, y, X_test, y_test, alpha, l1_ratio, degree = 1, lag = 0, min_lag = 0, trans_type = 'all', interaction = True, selection = None, transform_y = False,
132-
all_pos_X = True, all_pos_y = True, scale_X = True, scale_y = True, classification = False, class_weight = None):
132+
all_pos_X = None, all_pos_y = True, scale_X = True, scale_y = True, classification = False, class_weight = None):
133133
"""
134134
Fits data using the LASSO-Clip-EN (LCEN) algorithm (doi.org/10.48550/arXiv.2402.17120).
135135
LCEN is a powerful, non-linear, and interpretable feature selection algorithm with considerable feature selection capabilities, model accuracy, and speed.
@@ -173,7 +173,7 @@ def LCEN_fitting(X, y, X_test, y_test, alpha, l1_ratio, degree = 1, lag = 0, min
173173
Whether all entries in the X/y data are positive or also contain some negative entries. This is done to avoid taking ...
174174
sqrt(k) for some k<0 when the features are transformed/expanded.
175175
This used to be determined within _feature_trans(), but it leads to array size problems when some folds have negative ...
176-
entries and some do not.
176+
entries and some do not, as the number of features will vary per fold.
177177
scale_X, scale_y : boolean, optional, default = True
178178
Whether to scale the X/y data.
179179
classification : bool, optional, default = False
@@ -182,6 +182,10 @@ def LCEN_fitting(X, y, X_test, y_test, alpha, l1_ratio, degree = 1, lag = 0, min
182182
The class weights for each class.
183183
SPA automatically sets this to an array of ones (equal weights) if the user did not input anything to SPA.main_SPA()
184184
"""
185+
if all_pos_X is None: # all_pos_X is never None, unless this function is called manually by the end-user
186+
all_pos_X = np.all(X >= 0, axis = 0)
187+
if X_test:
188+
all_pos_X = all_pos_X & np.all(X_test >= 0, axis = 0)
185189
# Expanding the X features
186190
if not transform_y and X.shape[1] > 0:
187191
X, X_test, label_names = _feature_trans(X, X_test, degree, interaction, trans_type, all_pos_X, all_pos_y)
@@ -231,24 +235,24 @@ def LCEN_fitting(X, y, X_test, y_test, alpha, l1_ratio, degree = 1, lag = 0, min
231235
if X.shape[1] == 0:
232236
LCEN_model = None
233237
LCEN_params = np.empty(0)
234-
mse_train = np.var(y)
235-
mse_test = np.var(y_test)
238+
loss_train = np.var(y) # TODO: for classification, this is not var(y) but guessing only the majority class and calculating the metrics from there
239+
loss_test = np.var(y_test)
236240
yhat_train = np.zeros(y.shape) # Ok to be 0 because y is scaled --> mean(y) is approximately 0
237241
yhat_test = np.zeros(y_test.shape)
238242
else:
239-
LCEN_model, LCEN_params, mse_train, mse_test, yhat_train, yhat_test = EN_fitting(X, y, X_test, y_test, alpha, l1_ratio, classification = classification, class_weight = class_weight)
243+
LCEN_model, LCEN_params, loss_train, loss_test, yhat_train, yhat_test = EN_fitting(X, y, X_test, y_test, alpha, l1_ratio, classification = classification, class_weight = class_weight)
240244
if not classification:
241245
# Calculating information criteria values
242246
num_train = X.shape[0] # Typically written as n
243247
num_parameter = (LCEN_params!=0).flatten().sum() # Typically written as k
244-
AIC = num_train*np.log(mse_train) + 2*num_parameter # num_train * log(MSE) is one of the formulae to replace L. Shown in https://doi.org/10.1002/wics.1460, for example.
248+
AIC = num_train*np.log(loss_train) + 2*num_parameter # num_train * log(MSE) is one of the formulae to replace L. Shown in https://doi.org/10.1002/wics.1460, for example.
245249
AICc = AIC + 2*num_parameter*(num_parameter+1) / (num_train - num_parameter - 1)
246-
BIC = num_train*np.log(mse_train) + num_parameter*np.log(num_train)
250+
BIC = num_train*np.log(loss_train) + num_parameter*np.log(num_train)
247251
else:
248252
AIC, AICc, BIC = 0, 0, 0
249-
return (LCEN_model, LCEN_params, mse_train, mse_test, yhat_train, yhat_test, label_names, (AIC, AICc, BIC))
253+
return (LCEN_model, LCEN_params, loss_train, loss_test, yhat_train, yhat_test, label_names, (AIC, AICc, BIC))
250254

251-
def forest_fitting(X, y, X_test, y_test, n_estimators = 100, max_depth = 10, min_samples_leaf = 0.1, max_features = 1.0, learning_rate = None, random_state = 0):
255+
def forest_fitting(X, y, X_test, y_test, n_estimators = 100, max_depth = 10, min_samples_leaf = 0.1, max_features = 1.0, learning_rate = None, random_state = 0, classification = False, class_weight = None):
252256
"""
253257
Fits data using a random forest or gradient-boosted decision trees
254258
@@ -274,22 +278,42 @@ def forest_fitting(X, y, X_test, y_test, n_estimators = 100, max_depth = 10, min
274278
If None, a random forest regressor is used.
275279
random_state : int or None, optional, default = 0
276280
Seed for the random number generator.
281+
classification : bool, optional, default = False
282+
Whether to train a model for a classification (True) or regression (False) task.
283+
class_weight : array, optional, default = None
284+
The class weights for each class.
285+
SPA automatically sets this to an array of ones (equal weights) if the user did not input anything to SPA.main_SPA()
277286
"""
278-
if learning_rate is None or learning_rate == 0:
287+
if (learning_rate is None or learning_rate == 0) and not classification:
279288
forest = RandomForestRegressor(n_estimators, max_depth = max_depth, random_state = random_state, max_features = max_features, min_samples_leaf = min_samples_leaf)
280-
else:
289+
elif not classification:
281290
forest = GradientBoostingRegressor(n_estimators = n_estimators, learning_rate = learning_rate, max_depth = max_depth, random_state = random_state,
282291
max_features = max_features, min_samples_leaf = min_samples_leaf)
292+
elif learning_rate is None or learning_rate == 0:
293+
forest = RandomForestClassifier(n_estimators, max_depth = max_depth, random_state = random_state, max_features = max_features, min_samples_leaf = min_samples_leaf)
294+
else:
295+
forest = GradientBoostingClassifier(n_estimators = n_estimators, learning_rate = learning_rate, max_depth = max_depth, random_state = random_state,
296+
max_features = max_features, min_samples_leaf = min_samples_leaf)
283297
forest.fit(X, y.flatten())
284-
# Predictions and MSEs
285-
yhat_train = forest.predict(X)
286-
yhat_test = forest.predict(X_test)
287-
mse_train = MSE(y, yhat_train)
288-
mse_test = MSE(y_test, yhat_test)
298+
# Predictions and metrics
299+
if not classification:
300+
yhat_train = forest.predict(X)
301+
yhat_test = forest.predict(X_test)
302+
loss_train = MSE(y, yhat_train)
303+
loss_test = MSE(y_test, yhat_test)
304+
else:
305+
if class_weight is None:
306+
class_weight = np.ones(len( set(y.squeeze()) ))
307+
yhat_train = forest.predict_proba(X)
308+
yhat_test = forest.predict_proba(X_test)
309+
pred_class_train = yhat_train.argmax(axis=1)
310+
loss_train = _classification_score(y, pred_class_train, class_weight)
311+
pred_class_test = yhat_test.argmax(axis=1)
312+
loss_test = _classification_score(y_test, pred_class_test, class_weight)
289313

290-
return (forest, mse_train, mse_test, yhat_train, yhat_test)
314+
return (forest, loss_train, loss_test, yhat_train, yhat_test)
291315

292-
def SVM_fitting(X, y, X_test, y_test, gamma = 'scale', C = 100, epsilon = 0.1, tol = 1e-4, max_iter = 10000):
316+
def SVM_fitting(X, y, X_test, y_test, gamma = 'scale', C = 100, epsilon = 0.1, tol = 1e-4, max_iter = 10000, classification = False, class_weight = None):
293317
"""
294318
Support Vector Machine-based regression
295319
@@ -306,21 +330,40 @@ def SVM_fitting(X, y, X_test, y_test, gamma = 'scale', C = 100, epsilon = 0.1, t
306330
C : float > 0, optional, default = 100
307331
Parameter inversely proportional to the strength of the regularization
308332
epsilon : float >= 0, optional, default = 0.1
309-
Epsilon-tube within which no penalty is associated in the training loss function
333+
Epsilon-tube within which no penalty is associated in the training loss function.
334+
Relevant only when classification == False.
310335
max_iter : int, optional, default = 10000
311336
The maximum number of iterations
337+
classification : bool, optional, default = False
338+
Whether to train a model for a classification (True) or regression (False) task.
339+
class_weight : array, optional, default = None
340+
The class weights for each class.
341+
SPA automatically sets this to an array of ones (equal weights) if the user did not input anything to SPA.main_SPA()
312342
"""
313-
SVM_model = SVR(gamma = gamma, C = C, epsilon = epsilon, tol = tol, max_iter = max_iter)
343+
if not classification:
344+
SVM_model = SVR(gamma = gamma, C = C, epsilon = epsilon, tol = tol, max_iter = max_iter)
345+
else:
346+
SVM_model = SVC(gamma = gamma, C = C, probability = True, tol = tol, max_iter = max_iter)
314347
SVM_model.fit(X, y.flatten())
315-
# Predictions and MSEs
316-
yhat_train = SVM_model.predict(X)
317-
yhat_test = SVM_model.predict(X_test)
318-
mse_train = MSE(y, yhat_train)
319-
mse_test = MSE(y_test, yhat_test)
348+
# Predictions and metrics
349+
if not classification:
350+
yhat_train = SVM_model.predict(X)
351+
yhat_test = SVM_model.predict(X_test)
352+
loss_train = MSE(y, yhat_train)
353+
loss_test = MSE(y_test, yhat_test)
354+
else:
355+
if class_weight is None:
356+
class_weight = np.ones(len( set(y.squeeze()) ))
357+
yhat_train = SVM_model.predict_proba(X)
358+
yhat_test = SVM_model.predict_proba(X_test)
359+
pred_class_train = yhat_train.argmax(axis=1)
360+
loss_train = _classification_score(y, pred_class_train, class_weight)
361+
pred_class_test = yhat_test.argmax(axis=1)
362+
loss_test = _classification_score(y_test, pred_class_test, class_weight)
320363

321-
return (SVM_model, mse_train, mse_test, yhat_train, yhat_test)
364+
return (SVM_model, loss_train, loss_test, yhat_train, yhat_test)
322365

323-
def AdaBoost_fitting(X, y, X_test, y_test, n_estimators = 50, learning_rate = 0.1, random_state = 0):
366+
def AdaBoost_fitting(X, y, X_test, y_test, n_estimators = 50, learning_rate = 0.1, random_state = 0, classification = False, class_weight = None):
324367
"""
325368
Adaptive Boosting regression
326369
@@ -338,18 +381,36 @@ def AdaBoost_fitting(X, y, X_test, y_test, n_estimators = 50, learning_rate = 0.
338381
If None, a random forest regressor is used.
339382
random_state : int or None, optional, default = 0
340383
Seed for the random number generator.
384+
classification : bool, optional, default = False
385+
Whether to train a model for a classification (True) or regression (False) task.
386+
class_weight : array, optional, default = None
387+
The class weights for each class.
388+
SPA automatically sets this to an array of ones (equal weights) if the user did not input anything to SPA.main_SPA()
341389
"""
342-
AdaB_model = AdaBoostRegressor(n_estimators = n_estimators, learning_rate = learning_rate, loss = 'square', random_state = random_state)
390+
if not classification:
391+
AdaB_model = AdaBoostRegressor(n_estimators = n_estimators, learning_rate = learning_rate, loss = 'square', random_state = random_state)
392+
else:
393+
AdaB_model = AdaBoostClassifier(n_estimators = n_estimators, learning_rate = learning_rate, algorithm = 'SAMME', random_state = random_state)
343394
AdaB_model.fit(X, y.flatten())
344-
# Predictions and MSEs
345-
yhat_train = AdaB_model.predict(X)
346-
yhat_test = AdaB_model.predict(X_test)
347-
mse_train = MSE(y, yhat_train)
348-
mse_test = MSE(y_test, yhat_test)
395+
# Predictions and metrics
396+
if not classification:
397+
yhat_train = AdaB_model.predict(X)
398+
yhat_test = AdaB_model.predict(X_test)
399+
loss_train = MSE(y, yhat_train)
400+
loss_test = MSE(y_test, yhat_test)
401+
else:
402+
if class_weight is None:
403+
class_weight = np.ones(len( set(y.squeeze()) ))
404+
yhat_train = AdaB_model.predict_proba(X)
405+
yhat_test = AdaB_model.predict_proba(X_test)
406+
pred_class_train = yhat_train.argmax(axis=1)
407+
loss_train = _classification_score(y, pred_class_train, class_weight)
408+
pred_class_test = yhat_test.argmax(axis=1)
409+
loss_test = _classification_score(y_test, pred_class_test, class_weight)
349410

350-
return (AdaB_model, mse_train, mse_test, yhat_train, yhat_test)
411+
return (AdaB_model, loss_train, loss_test, yhat_train, yhat_test)
351412

352-
def _feature_trans(X, X_test = None, degree = 2, interaction = True, trans_type = 'all', all_pos_X = True, all_pos_y = True):
413+
def _feature_trans(X, X_test = None, degree = 2, interaction = True, trans_type = 'all', all_pos_X = None, all_pos_y = True):
353414
"""
354415
A helper function that is automatically called by SPA.
355416
Performs non-linear transformations of X (and X_test). Transformations include polynomial transforms up to "degree", ...
@@ -378,9 +439,13 @@ def _feature_trans(X, X_test = None, degree = 2, interaction = True, trans_type
378439
Whether all entries in the data are positive or also contain some negative entries. This is done to avoid taking ...
379440
sqrt(k) for some k<0.
380441
This used to be determined within this function, but it leads to array size problems when some folds have negative ...
381-
entries and some do not.
442+
entries and some do not, as the number of features will vary per fold.
382443
"""
383444
X_test_out = None # Declaring this variable to avoid errors when doing the return statement
445+
if all_pos_X is None: # all_pos_X is never None, unless this function is called manually by the end-user
446+
all_pos_X = np.all(X >= 0, axis = 0) # This assumes [perhaps incorrectly] that all entries of X are indeed X entries (and not y entries that were concatenated because LCEN_transform_y == True)
447+
if X_test: # X_test may be None
448+
all_pos_X = all_pos_X & np.all(X_test >= 0, axis = 0)
384449

385450
# Polynomial transforms (and interaction terms)
386451
poly = PolynomialFeatures(degree, include_bias = False, interaction_only = trans_type.casefold() == 'simple_interaction', order = 'F') # Switching to Fortran order to speed up Elastic Net (and LCEN) [but does not make any significant difference]

0 commit comments

Comments
 (0)