1+ from sklearn .base import BaseEstimator
2+ import copy
3+ import numpy as np
4+
class Iter(BaseEstimator):
    """Iterative Relief wrapper.

    Repeatedly fits ``relief_object``, feeding each iteration's (transformed)
    feature importance scores back into the next fit as per-feature weights,
    until successive score vectors converge or ``max_iter`` is reached.
    """

    def __init__(self, relief_object, max_iter=10, convergence_threshold=0.0001, beta=0.1):
        '''
        :param relief_object: Must be an object that implements the standard sklearn fit function, and after fit, has attribute feature_importances_
                              that can be accessed. Scores must be a 1D np.ndarray of length # of features. The fit function must also be able to
                              take in an optional 1D np.ndarray 'weights' parameter of length num_features.
        :param max_iter: Maximum number of iterations to run (nonnegative integer)
        :param convergence_threshold: Difference between iteration feature weights to determine convergence (nonnegative float)
        :param beta: Learning rate for Widrow-Hoff weight update
        '''
        if not self.check_is_int(max_iter) or max_iter < 0:
            raise Exception('max_iter must be a nonnegative integer')

        if not self.check_is_float(convergence_threshold) or convergence_threshold < 0:
            raise Exception('convergence_threshold must be a nonnegative float')

        if not self.check_is_float(beta):
            raise Exception('beta must be a float')

        self.relief_object = relief_object
        self.max_iter = max_iter
        self.convergence_threshold = convergence_threshold
        # Backward-compatible alias: earlier versions exposed this misspelled name.
        self.converage_threshold = convergence_threshold
        self.rank_absolute = self.relief_object.rank_absolute
        self.beta = beta

    def fit(self, X, y):
        """Scikit-learn required: Computes the feature importance scores from the training data.

        Parameters
        ----------
        X: array-like {n_samples, n_features} Training instances to compute the feature importance scores from
        y: array-like {n_samples} Training labels

        Returns
        -------
        self
        """
        # Iterate, feeding the resulting weights of each run into the fit of the next run.
        last_scores = None       # feature_importances_ from the most recent fit
        prev_fit_weights = None  # weights fed into the previous fit (Widrow-Hoff anchor)
        for i in range(self.max_iter):
            # Fresh copy each iteration so the user's relief_object is never mutated.
            estimator = copy.deepcopy(self.relief_object)
            if i == 0:
                # First pass: plain unweighted fit to get initial scores.
                estimator.fit(X, y)
            else:
                new_weights = self._scores_to_weights(last_scores)
                if i > 1:
                    # From the third iteration on, damp the update toward the
                    # previously-used weights (Widrow-Hoff learning rule).
                    new_weights = self.widrow_hoff(prev_fit_weights, new_weights, self.beta)
                estimator.fit(X, y, weights=new_weights)
                if self.has_converged(last_scores, estimator.feature_importances_):
                    last_scores = estimator.feature_importances_
                    break
                prev_fit_weights = copy.deepcopy(new_weights)
            last_scores = estimator.feature_importances_

        # Save final feature importances (None if max_iter == 0, matching prior behavior).
        self.feature_importances_ = last_scores

        if self.rank_absolute:
            self.top_features_ = np.argsort(np.absolute(self.feature_importances_))[::-1]
        else:
            self.top_features_ = np.argsort(self.feature_importances_)[::-1]

        return self

    def _scores_to_weights(self, scores):
        """Map raw importance scores to nonnegative fit weights in [0, 1].

        Uses absolute-value scaling when ``rank_absolute`` is set, otherwise
        clips negatives to zero and scales by the maximum score.
        """
        if self.rank_absolute:
            abs_scores = np.absolute(scores)
            max_val = np.max(abs_scores)
            # Guard against an all-zero score vector (would otherwise produce NaNs).
            if max_val == 0:
                return np.zeros_like(abs_scores, dtype=float)
            return abs_scores / max_val
        return self.transform_weights(scores)

    def widrow_hoff(self, originalw, neww, beta):
        """Move ``originalw`` toward ``neww`` by a fraction ``beta`` of their difference."""
        return originalw + beta * (neww - originalw)

    def has_converged(self, weight1, weight2):
        """Return True when every per-feature difference is below the convergence threshold."""
        w1 = np.asarray(weight1, dtype=float)
        w2 = np.asarray(weight2, dtype=float)
        if w1.size == 0:
            # Vacuously converged, matching the original element-wise loop.
            return True
        return bool(np.max(np.abs(w1 - w2)) < self.convergence_threshold)

    def transform_weights(self, weights):
        """Clip negative scores to zero and scale by the maximum score.

        Returns a new float array; the input is NOT modified in place (the
        input may be another estimator's ``feature_importances_``).
        """
        scores = np.asarray(weights, dtype=float)
        max_val = np.max(scores)
        # If max_val <= 0 every score is nonpositive, so all weights clip to zero.
        if max_val <= 0:
            return np.zeros_like(scores)
        return np.clip(scores, 0, None) / max_val

    def check_is_int(self, num):
        """Return True if ``num`` is numeric with no fractional part."""
        try:
            float(num)
            return num - int(num) == 0
        except (TypeError, ValueError):
            return False

    def check_is_float(self, num):
        """Return True if ``num`` can be interpreted as a float."""
        try:
            float(num)
            return True
        except (TypeError, ValueError):
            return False

    def transform(self, X):
        """Reduce X to the top ``n_features_to_select`` features by importance."""
        if X.shape[1] < self.relief_object.n_features_to_select:
            raise ValueError('Number of features to select is larger than the number of features in the dataset.')

        return X[:, self.top_features_[:self.relief_object.n_features_to_select]]

    def fit_transform(self, X, y):
        """Fit to the training data, then reduce X to the selected features."""
        self.fit(X, y)
        return self.transform(X)