Skip to content

Commit 1679885

Browse files
Wrapper Update
1 parent 8a26df1 commit 1679885

12 files changed

Lines changed: 716 additions & 431 deletions

File tree

skrebate/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,6 @@
3030
from .surfstar import SURFstar
3131
from .multisurf import MultiSURF
3232
from .multisurfstar import MultiSURFstar
33-
from .turf import TuRF
33+
from .turf import TURF
34+
from .vls import VLS
35+
from .iter import Iter

skrebate/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2525
"""
2626

27-
__version__ = '0.61'
27+
__version__ = '0.7'

skrebate/iter.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
from sklearn.base import BaseEstimator
2+
import copy
3+
import numpy as np
4+
5+
class Iter(BaseEstimator):
    """Iterative wrapper around a Relief-based feature selector.

    Repeatedly fits ``relief_object``, feeding the (normalized) feature
    importance scores of each run back into the next run as instance/feature
    weights, until successive score vectors converge or ``max_iter`` runs
    have been performed. The final scores are exposed as
    ``feature_importances_`` and a ranked index as ``top_features_``.
    """

    def __init__(self, relief_object, max_iter=10, convergence_threshold=0.0001, beta=0.1):
        """
        :param relief_object: Must be an object that implements the standard sklearn fit function, and after fit, has attribute feature_importances_
                              that can be accessed. Scores must be a 1D np.ndarray of length # of features. The fit function must also be able to
                              take in an optional 1D np.ndarray 'weights' parameter of length num_features.
        :param max_iter: Maximum number of iterations to run.
        :param convergence_threshold: Difference between iteration feature weights to determine convergence.
        :param beta: Learning rate for the Widrow-Hoff weight update.
        :raises Exception: if any parameter fails validation.
        """
        if not self.check_is_int(max_iter) or max_iter < 0:
            raise Exception('max_iter must be a nonnegative integer')

        if not self.check_is_float(convergence_threshold) or convergence_threshold < 0:
            raise Exception('convergence_threshold must be a nonnegative float')

        if not self.check_is_float(beta):
            raise Exception('beta must be a float')

        self.relief_object = relief_object
        self.max_iter = max_iter
        # Store under the init-parameter name so sklearn's get_params()/clone()
        # can find the value (BaseEstimator looks up attributes by the __init__
        # parameter names). The misspelled attribute used by earlier releases
        # is kept as an alias for backward compatibility.
        self.convergence_threshold = convergence_threshold
        self.converage_threshold = convergence_threshold
        self.rank_absolute = self.relief_object.rank_absolute
        self.beta = beta

    def fit(self, X, y):
        """Scikit-learn required: Computes the feature importance scores from the training data.

        Parameters
        ----------
        X: array-like {n_samples, n_features} Training instances to compute the feature importance scores from
        y: array-like {n_samples} Training labels

        Returns
        -------
        self
        """
        # Iterate, feeding the resulting weights of each run into the fit of
        # the next run. Iteration i uses the scores from iteration i-1 as
        # feature weights; from iteration 2 onward the fed-in weights are
        # smoothed with a Widrow-Hoff update against the previous weights.
        last_iteration_scores = None
        last_last_iteration_scores = None
        for i in range(self.max_iter):
            # Deep-copy so each run starts from a pristine estimator.
            copy_relief_object = copy.deepcopy(self.relief_object)
            if i == 0:
                # First run: plain fit, no weights available yet.
                copy_relief_object.fit(X, y)
                last_iteration_scores = copy_relief_object.feature_importances_
            elif i == 1:
                if self.rank_absolute:
                    absolute_weights = np.absolute(last_iteration_scores)
                    transformed_weights = absolute_weights / np.max(absolute_weights)
                else:
                    # NOTE: transform_weights mutates last_iteration_scores in
                    # place, so the convergence check below compares the
                    # TRANSFORMED previous scores against the new scores.
                    transformed_weights = self.transform_weights(last_iteration_scores)
                copy_relief_object.fit(X, y, weights=transformed_weights)
                if self.has_converged(last_iteration_scores, copy_relief_object.feature_importances_):
                    last_iteration_scores = copy_relief_object.feature_importances_
                    break
                last_last_iteration_scores = copy.deepcopy(transformed_weights)
                last_iteration_scores = copy_relief_object.feature_importances_
            else:
                if self.rank_absolute:
                    absolute_weights = np.absolute(last_iteration_scores)
                    new_weights = absolute_weights / np.max(absolute_weights)
                else:
                    new_weights = self.transform_weights(last_iteration_scores)

                # Smooth the new weights toward the previously-fed weights.
                transformed_weights = self.widrow_hoff(last_last_iteration_scores, new_weights, self.beta)
                copy_relief_object.fit(X, y, weights=transformed_weights)
                if self.has_converged(last_iteration_scores, copy_relief_object.feature_importances_):
                    last_iteration_scores = copy_relief_object.feature_importances_
                    break
                last_last_iteration_scores = copy.deepcopy(transformed_weights)
                last_iteration_scores = copy_relief_object.feature_importances_

        # Save final feature importances (from the last completed run).
        self.feature_importances_ = last_iteration_scores

        # Rank features, by absolute magnitude if the wrapped estimator does.
        if self.rank_absolute:
            self.top_features_ = np.argsort(np.absolute(self.feature_importances_))[::-1]
        else:
            self.top_features_ = np.argsort(self.feature_importances_)[::-1]

        return self

    def widrow_hoff(self, originalw, neww, beta):
        # Move the old weights a fraction ``beta`` of the way toward the new.
        diff = neww - originalw
        return originalw + (beta * diff)

    def has_converged(self, weight1, weight2):
        # Converged when every element-wise difference is below the threshold.
        diffs = np.absolute(np.asarray(weight1) - np.asarray(weight2))
        return bool(np.all(diffs < self.convergence_threshold))

    def transform_weights(self, weights):
        # Clamp negative scores to 0 and scale the rest into [0, 1].
        # NOTE: mutates ``weights`` in place; fit() relies on this side effect
        # when checking convergence against the transformed scores.
        max_val = np.max(weights)
        for i in range(len(weights)):
            if weights[i] < 0:
                weights[i] = 0
            elif max_val == 0:
                # All scores were <= 0: nothing to scale by.
                weights[i] = 0
            else:
                weights[i] = weights[i] / max_val
        return weights

    def check_is_int(self, num):
        """Return True if ``num`` is a numeric value with no fractional part."""
        try:
            return num - int(num) == 0
        except (ValueError, TypeError):
            return False

    def check_is_float(self, num):
        """Return True if ``num`` can be interpreted as a float."""
        try:
            float(num)
            return True
        except (ValueError, TypeError):
            return False

    def transform(self, X):
        """Reduce X to its top ``n_features_to_select`` ranked features."""
        if X.shape[1] < self.relief_object.n_features_to_select:
            raise ValueError('Number of features to select is larger than the number of features in the dataset.')

        return X[:, self.top_features_[:self.relief_object.n_features_to_select]]

    def fit_transform(self, X, y):
        """Scikit-learn required: fit to data, then reduce X to the selected features."""
        self.fit(X, y)
        return self.transform(X)

skrebate/multisurf.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,15 @@ def _run_algorithm(self):
7171

7272
NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
7373

74-
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
75-
MultiSURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
76-
NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
77-
for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)
74+
if isinstance(self._weights, np.ndarray) and self.weight_final_scores:
75+
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
76+
MultiSURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
77+
NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type, self._weights)
78+
for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)
79+
else:
80+
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
81+
MultiSURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
82+
NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
83+
for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)
7884

7985
return np.array(scores)

skrebate/multisurfstar.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,19 @@ def _run_algorithm(self):
7676
NN_near_list = [i[0] for i in NNlist]
7777
NN_far_list = [i[1] for i in NNlist]
7878

79-
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
80-
MultiSURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
81-
NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
82-
for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
79+
if isinstance(self._weights, np.ndarray) and self.weight_final_scores:
80+
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
81+
MultiSURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
82+
NN_near, NN_far, self._headers, self._class_type, self._X, self._y,
83+
self._labels_std, self.data_type, self._weights)
84+
for instance_num, NN_near, NN_far in
85+
zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
86+
else:
87+
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
88+
MultiSURFstar_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
89+
NN_near, NN_far, self._headers, self._class_type, self._X, self._y,
90+
self._labels_std, self.data_type)
91+
for instance_num, NN_near, NN_far in
92+
zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
8393

8494
return np.array(scores)

0 commit comments

Comments
 (0)