diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
index b9689e306..8c97065a9 100755
--- a/docs/sources/CHANGELOG.md
+++ b/docs/sources/CHANGELOG.md
@@ -18,7 +18,10 @@ The CHANGELOG for the current development version is available at
##### New Features
-The fit method of the SequentialFeatureSelector now optionally accepts **fit_params for the estimator that is used for the feature selection. ([#350](https://github.com/rasbt/mlxtend/pull/350) by Zach Griffith)
+- The fit method of the ExhaustiveFeatureSelector now optionally accepts
+ `**fit_params` for the estimator that is used for the feature selection. ([#354](https://github.com/rasbt/mlxtend/pull/354) by Zach Griffith)
+- The fit method of the SequentialFeatureSelector now optionally accepts
+ `**fit_params` for the estimator that is used for the feature selection. ([#350](https://github.com/rasbt/mlxtend/pull/350) by Zach Griffith)
- -
diff --git a/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb b/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb
index 4648936c7..51368635e 100644
--- a/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb
+++ b/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb
@@ -1666,7 +1666,7 @@
"\n",
"
\n",
"\n",
- "*fit(X, y)*\n",
+ "*fit(X, y, **fit_params)*\n",
"\n",
"Perform feature selection and learn model from training data.\n",
"\n",
@@ -1681,6 +1681,10 @@
"\n",
" Target values.\n",
"\n",
+ "- `fit_params` : dict of string -> object, optional\n",
+ "\n",
+ " Parameters to pass to the fit method of classifier.\n",
+ "\n",
"**Returns**\n",
"\n",
"- `self` : object\n",
@@ -1688,7 +1692,7 @@
"\n",
"
\n",
"\n",
- "*fit_transform(X, y)*\n",
+ "*fit_transform(X, y, **fit_params)*\n",
"\n",
"Fit to training data and return the best selected features from X.\n",
"\n",
@@ -1699,6 +1703,14 @@
" Training vectors, where n_samples is the number of samples and\n",
" n_features is the number of features.\n",
"\n",
+ "- `y` : array-like, shape = [n_samples]\n",
+ "\n",
+ " Target values.\n",
+ "\n",
+ "- `fit_params` : dict of string -> object, optional\n",
+ "\n",
+ " Parameters to pass to the fit method of classifier.\n",
+ "\n",
"**Returns**\n",
"\n",
"Feature subset of X, shape={n_samples, k_features}\n",
@@ -1815,7 +1827,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.1"
+ "version": "3.6.3"
}
},
"nbformat": 4,
diff --git a/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb b/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
index 107f4fc04..61528a92d 100644
--- a/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
+++ b/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
@@ -1592,6 +1592,10 @@
" Training vectors, where n_samples is the number of samples and\n",
" n_features is the number of features.\n",
"\n",
+ "- `y` : array-like, shape = [n_samples]\n",
+ "\n",
+ " Target values.\n",
+ "\n",
"- `fit_params` : dict of string -> object, optional\n",
"\n",
" Parameters to pass to to the fit method of classifier.\n",
diff --git a/mlxtend/feature_selection/exhaustive_feature_selector.py b/mlxtend/feature_selection/exhaustive_feature_selector.py
index e71fe8e02..41a2f3192 100644
--- a/mlxtend/feature_selection/exhaustive_feature_selector.py
+++ b/mlxtend/feature_selection/exhaustive_feature_selector.py
@@ -25,16 +25,17 @@
from sklearn.externals.joblib import Parallel, delayed
-def _calc_score(selector, X, y, indices):
+def _calc_score(selector, X, y, indices, **fit_params):
if selector.cv:
scores = cross_val_score(selector.est_,
X[:, indices], y,
cv=selector.cv,
scoring=selector.scorer,
n_jobs=1,
- pre_dispatch=selector.pre_dispatch)
+ pre_dispatch=selector.pre_dispatch,
+ fit_params=fit_params)
else:
- selector.est_.fit(X[:, indices], y)
+ selector.est_.fit(X[:, indices], y, **fit_params)
scores = np.array([selector.scorer(selector.est_, X[:, indices], y)])
return indices, scores
@@ -127,7 +128,7 @@ def __init__(self, estimator, min_features=1, max_features=1,
self.est_ = self.estimator
self.fitted = False
- def fit(self, X, y):
+ def fit(self, X, y, **fit_params):
"""Perform feature selection and learn model from training data.
Parameters
@@ -137,6 +138,8 @@ def fit(self, X, y):
n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
+ fit_params : dict of string -> object, optional
+ Parameters to pass to the fit method of classifier.
Returns
-------
@@ -160,41 +163,42 @@ def fit(self, X, y):
raise AttributeError('min_features must be <= max_features')
candidates = chain(*((combinations(range(X.shape[1]), r=i))
- for i in range(self.min_features,
- self.max_features + 1)))
+ for i in range(self.min_features,
+ self.max_features + 1)))
self.subsets_ = {}
-
+
def ncr(n, r):
"""Return the number of combinations of length r from n items.
-
+
Parameters
----------
n : {integer}
Total number of items
r : {integer}
Number of items to select from n
-
+
Returns
-------
Number of combinations, integer
-
+
"""
-
+
r = min(r, n-r)
if r == 0:
return 1
numer = reduce(op.mul, range(n, n-r, -1))
denom = reduce(op.mul, range(1, r+1))
return numer//denom
-
+
all_comb = np.sum([ncr(n=X.shape[1], r=i)
for i in range(self.min_features,
self.max_features + 1)])
-
+
n_jobs = min(self.n_jobs, all_comb)
parallel = Parallel(n_jobs=n_jobs, pre_dispatch=self.pre_dispatch)
- work = enumerate(parallel(delayed(_calc_score)(self, X, y, c)
+ work = enumerate(parallel(delayed(_calc_score)
+ (self, X, y, c, **fit_params)
for c in candidates))
for iteration, (c, cv_scores) in work:
@@ -239,7 +243,7 @@ def transform(self, X):
self._check_fitted()
return X[:, self.best_idx_]
- def fit_transform(self, X, y):
+ def fit_transform(self, X, y, **fit_params):
"""Fit to training data and return the best selected features from X.
Parameters
@@ -247,13 +251,17 @@ def fit_transform(self, X, y):
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
+ y : array-like, shape = [n_samples]
+ Target values.
+ fit_params : dict of string -> object, optional
+ Parameters to pass to the fit method of classifier.
Returns
-------
Feature subset of X, shape={n_samples, k_features}
"""
- self.fit(X, y)
+ self.fit(X, y, **fit_params)
return self.transform(X)
def get_metric_dict(self, confidence_interval=0.95):
diff --git a/mlxtend/feature_selection/sequential_feature_selector.py b/mlxtend/feature_selection/sequential_feature_selector.py
index 2f8aed793..3f7fa8903 100644
--- a/mlxtend/feature_selection/sequential_feature_selector.py
+++ b/mlxtend/feature_selection/sequential_feature_selector.py
@@ -481,6 +481,8 @@ def fit_transform(self, X, y, **fit_params):
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
+ y : array-like, shape = [n_samples]
+ Target values.
fit_params : dict of string -> object, optional
Parameters to pass to to the fit method of classifier.
diff --git a/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py b/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py
index 82f796d14..d9dc6eecd 100644
--- a/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py
+++ b/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py
@@ -8,6 +8,7 @@
import numpy as np
from numpy.testing import assert_almost_equal
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
+from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.classifier import SoftmaxRegression
from sklearn.datasets import load_iris
@@ -164,6 +165,40 @@ def test_knn_cv3():
assert round(efs1.best_score_, 4) == 0.9728
+def test_fit_params():
+ iris = load_iris()
+ X = iris.data
+ y = iris.target
+ sample_weight = np.ones(X.shape[0])
+ forest = RandomForestClassifier(n_estimators=100, random_state=123)
+ efs1 = EFS(forest,
+ min_features=3,
+ max_features=3,
+ scoring='accuracy',
+ cv=4,
+ print_progress=False)
+ efs1 = efs1.fit(X, y, sample_weight=sample_weight)
+ expect = {0: {'feature_idx': (0, 1, 2),
+ 'cv_scores': np.array([0.94871795, 0.92307692,
+ 0.91666667, 0.97222222]),
+ 'avg_score': 0.9401709401709402},
+ 1: {'feature_idx': (0, 1, 3),
+ 'cv_scores': np.array([0.92307692, 0.92307692,
+ 0.88888889, 1.]),
+ 'avg_score': 0.9337606837606838},
+ 2: {'feature_idx': (0, 2, 3),
+ 'cv_scores': np.array([0.97435897, 0.94871795,
+ 0.94444444, 0.97222222]),
+ 'avg_score': 0.9599358974358974},
+ 3: {'feature_idx': (1, 2, 3),
+ 'cv_scores': np.array([0.97435897, 0.94871795,
+ 0.91666667, 1.]),
+ 'avg_score': 0.9599358974358974}}
+ dict_compare_utility(d1=expect, d2=efs1.subsets_)
+ assert efs1.best_idx_ == (0, 2, 3)
+ assert round(efs1.best_score_, 4) == 0.9599
+
+
def test_regression():
boston = load_boston()
X, y = boston.data[:, [1, 2, 6, 8, 12]], boston.target