Skip to content

Commit 55b0dfa

Browse files
committed
Drop feature_evaluated_* and n_features_evaluated_ from sklearn estimators
Indeed, these characterize the analysis process, not the resulting model itself.
1 parent 1f93dbf commit 55b0dfa

5 files changed

Lines changed: 0 additions & 170 deletions

File tree

doc/samples/samples_sklearn.rst

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ Samples
6969
khc.fit(X_train, y_train)
7070
7171
# Show the feature importance info
72-
print(f"Features evaluated: {khc.n_features_evaluated_}")
7372
print(f"Features selected : {khc.n_features_used_}")
7473
print("Top 3 used features")
7574
for i, feature in enumerate(khc.feature_used_names_[:3]):
@@ -196,7 +195,6 @@ Samples
196195
khc.fit(X_train, y_train)
197196
198197
# Show the feature importance info
199-
print(f"Features evaluated: {khc.n_features_evaluated_}")
200198
print(f"Features selected : {khc.n_features_used_}")
201199
print("Top 3 used features")
202200
for i, feature in enumerate(khc.feature_used_names_[:3]):
@@ -317,7 +315,6 @@ Samples
317315
khc.fit(X_train, y_train)
318316
319317
# Show the feature importance info
320-
print(f"Features evaluated: {khc.n_features_evaluated_}")
321318
print(f"Features selected : {khc.n_features_used_}")
322319
print("Top 3 used features")
323320
for i, feature in enumerate(khc.feature_used_names_[:3]):
@@ -549,7 +546,6 @@ Samples
549546
khr.fit(X_train, y_train)
550547
551548
# Show the feature importance info
552-
print(f"Features evaluated: {khr.n_features_evaluated_}")
553549
print(f"Features selected : {khr.n_features_used_}")
554550
print("Top 3 used features")
555551
for i, feature in enumerate(khr.feature_used_names_[:3]):
@@ -676,13 +672,6 @@ Samples
676672
khe = KhiopsEncoder(n_features=10)
677673
khe.fit(X, y)
678674
679-
# Show the feature importance info
680-
print(f"Features evaluated: {khe.n_features_evaluated_}")
681-
print("Top 3 evaluated features")
682-
for i, feature in enumerate(khe.feature_evaluated_names_[:3]):
683-
print(f"{feature} - Level: {khe.feature_evaluated_importances_[i]}")
684-
print("---")
685-
686675
# Transform the train dataset
687676
print("Encoded feature names:")
688677
print(khe.feature_names_out_)

khiops/samples/samples_sklearn.ipynb

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
"khc.fit(X_train, y_train)\n",
5656
"\n",
5757
"# Show the feature importance info\n",
58-
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
5958
"print(f\"Features selected : {khc.n_features_used_}\")\n",
6059
"print(\"Top 3 used features\")\n",
6160
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
@@ -208,7 +207,6 @@
208207
"khc.fit(X_train, y_train)\n",
209208
"\n",
210209
"# Show the feature importance info\n",
211-
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
212210
"print(f\"Features selected : {khc.n_features_used_}\")\n",
213211
"print(\"Top 3 used features\")\n",
214212
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
@@ -355,7 +353,6 @@
355353
"khc.fit(X_train, y_train)\n",
356354
"\n",
357355
"# Show the feature importance info\n",
358-
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
359356
"print(f\"Features selected : {khc.n_features_used_}\")\n",
360357
"print(\"Top 3 used features\")\n",
361358
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
@@ -639,7 +636,6 @@
639636
"khr.fit(X_train, y_train)\n",
640637
"\n",
641638
"# Show the feature importance info\n",
642-
"print(f\"Features evaluated: {khr.n_features_evaluated_}\")\n",
643639
"print(f\"Features selected : {khr.n_features_used_}\")\n",
644640
"print(\"Top 3 used features\")\n",
645641
"for i, feature in enumerate(khr.feature_used_names_[:3]):\n",
@@ -805,13 +801,6 @@
805801
"khe = KhiopsEncoder(n_features=10)\n",
806802
"khe.fit(X, y)\n",
807803
"\n",
808-
"# Show the feature importance info\n",
809-
"print(f\"Features evaluated: {khe.n_features_evaluated_}\")\n",
810-
"print(\"Top 3 evaluated features\")\n",
811-
"for i, feature in enumerate(khe.feature_evaluated_names_[:3]):\n",
812-
" print(f\"{feature} - Level: {khe.feature_evaluated_importances_[i]}\")\n",
813-
"print(\"---\")\n",
814-
"\n",
815804
"# Transform the train dataset\n",
816805
"print(\"Encoded feature names:\")\n",
817806
"print(khe.feature_names_out_)\n",

khiops/samples/samples_sklearn.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ def khiops_classifier():
5858
khc.fit(X_train, y_train)
5959

6060
# Show the feature importance info
61-
print(f"Features evaluated: {khc.n_features_evaluated_}")
6261
print(f"Features selected : {khc.n_features_used_}")
6362
print("Top 3 used features")
6463
for i, feature in enumerate(khc.feature_used_names_[:3]):
@@ -191,7 +190,6 @@ def khiops_classifier_text():
191190
khc.fit(X_train, y_train)
192191

193192
# Show the feature importance info
194-
print(f"Features evaluated: {khc.n_features_evaluated_}")
195193
print(f"Features selected : {khc.n_features_used_}")
196194
print("Top 3 used features")
197195
for i, feature in enumerate(khc.feature_used_names_[:3]):
@@ -320,7 +318,6 @@ def khiops_classifier_multitable_snowflake():
320318
khc.fit(X_train, y_train)
321319

322320
# Show the feature importance info
323-
print(f"Features evaluated: {khc.n_features_evaluated_}")
324321
print(f"Features selected : {khc.n_features_used_}")
325322
print("Top 3 used features")
326323
for i, feature in enumerate(khc.feature_used_names_[:3]):
@@ -560,7 +557,6 @@ def khiops_regressor():
560557
khr.fit(X_train, y_train)
561558

562559
# Show the feature importance info
563-
print(f"Features evaluated: {khr.n_features_evaluated_}")
564560
print(f"Features selected : {khr.n_features_used_}")
565561
print("Top 3 used features")
566562
for i, feature in enumerate(khr.feature_used_names_[:3]):
@@ -705,13 +701,6 @@ def khiops_encoder_multitable_snowflake():
705701
khe = KhiopsEncoder(n_features=10)
706702
khe.fit(X, y)
707703

708-
# Show the feature importance info
709-
print(f"Features evaluated: {khe.n_features_evaluated_}")
710-
print("Top 3 evaluated features")
711-
for i, feature in enumerate(khe.feature_evaluated_names_[:3]):
712-
print(f"{feature} - Level: {khe.feature_evaluated_importances_[i]}")
713-
print("---")
714-
715704
# Transform the train dataset
716705
print("Encoded feature names:")
717706
print(khe.feature_names_out_)

khiops/sklearn/estimators.py

Lines changed: 0 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1449,59 +1449,6 @@ def _fit_training_post_process(self, ds):
14491449
if self.model_main_dictionary_name_ is None:
14501450
raise ValueError("No model dictionary after Khiops call")
14511451

1452-
# Extract, from the preparation reports, the number of evaluated features,
1453-
# their names and their levels
1454-
univariate_preparation_report = self.model_report_.preparation_report
1455-
if self.model_report_.bivariate_preparation_report is not None:
1456-
bivariate_preparation_report = (
1457-
self.model_report_.bivariate_preparation_report
1458-
)
1459-
pair_feature_evaluated_names_ = (
1460-
bivariate_preparation_report.get_variable_pair_names()
1461-
)
1462-
pair_feature_evaluated_levels_ = [
1463-
bivariate_preparation_report.get_variable_pair_statistics(*var).level
1464-
for var in bivariate_preparation_report.get_variable_pair_names()
1465-
]
1466-
else:
1467-
pair_feature_evaluated_names_ = []
1468-
pair_feature_evaluated_levels_ = []
1469-
if self.model_report_.tree_preparation_report is not None:
1470-
tree_preparation_report = self.model_report_.tree_preparation_report
1471-
tree_feature_evaluated_names_ = tree_preparation_report.get_variable_names()
1472-
tree_feature_evaluated_levels_ = [
1473-
tree_preparation_report.get_variable_statistics(var).level
1474-
for var in tree_preparation_report.get_variable_names()
1475-
]
1476-
else:
1477-
tree_feature_evaluated_names_ = []
1478-
tree_feature_evaluated_levels_ = []
1479-
1480-
feature_evaluated_names_ = (
1481-
univariate_preparation_report.get_variable_names()
1482-
+ pair_feature_evaluated_names_
1483-
+ tree_feature_evaluated_names_
1484-
)
1485-
feature_evaluated_importances_ = np.array(
1486-
[
1487-
univariate_preparation_report.get_variable_statistics(var).level
1488-
for var in univariate_preparation_report.get_variable_names()
1489-
]
1490-
+ pair_feature_evaluated_levels_
1491-
+ tree_feature_evaluated_levels_
1492-
)
1493-
1494-
# Sort the features by level
1495-
combined = list(zip(feature_evaluated_names_, feature_evaluated_importances_))
1496-
combined.sort(key=lambda x: x[1], reverse=True)
1497-
1498-
# Set the sklearn attributes
1499-
self.feature_evaluated_names_ = np.array(
1500-
[x[0] for x in combined], dtype=np.dtype("object")
1501-
)
1502-
self.feature_evaluated_importances_ = np.array([x[1] for x in combined])
1503-
self.n_features_evaluated_ = len(combined)
1504-
15051452
def _transform_check_dataset(self, ds):
15061453
assert isinstance(ds, Dataset), "'ds' is not 'Dataset'"
15071454

@@ -1859,13 +1806,6 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
18591806
- the ``n_pairs`` parameter must be left to its default value, 0;
18601807
- the ``n_text_features`` parameter must be set to 0.
18611808
1862-
n_features_evaluated_ : int
1863-
The number of features evaluated by the classifier.
1864-
feature_evaluated_names_ : `ndarray <numpy.ndarray>` of shape (n_features_evaluated\_,)
1865-
Names of the features evaluated by the classifier.
1866-
feature_evaluated_importances_ : `ndarray <numpy.ndarray>` of shape (n_features_evaluated\_,)
1867-
Level of the features evaluated by the classifier.
1868-
See below for a definition of the level.
18691809
n_features_used_ : int
18701810
The number of features used by the classifier.
18711811
feature_used_names_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_, )
@@ -2294,13 +2234,6 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
22942234
- the ``n_pairs`` parameter must be left to its default value, 0;
22952235
- the ``n_text_features`` parameter must be set to 0.
22962236
2297-
n_features_evaluated_ : int
2298-
The number of features evaluated by the classifier.
2299-
feature_evaluated_names_ : `ndarray <numpy.ndarray>` of shape (n_features_evaluated\_,)
2300-
Names of the features evaluated by the classifier.
2301-
feature_evaluated_importances_ : `ndarray <numpy.ndarray>` of shape (n_features_evaluated\_,)
2302-
Level of the features evaluated by the classifier.
2303-
See below for a definition of the level.
23042237
n_features_used_ : int
23052238
The number of features used by the regressor.
23062239
feature_used_names_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_, )
@@ -2561,14 +2494,6 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
25612494
25622495
Attributes
25632496
----------
2564-
n_features_evaluated_ : int
2565-
The number of features evaluated by the classifier.
2566-
feature_evaluated_names_ : `ndarray <numpy.ndarray>` of shape (n_features_evaluated\_,)
2567-
Names of the features evaluated by the classifier.
2568-
feature_evaluated_importances_ : `ndarray <numpy.ndarray>` of shape (n_features_evaluated\_,)
2569-
Level of the features evaluated by the classifier. The Level is a measure of the
2570-
predictive importance of the feature taken individually. It ranges between 0 (no
2571-
predictive interest) and 1 (optimal predictive importance).
25722497
is_multitable_model_ : bool
25732498
``True`` if the model was fitted on a multi-table dataset.
25742499
model_ : `.DictionaryDomain`

tests/test_estimator_attributes.py

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -79,68 +79,6 @@ def assert_attribute_values_ok(self, model, X, y):
7979

8080
# Extract the features and their levels from the report
8181
# TODO: Eliminate this as this is the implementation
82-
# Think of a better lighter test: For example verify that the variable are
83-
# in order within the 3 feature lists (simple, pairs and trees).
84-
# Do similarly below with the selected variables.
85-
univariate_preparation_report = model.model_report_.preparation_report
86-
if model.model_report_.bivariate_preparation_report is not None:
87-
bivariate_preparation_report = (
88-
model.model_report_.bivariate_preparation_report
89-
)
90-
pair_feature_evaluated_names_ = (
91-
bivariate_preparation_report.get_variable_pair_names()
92-
)
93-
pair_feature_evaluated_levels_ = [
94-
[
95-
bivariate_preparation_report.get_variable_pair_statistics(
96-
var[0], var[1]
97-
).level
98-
]
99-
for var in bivariate_preparation_report.get_variable_pair_names()
100-
]
101-
else:
102-
pair_feature_evaluated_names_ = []
103-
pair_feature_evaluated_levels_ = []
104-
if model.model_report_.tree_preparation_report is not None:
105-
tree_preparation_report = model.model_report_.tree_preparation_report
106-
tree_feature_evaluated_names_ = tree_preparation_report.get_variable_names()
107-
tree_feature_evaluated_levels_ = [
108-
[tree_preparation_report.get_variable_statistics(var).level]
109-
for var in tree_preparation_report.get_variable_names()
110-
]
111-
else:
112-
tree_feature_evaluated_names_ = []
113-
tree_feature_evaluated_levels_ = []
114-
115-
feature_evaluated_names_report_ = (
116-
univariate_preparation_report.get_variable_names()
117-
+ pair_feature_evaluated_names_
118-
+ tree_feature_evaluated_names_
119-
)
120-
feature_evaluated_importances_report = np.array(
121-
[
122-
[univariate_preparation_report.get_variable_statistics(var).level]
123-
for var in univariate_preparation_report.get_variable_names()
124-
]
125-
+ pair_feature_evaluated_levels_
126-
+ tree_feature_evaluated_levels_
127-
)
128-
129-
# Sort the features by level
130-
combined = list(
131-
zip(feature_evaluated_names_report_, feature_evaluated_importances_report)
132-
)
133-
combined.sort(key=lambda x: x[1], reverse=True)
134-
feature_names = list(x[0] for x in combined)
135-
feature_levels = list(x[1] for x in combined)
136-
137-
# Check that the features and their levels were extracted in order
138-
self.assertEqual(
139-
model.n_features_evaluated_, len(feature_evaluated_names_report_)
140-
)
141-
self.assertEqual(model.feature_evaluated_names_.tolist(), list(feature_names))
142-
self.assertEqual(model.feature_evaluated_importances_.tolist(), feature_levels)
143-
14482
modeling_report = model.model_report_.modeling_report
14583
# Check the selected variables for the regressor and classifier
14684
if not isinstance(model, KhiopsEncoder):

0 commit comments

Comments
 (0)