Skip to content

Commit 0785a1b

Browse files
committed
Drop level and weight from the .feature_used_importances_ estimator attribute
The level and weight of the features characterize the analysis process, not the resulting model itself.
1 parent 55b0dfa commit 0785a1b

5 files changed

Lines changed: 24 additions & 50 deletions

File tree

doc/samples/samples_sklearn.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ Samples
7272
print(f"Features selected : {khc.n_features_used_}")
7373
print("Top 3 used features")
7474
for i, feature in enumerate(khc.feature_used_names_[:3]):
75-
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
75+
print(f"{feature} - Importance: {khc.feature_used_importances_[i]}")
7676
print("---")
7777
7878
print("Top 5 used features, among those present in the dataset")
@@ -198,7 +198,7 @@ Samples
198198
print(f"Features selected : {khc.n_features_used_}")
199199
print("Top 3 used features")
200200
for i, feature in enumerate(khc.feature_used_names_[:3]):
201-
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
201+
print(f"{feature} - Importance: {khc.feature_used_importances_[i]}")
202202
print("---")
203203
204204
# Predict the classes on the test dataset
@@ -318,7 +318,7 @@ Samples
318318
print(f"Features selected : {khc.n_features_used_}")
319319
print("Top 3 used features")
320320
for i, feature in enumerate(khc.feature_used_names_[:3]):
321-
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
321+
print(f"{feature} - Importance: {khc.feature_used_importances_[i]}")
322322
print("---")
323323
324324
# Predict the class on the test dataset
@@ -549,7 +549,7 @@ Samples
549549
print(f"Features selected : {khr.n_features_used_}")
550550
print("Top 3 used features")
551551
for i, feature in enumerate(khr.feature_used_names_[:3]):
552-
print(f"{feature} - Importance: {khr.feature_used_importances_[i][2]}")
552+
print(f"{feature} - Importance: {khr.feature_used_importances_[i]}")
553553
print("---")
554554
555555
# Predict the values on the test dataset

khiops/samples/samples_sklearn.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
"print(f\"Features selected : {khc.n_features_used_}\")\n",
5959
"print(\"Top 3 used features\")\n",
6060
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
61-
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
61+
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i]}\")\n",
6262
"print(\"---\")\n",
6363
"\n",
6464
"print(\"Top 5 used features, among those present in the dataset\")\n",
@@ -210,7 +210,7 @@
210210
"print(f\"Features selected : {khc.n_features_used_}\")\n",
211211
"print(\"Top 3 used features\")\n",
212212
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
213-
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
213+
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i]}\")\n",
214214
"print(\"---\")\n",
215215
"\n",
216216
"# Predict the classes on the test dataset\n",
@@ -356,7 +356,7 @@
356356
"print(f\"Features selected : {khc.n_features_used_}\")\n",
357357
"print(\"Top 3 used features\")\n",
358358
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
359-
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
359+
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i]}\")\n",
360360
"print(\"---\")\n",
361361
"\n",
362362
"# Predict the class on the test dataset\n",
@@ -639,7 +639,7 @@
639639
"print(f\"Features selected : {khr.n_features_used_}\")\n",
640640
"print(\"Top 3 used features\")\n",
641641
"for i, feature in enumerate(khr.feature_used_names_[:3]):\n",
642-
" print(f\"{feature} - Importance: {khr.feature_used_importances_[i][2]}\")\n",
642+
" print(f\"{feature} - Importance: {khr.feature_used_importances_[i]}\")\n",
643643
"print(\"---\")\n",
644644
"\n",
645645
"# Predict the values on the test dataset\n",

khiops/samples/samples_sklearn.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def khiops_classifier():
6161
print(f"Features selected : {khc.n_features_used_}")
6262
print("Top 3 used features")
6363
for i, feature in enumerate(khc.feature_used_names_[:3]):
64-
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
64+
print(f"{feature} - Importance: {khc.feature_used_importances_[i]}")
6565
print("---")
6666

6767
print("Top 5 used features, among those present in the dataset")
@@ -193,7 +193,7 @@ def khiops_classifier_text():
193193
print(f"Features selected : {khc.n_features_used_}")
194194
print("Top 3 used features")
195195
for i, feature in enumerate(khc.feature_used_names_[:3]):
196-
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
196+
print(f"{feature} - Importance: {khc.feature_used_importances_[i]}")
197197
print("---")
198198

199199
# Predict the classes on the test dataset
@@ -321,7 +321,7 @@ def khiops_classifier_multitable_snowflake():
321321
print(f"Features selected : {khc.n_features_used_}")
322322
print("Top 3 used features")
323323
for i, feature in enumerate(khc.feature_used_names_[:3]):
324-
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
324+
print(f"{feature} - Importance: {khc.feature_used_importances_[i]}")
325325
print("---")
326326

327327
# Predict the class on the test dataset
@@ -560,7 +560,7 @@ def khiops_regressor():
560560
print(f"Features selected : {khr.n_features_used_}")
561561
print("Top 3 used features")
562562
for i, feature in enumerate(khr.feature_used_names_[:3]):
563-
print(f"{feature} - Importance: {khr.feature_used_importances_[i][2]}")
563+
print(f"{feature} - Importance: {khr.feature_used_importances_[i]}")
564564
print("---")
565565

566566
# Predict the values on the test dataset

khiops/sklearn/estimators.py

Lines changed: 10 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,9 +1567,7 @@ def _fit_training_post_process(self, ds):
15671567
for feature_name in self.feature_names_in_:
15681568
if feature_name in feature_used_names:
15691569
feature_index = np.where(feature_used_names == feature_name)
1570-
feature_importance = self.feature_used_importances_[
1571-
feature_index
1572-
].ravel()[2]
1570+
feature_importance = self.feature_used_importances_[feature_index][0]
15731571
else:
15741572
feature_importance = 0.0
15751573
feature_importances.append(feature_importance)
@@ -1672,10 +1670,7 @@ def get_feature_used_statistics(self, modeling_report):
16721670
[var.name for var in modeling_report.selected_variables]
16731671
)
16741672
feature_used_importances_ = np.array(
1675-
[
1676-
[var.level, var.weight, var.importance]
1677-
for var in modeling_report.selected_variables
1678-
]
1673+
[var.importance for var in modeling_report.selected_variables]
16791674
)
16801675
# Return empty arrays if no selected variables are available
16811676
else:
@@ -1810,20 +1805,10 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
18101805
The number of features used by the classifier.
18111806
feature_used_names_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_, )
18121807
Names of the features used by the classifier.
1813-
feature_used_importances_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_, 3)
1814-
Level, Weight and Importance of the features used by the classifier:
1815-
1816-
- Level: A measure of the predictive importance of the feature taken
1817-
individually. It ranges between 0 (no predictive interest) and 1 (optimal
1818-
predictive importance).
1819-
1820-
- Weight: A measure of the predictive importance of the feature taken relative
1821-
to all features selected by the classifier. It ranges between 0 (little
1822-
contribution to the model) and 1 (large contribution to the model).
1823-
1824-
- Importance: Average of the exact Shapley values of each used feature
1825-
across the training data.
1826-
1808+
feature_used_importances_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_,)
1809+
Importance of the features used by the classifier. The importance is
1810+
computed as the average of the exact Shapley values of each used feature
1811+
across the training dataset.
18271812
is_multitable_model_ : bool
18281813
``True`` if the model was fitted on a multi-table dataset.
18291814
model_ : `.DictionaryDomain`
@@ -2238,20 +2223,10 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
22382223
The number of features used by the regressor.
22392224
feature_used_names_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_, )
22402225
Names of the features used by the regressor.
2241-
feature_used_importances_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_, 3)
2242-
Level, Weight and Importance of the features used by the classifier:
2243-
2244-
- Level: A measure of the predictive importance of the feature taken
2245-
individually. It ranges between 0 (no predictive interest) and 1 (optimal
2246-
predictive importance).
2247-
2248-
- Weight: A measure of the predictive importance of the feature taken relative
2249-
to all features selected by the classifier. It ranges between 0 (little
2250-
contribution to the model) and 1 (large contribution to the model).
2251-
2252-
- Importance: Average of the exact Shapley values of each used feature
2253-
across the training data.
2254-
2226+
feature_used_importances_ : `ndarray <numpy.ndarray>` of shape (n_features_used\_,)
2227+
Importance of the features used by the regressor. The importance is
2228+
computed as the average of the exact Shapley values of each used feature
2229+
across the training dataset.
22552230
is_multitable_model_ : bool
22562231
``True`` if the model was fitted on a multi-table dataset.
22572232
model_ : `.DictionaryDomain`

tests/test_estimator_attributes.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import warnings
1010
from os import path
1111

12-
import numpy as np
1312
import pandas as pd
1413

1514
from khiops import core as kh
@@ -89,7 +88,7 @@ def assert_attribute_values_ok(self, model, X, y):
8988
for var in modeling_report.get_snb_predictor().selected_variables
9089
]
9190
feature_used_importances_report = [
92-
[var.level, var.weight, var.importance]
91+
var.importance
9392
for var in modeling_report.get_snb_predictor().selected_variables
9493
]
9594

@@ -113,7 +112,7 @@ def assert_attribute_values_ok(self, model, X, y):
113112
if feature_name in feature_used_names:
114113
feature_index = feature_used_names.index(feature_name)
115114
feature_importances_report.append(
116-
feature_used_importances_report[feature_index][2]
115+
feature_used_importances_report[feature_index]
117116
)
118117
else:
119118
feature_importances_report.append(0.0)

0 commit comments

Comments
 (0)