Skip to content

Commit c3f1e7b

Browse files
authored
Merge pull request #544 from KhiopsML/533-add-max-parts-supervised-estimators
Add the `n_feature_parts` parameter to the supervised estimators
2 parents 26eff62 + 1275356 commit c3f1e7b

File tree

3 files changed

+54
-8
lines changed

3 files changed

+54
-8
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
- Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1.
77
- Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.
88

9+
## Unreleased
10+
11+
### Added
12+
- (`sklearn`) `n_feature_parts` parameter to the supervised estimators
13+
914
## 11.0.0.2 - 2026-01-26
1015

1116
### Fixed

khiops/sklearn/estimators.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def _check_pair_parameters(estimator):
180180
if not isinstance(estimator.n_pairs, int):
181181
raise TypeError(type_error_message("n_pairs", estimator.n_pairs, int))
182182
if estimator.n_pairs < 0:
183-
raise ValueError("'n_pairs' must be positive")
183+
raise ValueError("'n_pairs' must be non-negative")
184184
if estimator.specific_pairs is not None:
185185
if not is_list_like(estimator.specific_pairs):
186186
raise TypeError(
@@ -955,7 +955,7 @@ def _simplify(
955955
type_error_message("'max_part_numbers' values", value, int)
956956
)
957957
elif value < 0:
958-
raise ValueError("'max_part_numbers' values must be positive")
958+
raise ValueError("'max_part_numbers' values must be non-negative")
959959
# Create temporary directory and tables
960960
computation_dir = self._create_computation_dir("simplify")
961961
output_dir = self._get_output_dir(computation_dir)
@@ -1195,6 +1195,7 @@ def __init__(
11951195
specific_pairs=None,
11961196
all_possible_pairs=True,
11971197
construction_rules=None,
1198+
n_feature_parts=0,
11981199
verbose=False,
11991200
output_dir=None,
12001201
auto_sort=True,
@@ -1211,6 +1212,7 @@ def __init__(
12111212
self.specific_pairs = specific_pairs
12121213
self.all_possible_pairs = all_possible_pairs
12131214
self.construction_rules = construction_rules
1215+
self.n_feature_parts = n_feature_parts
12141216
self._original_target_dtype = None
12151217
self._predicted_target_meta_data_tag = None
12161218
self._khiops_baseline_model_prefix = None
@@ -1270,17 +1272,17 @@ def _fit_check_params(self, ds, **kwargs):
12701272
if not isinstance(self.n_features, int):
12711273
raise TypeError(type_error_message("n_features", self.n_features, int))
12721274
if self.n_features < 0:
1273-
raise ValueError("'n_features' must be positive")
1275+
raise ValueError("'n_features' must be non-negative")
12741276
if not isinstance(self.n_trees, int):
12751277
raise TypeError(type_error_message("n_trees", self.n_trees, int))
12761278
if self.n_trees < 0:
1277-
raise ValueError("'n_trees' must be positive")
1279+
raise ValueError("'n_trees' must be non-negative")
12781280
if not isinstance(self.n_text_features, int):
12791281
raise TypeError(
12801282
type_error_message("n_text_features", self.n_text_features, int)
12811283
)
12821284
if self.n_text_features < 0:
1283-
raise ValueError("'n_text_features' must be positive")
1285+
raise ValueError("'n_text_features' must be non-negative")
12841286
if not isinstance(self.type_text_features, str):
12851287
raise TypeError(
12861288
type_error_message("type_text_features", self.type_text_features, str)
@@ -1300,6 +1302,12 @@ def _fit_check_params(self, ds, **kwargs):
13001302
for rule in self.construction_rules:
13011303
if not isinstance(rule, str):
13021304
raise TypeError(type_error_message(rule, rule, str))
1305+
if not isinstance(self.n_feature_parts, int):
1306+
raise TypeError(
1307+
type_error_message("n_feature_parts", self.n_feature_parts, int)
1308+
)
1309+
if self.n_feature_parts < 0:
1310+
raise ValueError("'n_feature_parts' must be non-negative")
13031311

13041312
def _fit_train_model(self, ds, computation_dir, **kwargs):
13051313
# Train the model with Khiops
@@ -1384,6 +1392,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):
13841392
kwargs["max_trees"] = kwargs.pop("n_trees")
13851393
kwargs["max_text_features"] = kwargs.pop("n_text_features")
13861394
kwargs["text_features"] = kwargs.pop("type_text_features")
1395+
kwargs["max_parts"] = kwargs.pop("n_feature_parts")
13871396

13881397
# Add the additional_data_tables parameter
13891398
kwargs["additional_data_tables"] = additional_data_tables
@@ -1513,6 +1522,7 @@ def __init__(
15131522
specific_pairs=None,
15141523
all_possible_pairs=True,
15151524
construction_rules=None,
1525+
n_feature_parts=0,
15161526
verbose=False,
15171527
output_dir=None,
15181528
auto_sort=True,
@@ -1525,6 +1535,7 @@ def __init__(
15251535
specific_pairs=specific_pairs,
15261536
all_possible_pairs=all_possible_pairs,
15271537
construction_rules=construction_rules,
1538+
n_feature_parts=n_feature_parts,
15281539
verbose=verbose,
15291540
output_dir=output_dir,
15301541
auto_sort=auto_sort,
@@ -1624,9 +1635,9 @@ def _fit_check_params(self, ds, **kwargs):
16241635

16251636
# Check estimator parameters
16261637
if self.n_evaluated_features < 0:
1627-
raise ValueError("'n_evaluated_features' must be positive")
1638+
raise ValueError("'n_evaluated_features' must be non-negative")
16281639
if self.n_selected_features < 0:
1629-
raise ValueError("'n_selected_features' must be positive")
1640+
raise ValueError("'n_selected_features' must be non-negative")
16301641

16311642

16321643
# Note: scikit-learn **requires** inherit first the mixins and then other classes
@@ -1685,7 +1696,10 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
16851696
construction_rules : list of str, optional
16861697
Allowed rules for the automatic feature construction. If not set, Khiops
16871698
uses the multi-table construction rules listed in
1688-
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`
1699+
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
1700+
n_feature_parts : int, default 0
1701+
Maximum number of variable parts produced by preprocessing methods. If equal
1702+
to 0 it is automatically calculated.
16891703
group_target_value : bool, default ``False``
16901704
Allows grouping of the target values in classification. It can substantially
16911705
increase the training time.
@@ -1744,6 +1758,7 @@ def __init__(
17441758
specific_pairs=None,
17451759
all_possible_pairs=True,
17461760
construction_rules=None,
1761+
n_feature_parts=0,
17471762
group_target_value=False,
17481763
verbose=False,
17491764
output_dir=None,
@@ -1757,6 +1772,7 @@ def __init__(
17571772
n_selected_features=n_selected_features,
17581773
n_evaluated_features=n_evaluated_features,
17591774
construction_rules=construction_rules,
1775+
n_feature_parts=n_feature_parts,
17601776
verbose=verbose,
17611777
output_dir=output_dir,
17621778
auto_sort=auto_sort,
@@ -2086,6 +2102,9 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
20862102
Allowed rules for the automatic feature construction. If not set, Khiops
20872103
uses the multi-table construction rules listed in
20882104
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
2105+
n_feature_parts : int, default 0
2106+
Maximum number of variable parts produced by preprocessing methods. If equal
2107+
to 0 it is automatically calculated.
20892108
verbose : bool, default ``False``
20902109
If ``True`` it prints debug information and it does not erase temporary files
20912110
when fitting, predicting or transforming.
@@ -2129,6 +2148,7 @@ def __init__(
21292148
n_selected_features=0,
21302149
n_evaluated_features=0,
21312150
construction_rules=None,
2151+
n_feature_parts=0,
21322152
verbose=False,
21332153
output_dir=None,
21342154
auto_sort=True,
@@ -2141,6 +2161,7 @@ def __init__(
21412161
n_selected_features=n_selected_features,
21422162
n_evaluated_features=n_evaluated_features,
21432163
construction_rules=construction_rules,
2164+
n_feature_parts=n_feature_parts,
21442165
verbose=verbose,
21452166
output_dir=output_dir,
21462167
auto_sort=auto_sort,
@@ -2296,6 +2317,9 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
22962317
Allowed rules for the automatic feature construction. If not set, Khiops
22972318
uses the multi-table construction rules listed in
22982319
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
2320+
n_feature_parts : int, default 0
2321+
Maximum number of variable parts produced by preprocessing methods. If equal
2322+
to 0 it is automatically calculated.
22992323
informative_features_only : bool, default ``True``
23002324
If ``True`` keeps only informative features.
23012325
group_target_value : bool, default ``False``
@@ -2374,6 +2398,7 @@ def __init__(
23742398
specific_pairs=None,
23752399
all_possible_pairs=True,
23762400
construction_rules=None,
2401+
n_feature_parts=0,
23772402
informative_features_only=True,
23782403
group_target_value=False,
23792404
keep_initial_variables=False,
@@ -2390,6 +2415,7 @@ def __init__(
23902415
n_text_features=n_text_features,
23912416
type_text_features=type_text_features,
23922417
construction_rules=construction_rules,
2418+
n_feature_parts=n_feature_parts,
23932419
verbose=verbose,
23942420
output_dir=output_dir,
23952421
auto_sort=auto_sort,

tests/test_sklearn.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,7 @@ def setUpClass(cls):
762762
"specific_pairs": [("age", "race")],
763763
"all_possible_pairs": False,
764764
"construction_rules": ["TableMode", "TableSelection"],
765+
"max_parts": 3,
765766
"group_target_value": False,
766767
"additional_data_tables": {},
767768
}
@@ -790,6 +791,7 @@ def setUpClass(cls):
790791
"max_selected_variables": 1,
791792
"max_evaluated_variables": 3,
792793
"construction_rules": ["TableMode", "TableSelection"],
794+
"max_parts": 5,
793795
"additional_data_tables": {},
794796
}
795797
},
@@ -818,6 +820,7 @@ def setUpClass(cls):
818820
"specific_pairs": [("age", "race")],
819821
"all_possible_pairs": False,
820822
"construction_rules": ["TableMode", "TableSelection"],
823+
"max_parts": 7,
821824
"informative_variables_only": True,
822825
"group_target_value": False,
823826
"keep_initial_categorical_variables": False,
@@ -860,6 +863,7 @@ def setUpClass(cls):
860863
"specific_pairs": [],
861864
"all_possible_pairs": False,
862865
"construction_rules": ["TableMode", "TableSelection"],
866+
"max_parts": 4,
863867
"group_target_value": False,
864868
"additional_data_tables": {"SpliceJunctionDNA"},
865869
}
@@ -889,6 +893,7 @@ def setUpClass(cls):
889893
"max_selected_variables": 1,
890894
"max_evaluated_variables": 3,
891895
"construction_rules": ["TableMode", "TableSelection"],
896+
"max_parts": 6,
892897
"additional_data_tables": {"SpliceJunctionDNA"},
893898
}
894899
},
@@ -918,6 +923,7 @@ def setUpClass(cls):
918923
"specific_pairs": [],
919924
"all_possible_pairs": False,
920925
"construction_rules": ["TableMode", "TableSelection"],
926+
"max_parts": 8,
921927
"informative_variables_only": True,
922928
"group_target_value": False,
923929
"keep_initial_categorical_variables": False,
@@ -1435,6 +1441,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe(self):
14351441
"specific_pairs": [("age", "race")],
14361442
"all_possible_pairs": False,
14371443
"construction_rules": ["TableMode", "TableSelection"],
1444+
"n_feature_parts": 3,
14381445
"group_target_value": False,
14391446
},
14401447
)
@@ -1458,6 +1465,7 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe_with_df_y(
14581465
"specific_pairs": [("age", "race")],
14591466
"all_possible_pairs": False,
14601467
"construction_rules": ["TableMode", "TableSelection"],
1468+
"n_feature_parts": 3,
14611469
"group_target_value": False,
14621470
},
14631471
)
@@ -1480,6 +1488,7 @@ def test_parameter_transfer_classifier_fit_from_multitable_dataframe(self):
14801488
"specific_pairs": [],
14811489
"all_possible_pairs": False,
14821490
"construction_rules": ["TableMode", "TableSelection"],
1491+
"n_feature_parts": 4,
14831492
"group_target_value": False,
14841493
},
14851494
)
@@ -1517,6 +1526,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe(self):
15171526
"specific_pairs": [("age", "race")],
15181527
"all_possible_pairs": False,
15191528
"construction_rules": ["TableMode", "TableSelection"],
1529+
"n_feature_parts": 7,
15201530
"informative_features_only": True,
15211531
"group_target_value": False,
15221532
"keep_initial_variables": False,
@@ -1543,6 +1553,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe_with_df_y(
15431553
"specific_pairs": [("age", "race")],
15441554
"all_possible_pairs": False,
15451555
"construction_rules": ["TableMode", "TableSelection"],
1556+
"n_feature_parts": 7,
15461557
"informative_features_only": True,
15471558
"group_target_value": False,
15481559
"keep_initial_variables": False,
@@ -1568,6 +1579,7 @@ def test_parameter_transfer_encoder_fit_from_multitable_dataframe(self):
15681579
"specific_pairs": [],
15691580
"all_possible_pairs": False,
15701581
"construction_rules": ["TableMode", "TableSelection"],
1582+
"n_feature_parts": 8,
15711583
"informative_features_only": True,
15721584
"group_target_value": False,
15731585
"keep_initial_variables": False,
@@ -1608,6 +1620,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self):
16081620
"n_text_features": 300000,
16091621
"type_text_features": "ngrams",
16101622
"construction_rules": ["TableMode", "TableSelection"],
1623+
"n_feature_parts": 5,
16111624
},
16121625
)
16131626

@@ -1626,6 +1639,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y(
16261639
"n_text_features": 300000,
16271640
"type_text_features": "ngrams",
16281641
"construction_rules": ["TableMode", "TableSelection"],
1642+
"n_feature_parts": 5,
16291643
},
16301644
)
16311645

@@ -1644,6 +1658,7 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self):
16441658
"n_selected_features": 1,
16451659
"n_evaluated_features": 3,
16461660
"construction_rules": ["TableMode", "TableSelection"],
1661+
"n_feature_parts": 6,
16471662
},
16481663
)
16491664

0 commit comments

Comments
 (0)