@@ -749,6 +749,8 @@ def setUpClass(cls):
749749 "header_line" : True ,
750750 "max_pairs" : 1 ,
751751 "max_trees" : 5 ,
752+ "max_text_features" : 300000 ,
753+ "text_features" : "ngrams" ,
752754 "max_selected_variables" : 1 ,
753755 "max_evaluated_variables" : 3 ,
754756 "specific_pairs" : [("age" , "race" )],
@@ -777,6 +779,8 @@ def setUpClass(cls):
777779 "detect_format" : False ,
778780 "header_line" : True ,
779781 "max_trees" : 0 ,
782+ "max_text_features" : 300000 ,
783+ "text_features" : "ngrams" ,
780784 "max_selected_variables" : 1 ,
781785 "max_evaluated_variables" : 3 ,
782786 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -803,6 +807,8 @@ def setUpClass(cls):
803807 "header_line" : True ,
804808 "max_pairs" : 1 ,
805809 "max_trees" : 5 ,
810+ "max_text_features" : 300000 ,
811+ "text_features" : "ngrams" ,
806812 "specific_pairs" : [("age" , "race" )],
807813 "all_possible_pairs" : False ,
808814 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -841,6 +847,8 @@ def setUpClass(cls):
841847 "max_constructed_variables" : 10 ,
842848 "max_pairs" : 1 ,
843849 "max_trees" : 5 ,
850+ "max_text_features" : 300000 ,
851+ "text_features" : "ngrams" ,
844852 "max_selected_variables" : 1 ,
845853 "max_evaluated_variables" : 3 ,
846854 "specific_pairs" : [],
@@ -870,6 +878,8 @@ def setUpClass(cls):
870878 "header_line" : True ,
871879 "max_constructed_variables" : 10 ,
872880 "max_trees" : 0 ,
881+ "max_text_features" : 300000 ,
882+ "text_features" : "ngrams" ,
873883 "max_selected_variables" : 1 ,
874884 "max_evaluated_variables" : 3 ,
875885 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -897,6 +907,8 @@ def setUpClass(cls):
897907 "max_constructed_variables" : 10 ,
898908 "max_pairs" : 1 ,
899909 "max_trees" : 5 ,
910+ "max_text_features" : 300000 ,
911+ "text_features" : "ngrams" ,
900912 "specific_pairs" : [],
901913 "all_possible_pairs" : False ,
902914 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -1410,6 +1422,8 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe(self):
14101422 extra_estimator_kwargs = {
14111423 "n_pairs" : 1 ,
14121424 "n_trees" : 5 ,
1425+ "n_text_features" : 300000 ,
1426+ "type_text_features" : "ngrams" ,
14131427 "n_selected_features" : 1 ,
14141428 "n_evaluated_features" : 3 ,
14151429 "specific_pairs" : [("age" , "race" )],
@@ -1431,6 +1445,8 @@ def test_parameter_transfer_classifier_fit_from_monotable_dataframe_with_df_y(
14311445 extra_estimator_kwargs = {
14321446 "n_pairs" : 1 ,
14331447 "n_trees" : 5 ,
1448+ "n_text_features" : 300000 ,
1449+ "type_text_features" : "ngrams" ,
14341450 "n_selected_features" : 1 ,
14351451 "n_evaluated_features" : 3 ,
14361452 "specific_pairs" : [("age" , "race" )],
@@ -1451,6 +1467,8 @@ def test_parameter_transfer_classifier_fit_from_multitable_dataframe(self):
14511467 "n_features" : 10 ,
14521468 "n_pairs" : 1 ,
14531469 "n_trees" : 5 ,
1470+ "n_text_features" : 300000 ,
1471+ "type_text_features" : "ngrams" ,
14541472 "n_selected_features" : 1 ,
14551473 "n_evaluated_features" : 3 ,
14561474 "specific_pairs" : [],
@@ -1488,6 +1506,8 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe(self):
14881506 extra_estimator_kwargs = {
14891507 "n_pairs" : 1 ,
14901508 "n_trees" : 5 ,
1509+ "n_text_features" : 300000 ,
1510+ "type_text_features" : "ngrams" ,
14911511 "specific_pairs" : [("age" , "race" )],
14921512 "all_possible_pairs" : False ,
14931513 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -1512,6 +1532,8 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe_with_df_y(
15121532 extra_estimator_kwargs = {
15131533 "n_pairs" : 1 ,
15141534 "n_trees" : 5 ,
1535+ "n_text_features" : 300000 ,
1536+ "type_text_features" : "ngrams" ,
15151537 "specific_pairs" : [("age" , "race" )],
15161538 "all_possible_pairs" : False ,
15171539 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -1535,6 +1557,8 @@ def test_parameter_transfer_encoder_fit_from_multitable_dataframe(self):
15351557 "n_features" : 10 ,
15361558 "n_pairs" : 1 ,
15371559 "n_trees" : 5 ,
1560+ "n_text_features" : 300000 ,
1561+ "type_text_features" : "ngrams" ,
15381562 "specific_pairs" : [],
15391563 "all_possible_pairs" : False ,
15401564 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -1575,6 +1599,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self):
15751599 extra_estimator_kwargs = {
15761600 "n_selected_features" : 1 ,
15771601 "n_evaluated_features" : 3 ,
1602+ "n_text_features" : 300000 ,
1603+ "type_text_features" : "ngrams" ,
15781604 "construction_rules" : ["TableMode" , "TableSelection" ],
15791605 },
15801606 )
@@ -1591,6 +1617,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y(
15911617 extra_estimator_kwargs = {
15921618 "n_selected_features" : 1 ,
15931619 "n_evaluated_features" : 3 ,
1620+ "n_text_features" : 300000 ,
1621+ "type_text_features" : "ngrams" ,
15941622 "construction_rules" : ["TableMode" , "TableSelection" ],
15951623 },
15961624 )
@@ -1605,6 +1633,8 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self):
16051633 extra_estimator_kwargs = {
16061634 "n_features" : 10 ,
16071635 "n_trees" : 0 ,
1636+ "n_text_features" : 300000 ,
1637+ "type_text_features" : "ngrams" ,
16081638 "n_selected_features" : 1 ,
16091639 "n_evaluated_features" : 3 ,
16101640 "construction_rules" : ["TableMode" , "TableSelection" ],
@@ -1693,6 +1723,7 @@ def test_sklearn_check_estimator(self):
16931723 # Set the estimators to test
16941724 # Notes:
16951725 # - We use n_trees=0 so the tests execute faster
1726+ # - We use n_text_features=0 so the tests execute faster
16961727 # - We omit KhiopsCoclustering because it needs special inputs to work well
16971728 # and sklearn's check_estimator method does not accept them.
16981729 # - KhiopsEncoder:
@@ -1701,10 +1732,11 @@ def test_sklearn_check_estimator(self):
17011732 # - We set it with informative_features_only=False so it always has output
17021733 # columns (sklearn estimator checks expect non-empty encoders)
17031734 khiops_estimators = [
1704- KhiopsClassifier (n_trees = 0 ),
1705- KhiopsRegressor (n_trees = 0 ),
1735+ KhiopsClassifier (n_trees = 0 , n_text_features = 0 ),
1736+ KhiopsRegressor (n_trees = 0 , n_text_features = 0 ),
17061737 KhiopsEncoder (
17071738 n_trees = 0 ,
1739+ n_text_features = 0 ,
17081740 informative_features_only = False ,
17091741 transform_type_numerical = "0-1_normalization" ,
17101742 ),
0 commit comments