Skip to content

Commit cea06e6

Browse files
committed
Expose default construction rules and fix relevant documentation
- separate construction rules into: - rules applied by default (`DEFAULT_CONSTRUCTION_RULES`); - calendar-related rules (`CALENDRICAL_CONSTRUCTION_RULES`); - document the construction rules; - fix the `construction_rules` parameter documentation in the Core API; - fix the `n_features` parameter documentation in the Sklearn estimator API.
1 parent adc9bbe commit cea06e6

4 files changed

Lines changed: 60 additions & 32 deletions

File tree

khiops/core/api.py

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,10 @@
3131
from khiops.core.internals.runner import get_runner
3232
from khiops.core.internals.task import get_task_registry
3333

34-
# List of all available construction rules in the Khiops tool
35-
all_construction_rules = [
36-
"Day",
37-
"DecimalTime",
38-
"DecimalWeekDay",
39-
"DecimalYear",
40-
"DecimalYearTS",
41-
"GetDate",
42-
"GetTime",
34+
# Construction rules
35+
DEFAULT_CONSTRUCTION_RULES = [
4336
"GetValue",
4437
"GetValueC",
45-
"LocalTimestamp",
4638
"TableCount",
4739
"TableCountDistinct",
4840
"TableMax",
@@ -53,9 +45,37 @@
5345
"TableSelection",
5446
"TableStdDev",
5547
"TableSum",
48+
]
49+
"""List of construction rules that Khiops uses by default
50+
51+
.. note::
52+
These are all the multi-table rules.
53+
""" # pylint: disable=pointless-string-statement
54+
55+
CALENDRICAL_CONSTRUCTION_RULES = [
56+
"Day",
57+
"DecimalTime",
58+
"DecimalWeekDay",
59+
"DecimalYear",
60+
"DecimalYearTS",
61+
"GetDate",
62+
"GetTime",
63+
"LocalTimestamp",
5664
"WeekDay",
5765
"YearDay",
5866
]
67+
"""List of calendrical construction rules
68+
69+
These rules include: date, time and timestamp rules.
70+
71+
.. note::
72+
These rules are not enabled by default. The user needs to explicitly
73+
select each of them via the ``construction_rules`` parameter of the
74+
relevant Core API functions.
75+
""" # pylint: disable=pointless-string-statement
76+
77+
# List of all available construction rules in the Khiops tool
78+
ALL_CONSTRUCTION_RULES = DEFAULT_CONSTRUCTION_RULES + CALENDRICAL_CONSTRUCTION_RULES
5979

6080
##########################
6181
# Private module methods #
@@ -758,8 +778,9 @@ def train_predictor(
758778
max_constructed_variables : int, default 1000
759779
Maximum number of variables to construct.
760780
construction_rules : list of str, optional
761-
Allowed rules for the automatic variable construction. If not set it uses all
762-
possible rules.
781+
Allowed rules for the automatic variable construction. If not set, Khiops
782+
uses the multi-table construction rules listed in
783+
`DEFAULT_CONSTRUCTION_RULES`.
763784
max_text_features : int, default 10000
764785
Maximum number of text features to construct.
765786
text_features : str, default "words"
@@ -1190,21 +1211,22 @@ def train_recoder(
11901211
max_constructed_variables : int, default 100
11911212
Maximum number of variables to construct.
11921213
construction_rules : list of str, optional
1193-
Allowed rules for the automatic variable construction. If not set it uses all
1194-
possible rules.
1214+
Allowed rules for the automatic variable construction. If not set, Khiops
1215+
uses the multi-table construction rules listed in
1216+
`DEFAULT_CONSTRUCTION_RULES`.
11951217
max_text_features : int, default 10000
11961218
Maximum number of text features to construct.
11971219
text_features : str, default "words"
11981220
Type of the text features. Can be either one of:
11991221
1200-
- "words": sequences of non-space characters
1201-
- "ngrams": sequences of bytes
1202-
- "tokens": user-defined
1222+
- "words": sequences of non-space characters
1223+
- "ngrams": sequences of bytes
1224+
- "tokens": user-defined
12031225
12041226
max_trees : int, default 10
12051227
Maximum number of trees to construct.
12061228
max_pairs : int, default 0
1207-
Maximum number of variables pairs to construct.
1229+
Maximum number of variable pairs to construct.
12081230
specific_pairs : list of tuple, optional
12091231
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
12101232
contains only one non-empty feature name, then it generates all the pairs

khiops/samples/samples.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@
503503
"metadata": {},
504504
"source": [
505505
"### `train_predictor_mt_with_specific_rules()`\n\n",
506-
"Trains a multi-table predictor with specific construction rules\n\n It is the same as `.train_predictor_mt` but with the specification of the allowed\n variable construction rules. The list of available rules is found in the field\n ``kh.all_construction_rules``\n \n"
506+
"Trains a multi-table predictor with specific construction rules\n\n It is the same as `.train_predictor_mt` but with the specification of the allowed\n variable construction rules. The list of available rules is found in the field\n ``kh.ALL_CONSTRUCTION_RULES``\n \n"
507507
]
508508
},
509509
{

khiops/samples/samples.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def train_predictor_mt_with_specific_rules():
432432
433433
It is the same as `.train_predictor_mt` but with the specification of the allowed
434434
variable construction rules. The list of available rules is found in the field
435-
``kh.all_construction_rules``
435+
``kh.ALL_CONSTRUCTION_RULES``
436436
"""
437437
# Imports
438438
import os

khiops/sklearn/estimators.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,8 +1733,9 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
17331733
Parameters
17341734
----------
17351735
n_features : int, default 100
1736-
*Multi-table only* : Maximum number of multi-table aggregate features to
1737-
construct. See :doc:`/multi_table_primer` for more details.
1736+
Maximum number of features to construct automatically. See
1737+
:doc:`/multi_table_primer` for more details on the multi-table-specific
1738+
features.
17381739
n_pairs : int, default 0
17391740
Maximum number of pair features to construct. These features are 2D grid
17401741
partitions of univariate feature pairs. The grid is optimized such that in each
@@ -1769,8 +1770,9 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
17691770
Pairs specified with ``specific_pairs`` have top priority: they are constructed
17701771
first.
17711772
construction_rules : list of str, optional
1772-
Allowed rules for the automatic feature construction. If not set, it uses all
1773-
possible rules.
1773+
Allowed rules for the automatic feature construction. If not set, Khiops
1774+
uses the multi-table construction rules listed in
1775+
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
17741776
group_target_value : bool, default ``False``
17751777
Allows grouping of the target values in classification. It can substantially
17761778
increase the training time.
@@ -2181,17 +2183,19 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
21812183
Parameters
21822184
----------
21832185
n_features : int, default 100
2184-
*Multi-table only* : Maximum number of multi-table aggregate features to
2185-
construct. See :doc:`/multi_table_primer` for more details.
2186+
Maximum number of features to construct automatically. See
2187+
:doc:`/multi_table_primer` for more details on the multi-table-specific
2188+
features.
21862189
n_selected_features : int, default 0
21872190
Maximum number of features to be selected in the SNB predictor. If equal to
21882191
0 it selects all the features kept in the training.
21892192
n_evaluated_features : int, default 0
21902193
Maximum number of features to be evaluated in the SNB predictor training. If
21912194
equal to 0 it evaluates all informative features.
21922195
construction_rules : list of str, optional
2193-
Allowed rules for the automatic feature construction. If not set, it uses all
2194-
possible rules.
2196+
Allowed rules for the automatic feature construction. If not set, Khiops
2197+
uses the multi-table construction rules listed in
2198+
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
21952199
verbose : bool, default ``False``
21962200
If ``True`` it prints debug information and it does not erase temporary files
21972201
when fitting, predicting or transforming.
@@ -2403,8 +2407,9 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
24032407
categorical_target : bool, default ``True``
24042408
``True`` if the target column is categorical.
24052409
n_features : int, default 100
2406-
*Multi-table only* : Maximum number of multi-table aggregate features to
2407-
construct. See :doc:`/multi_table_primer` for more details.
2410+
Maximum number of features to construct automatically. See
2411+
:doc:`/multi_table_primer` for more details on the multi-table-specific
2412+
features.
24082413
n_pairs : int, default 0
24092414
Maximum number of pair features to construct. These features are 2D grid
24102415
partitions of univariate feature pairs. The grid is optimized such that in each
@@ -2432,8 +2437,9 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
24322437
Pairs specified with ``specific_pairs`` have top priority: they are constructed
24332438
first.
24342439
construction_rules : list of str, optional
2435-
Allowed rules for the automatic feature construction. If not set, it uses all
2436-
possible rules.
2440+
Allowed rules for the automatic feature construction. If not set, Khiops
2441+
uses the multi-table construction rules listed in
2442+
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
24372443
informative_features_only : bool, default ``True``
24382444
If ``True`` keeps only informative features.
24392445
group_target_value : bool, default ``False``

0 commit comments

Comments
 (0)