Skip to content

Commit 73318ef

Browse files
Merge pull request #311 from KhiopsML/243-improve-specific_pairs-documentation
2 parents 7918d5e + d52a294 commit 73318ef

3 files changed

Lines changed: 47 additions & 36 deletions

File tree

doc/conf.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,11 @@
9292
"color-admonition-title-background--note": "#CC6100",
9393
"font-stack": "Helvetica Neue, Helvetica, sans-serif",
9494
},
95-
"source_repository": "https://github.com/khiopsml/khiops/",
9695
# Sets the Github Icon (the SVG is embedded, copied from furo's repo)
9796
"footer_icons": [
9897
{
9998
"name": "GitHub",
100-
"url": "https://github.com/khiopsml/khiops",
99+
"url": "https://github.com/khiopsml/khiops-python",
101100
"html": """
102101
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
103102
<path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path>

khiops/core/api.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -748,12 +748,14 @@ def train_predictor(
748748
max_pairs : int, default 0
749749
Maximum number of variable pairs to construct.
750750
specific_pairs : list of tuple, optional
751-
User-specified pairs as a list of 2-tuples of variable names. If a given tuple
752-
contains only one non-empty variable name, then it generates all the pairs
753-
containing it (within the limit ``max_pairs``).
751+
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
752+
contains only one non-empty feature name, then it generates all the pairs
753+
containing it (within the maximum limit ``max_pairs``). These pairs have top
754+
priority: they are constructed first.
754755
all_possible_pairs : bool, default ``True``
755756
If ``True`` tries to create all possible pairs within the limit ``max_pairs``.
756-
The pairs and variables given in ``specific_pairs`` have priority.
757+
Pairs specified with ``specific_pairs`` have top priority: they are constructed
758+
first.
757759
only_pairs_with : str, default ""
758760
Constructs only pairs with the specified variable name. If equal to the empty
759761
string "" it considers all variables to make pairs.
@@ -1072,12 +1074,14 @@ def train_recoder(
10721074
max_pairs : int, default 0
10731075
Maximum number of variable pairs to construct.
10741076
specific_pairs : list of tuple, optional
1075-
User-specified pairs as a list of 2-tuples of variable names. If a given tuple
1076-
contains only one non-empty variable name, then it generates all the pairs
1077-
containing it (within the limit ``max_pairs``).
1077+
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
1078+
contains only one non-empty feature name, then it generates all the pairs
1079+
containing it (within the maximum limit ``max_pairs``). These pairs have top
1080+
priority: they are constructed first.
10781081
all_possible_pairs : bool, default ``True``
10791082
If ``True`` tries to create all possible pairs within the limit ``max_pairs``.
1080-
The pairs and variables given in ``specific_pairs`` have priority.
1083+
Pairs specified with ``specific_pairs`` have top priority: they are constructed
1084+
first.
10811085
only_pairs_with : str, default ""
10821086
Constructs only pairs with the specified variable name. If equal to the empty
10831087
string "" it considers all variables to make pairs.

khiops/sklearn/estimators.py

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1926,11 +1926,11 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
19261926
*Multi-table only* : Maximum number of multi-table aggregate features to
19271927
construct. See :doc:`/multi_table_primer` for more details.
19281928
n_pairs : int, default 0
1929-
Maximum number of pair features to construct. These features represent a 2D grid
1930-
partition of the domain of a pair of features in which is optimized in a way
1931-
that the cells are the purest possible with respect to the target. Only pairs
1932-
which jointly are more informative that its univariate components may be taken
1933-
into account in the classifier.
1929+
Maximum number of pair features to construct. These features are 2D grid
1930+
partitions of univariate feature pairs. The grid is optimized such that in each
1931+
cell the target distribution is well approximated by a constant histogram. Only
1932+
pairs that are jointly more informative than their marginals may be taken into
1933+
account in the classifier.
19341934
n_trees : int, default 10
19351935
Maximum number of decision tree features to construct. The constructed trees
19361936
combine other features, either native or constructed. These features usually
@@ -1945,13 +1945,15 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
19451945
specific_pairs : list of tuple, optional
19461946
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
19471947
contains only one non-empty feature name, then it generates all the pairs
1948-
containing it (within the maximum limit n_pairs).
1949-
all_possible_pairs : bool, default True
1950-
If True tries to create all possible pairs within the limit max_pairs.
1951-
The pairs and features given in specific_pairs have priority.
1948+
containing it (within the maximum limit ``n_pairs``). These pairs have top
1949+
priority: they are constructed first.
1950+
all_possible_pairs : bool, default ``True``
1951+
If ``True`` tries to create all possible pairs within the limit ``n_pairs``.
1952+
Pairs specified with ``specific_pairs`` have top priority: they are constructed
1953+
first.
19521954
construction_rules : list of str, optional
19531955
Allowed rules for the automatic feature construction. If not set, it uses all
1954-
possible rules.
1956+
possible rules.
19551957
group_target_value : bool, default ``False``
19561958
Allows grouping of the target values in classification. It can substantially
19571959
increase the training time.
@@ -2405,11 +2407,11 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
24052407
*Multi-table only* : Maximum number of multi-table aggregate features to
24062408
construct. See :doc:`/multi_table_primer` for more details.
24072409
n_pairs : int, default 0
2408-
Maximum number of pair features to construct. These features represent a 2D grid
2409-
partition of the domain of a pair of features in which is optimized in a way
2410-
that the cells are the purest possible with respect to the target. Only pairs
2411-
which jointly are more informative that its univariate components may be taken
2412-
into account in the regressor.
2410+
Maximum number of pair features to construct. These features are 2D grid
2411+
partitions of univariate feature pairs. The grid is optimized such that in each
2412+
cell the target distribution is well approximated by a constant histogram. Only
2413+
pairs that are jointly more informative than their marginals may be taken into
2414+
account in the regressor.
24132415
n_selected_features : int, default 0
24142416
Maximum number of features to be selected in the SNB predictor. If equal to
24152417
0 it selects all the features kept in the training.
@@ -2419,10 +2421,12 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
24192421
specific_pairs : list of tuple, optional
24202422
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
24212423
contains only one non-empty feature name, then it generates all the pairs
2422-
containing it (within the maximum limit n_pairs).
2423-
all_possible_pairs : bool, default True
2424-
If True tries to create all possible pairs within the limit max_pairs.
2425-
The pairs and features given in specific_pairs have priority.
2424+
containing it (within the maximum limit ``n_pairs``). These pairs have top
2425+
priority: they are constructed first.
2426+
all_possible_pairs : bool, default ``True``
2427+
If ``True`` tries to create all possible pairs within the limit ``n_pairs``.
2428+
Pairs specified with ``specific_pairs`` have top priority: they are constructed
2429+
first.
24262430
construction_rules : list of str, optional
24272431
Allowed rules for the automatic feature construction. If not set, it uses all
24282432
possible rules.
@@ -2678,20 +2682,24 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
26782682
*Multi-table only* : Maximum number of multi-table aggregate features to
26792683
construct. See :doc:`/multi_table_primer` for more details.
26802684
n_pairs : int, default 0
2681-
Maximum number of pair features to construct. These features represent a 2D grid
2682-
partition of the domain of a pair of features in which is optimized in a way
2683-
that the cells are the purest possible with respect to the target.
2685+
Maximum number of pair features to construct. These features are 2D grid
2686+
partitions of univariate feature pairs. The grid is optimized such that in each
2687+
cell the target distribution is well approximated by a constant histogram. Only
2688+
pairs that are jointly more informative than their marginals may be taken into
2689+
account in the encoder.
26842690
n_trees : int, default 10
26852691
Maximum number of decision tree features to construct. The constructed trees
26862692
combine other features, either native or constructed. These features usually
26872693
improve a predictor's performance at the cost of interpretability of the model.
26882694
specific_pairs : list of tuple, optional
26892695
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
26902696
contains only one non-empty feature name, then it generates all the pairs
2691-
containing it (within the maximum limit n_pairs).
2692-
all_possible_pairs : bool, default True
2693-
If True tries to create all possible pairs within the limit max_pairs.
2694-
The pairs and features given in specific_pairs have priority.
2697+
containing it (within the maximum limit ``n_pairs``). These pairs have top
2698+
priority: they are constructed first.
2699+
all_possible_pairs : bool, default ``True``
2700+
If ``True`` tries to create all possible pairs within the limit ``n_pairs``.
2701+
Pairs specified with ``specific_pairs`` have top priority: they are constructed
2702+
first.
26952703
construction_rules : list of str, optional
26962704
Allowed rules for the automatic feature construction. If not set, it uses all
26972705
possible rules.

0 commit comments

Comments
 (0)