Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,23 @@
comments, and dictionary and variable block internal comments.
- (`core`) Dictionary `Rule` class and supporting API for serializing `Rule` instances.
- (`core`) New way to add a variable to a dictionary using a complete specification.
- (`core`) New API constants for rules used in automatic variable construction:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the removed attributes in the Removed section.

@popescu-v popescu-v Oct 14, 2025

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, right. They had been added in 10.2.2.4.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

- `DEFAULT_CONSTRUCTION_RULES`: names of the table and entity construction rules,
which are applied by default.
- `CALENDRICAL_CONSTRUCTION_RULES`: names of the date, time and timestamp rules.
- (`sklearn`) `Text` Khiops type support at the estimator level.

### Changed
- (`core`) Dictionary API (`DictionaryDomain`, `Dictionary`, `MetaData`): getters now
return ``None`` when a requested key is not found, instead of raising a
`KeyError` exception.

### Removed
- (`sklearn`) Remove the `n_features_evaluated_`, `feature_evaluated_names_`,
`feature_evaluated_importances_`, `n_features_used_`, `feature_used_names_`
and `feature_used_importances_` Khiops classifier and regressor estimator
attributes.

### Fixed
- (General) Inconsistency between the `tools.download_datasets` function and the
current samples directory according to `core.api.get_samples_dir()`.
Expand Down
39 changes: 0 additions & 39 deletions doc/samples/samples_sklearn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,6 @@ Samples
# Train the classifier
khc.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khc.n_features_evaluated_}")
print(f"Features selected : {khc.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khc.feature_used_names_[:3]):
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
print("---")

# Predict the classes on the test dataset
y_test_pred = khc.predict(X_test)
print("Predicted classes (first 10):")
Expand Down Expand Up @@ -186,14 +178,6 @@ Samples
# Train the classifier
khc.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khc.n_features_evaluated_}")
print(f"Features selected : {khc.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khc.feature_used_names_[:3]):
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
print("---")

# Predict the classes on the test dataset
y_test_pred = khc.predict(X_test)
print("Predicted classes (first 10):")
Expand Down Expand Up @@ -307,14 +291,6 @@ Samples
khc = KhiopsClassifier(n_trees=0)
khc.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khc.n_features_evaluated_}")
print(f"Features selected : {khc.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khc.feature_used_names_[:3]):
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
print("---")

# Predict the class on the test dataset
y_test_pred = khc.predict(X_test)
print("Predicted classes (first 10):")
Expand Down Expand Up @@ -539,14 +515,6 @@ Samples
# Train the regressor
khr.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khr.n_features_evaluated_}")
print(f"Features selected : {khr.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khr.feature_used_names_[:3]):
print(f"{feature} - Importance: {khr.feature_used_importances_[i][2]}")
print("---")

# Predict the values on the test dataset
y_test_pred = khr.predict(X_test)
print("Predicted values for 'age' (first 10):")
Expand Down Expand Up @@ -667,13 +635,6 @@ Samples
khe = KhiopsEncoder(n_features=10)
khe.fit(X, y)

# Show the feature importance info
print(f"Features evaluated: {khe.n_features_evaluated_}")
print("Top 3 evaluated features")
for i, feature in enumerate(khe.feature_evaluated_names_[:3]):
print(f"{feature} - Level: {khe.feature_evaluated_importances_[i]}")
print("---")

# Transform the train dataset
print("Encoded feature names:")
print(khe.feature_names_out_)
Expand Down
58 changes: 40 additions & 18 deletions khiops/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,10 @@
from khiops.core.internals.runner import get_runner
from khiops.core.internals.task import get_task_registry

# List of all available construction rules in the Khiops tool
all_construction_rules = [
"Day",
"DecimalTime",
"DecimalWeekDay",
"DecimalYear",
"DecimalYearTS",
"GetDate",
"GetTime",
# Construction rules
DEFAULT_CONSTRUCTION_RULES = [

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am surprised that the default construction rules do not include the "calendrical" ones, because legacy tools based on Khiops seem to assume that they do.
Is this a change introduced with v11?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opening the Khiops 11.0.0-b.0 GUI, then "Parameters" > "Advanced predictor parameters" > "Variable construction parameters" shows which rules are enabled by default and which are not: we can see that multi-table rules (Entity and Table) are enabled by default, and that "calendrical" rules are not. In this PR we lift these to the Python API.

"GetValue",
"GetValueC",
"LocalTimestamp",
"TableCount",
"TableCountDistinct",
"TableMax",
Expand All @@ -53,9 +45,37 @@
"TableSelection",
"TableStdDev",
"TableSum",
]
"""List of construction rules that Khiops uses by default

.. note::
These are all the multi-table rules.
""" # pylint: disable=pointless-string-statement

CALENDRICAL_CONSTRUCTION_RULES = [
"Day",
"DecimalTime",
"DecimalWeekDay",
"DecimalYear",
"DecimalYearTS",
"GetDate",
"GetTime",
"LocalTimestamp",
"WeekDay",
"YearDay",
]
"""List of calendrical construction rules

These rules include: date, time and timestamp rules.

.. note::
These rules are not enabled by default. The user needs to explicitly
select each of them via the ``construction_rules`` parameter of the
relevant Core API functions.
""" # pylint: disable=pointless-string-statement

# List of all available construction rules in the Khiops tool
ALL_CONSTRUCTION_RULES = DEFAULT_CONSTRUCTION_RULES + CALENDRICAL_CONSTRUCTION_RULES

##########################
# Private module methods #
Expand Down Expand Up @@ -758,8 +778,9 @@ def train_predictor(
max_constructed_variables : int, default 1000
Maximum number of variables to construct.
construction_rules : list of str, optional
Allowed rules for the automatic variable construction. If not set it uses all
possible rules.
Allowed rules for the automatic variable construction. If not set, Khiops
uses the multi-table construction rules listed in
`DEFAULT_CONSTRUCTION_RULES`.
max_text_features : int, default 10000
Maximum number of text features to construct.
text_features : str, default "words"
Expand Down Expand Up @@ -1190,21 +1211,22 @@ def train_recoder(
max_constructed_variables : int, default 100
Maximum number of variables to construct.
construction_rules : list of str, optional
Allowed rules for the automatic variable construction. If not set it uses all
possible rules.
Allowed rules for the automatic variable construction. If not set, Khiops
uses the multi-table construction rules listed in
`DEFAULT_CONSTRUCTION_RULES`.
max_text_features : int, default 10000
Maximum number of text features to construct.
text_features : str, default "words"
Type of the text features. Can be either one of:

- "words": sequences of non-space characters
- "ngrams": sequences of bytes
- "tokens": user-defined
- "words": sequences of non-space characters
- "ngrams": sequences of bytes
- "tokens": user-defined

max_trees : int, default 10
Maximum number of trees to construct.
max_pairs : int, default 0
Maximum number of variables pairs to construct.
Maximum number of variable pairs to construct.
specific_pairs : list of tuple, optional
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
contains only one non-empty feature name, then it generates all the pairs
Expand Down
6 changes: 3 additions & 3 deletions khiops/core/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,7 +733,7 @@ def get_value(self, key):

Returns
-------
`Metadata`
`MetaData`
Metadata value associated to the specified key. ``None`` is returned
if the metadata key is not found.
"""
Expand Down Expand Up @@ -1267,7 +1267,7 @@ def get_value(self, key):

Returns
-------
`Metadata`
`MetaData`
Metadata value associated to the specified key. ``None`` is returned
if the metadata key is not found.
"""
Expand Down Expand Up @@ -1527,7 +1527,7 @@ def get_value(self, key):

Returns
-------
`Metadata`
`MetaData`
Metadata value associated to the specified key. ``None`` is returned
if the metadata key is not found.
"""
Expand Down
2 changes: 1 addition & 1 deletion khiops/samples/samples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@
"metadata": {},
"source": [
"### `train_predictor_mt_with_specific_rules()`\n\n",
"Trains a multi-table predictor with specific construction rules\n\n It is the same as `.train_predictor_mt` but with the specification of the allowed\n variable construction rules. The list of available rules is found in the field\n ``kh.all_construction_rules``\n \n"
"Trains a multi-table predictor with specific construction rules\n\n It is the same as `.train_predictor_mt` but with the specification of the allowed\n variable construction rules. The list of available rules is found in the field\n ``kh.ALL_CONSTRUCTION_RULES``\n \n"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion khiops/samples/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def train_predictor_mt_with_specific_rules():

It is the same as `.train_predictor_mt` but with the specification of the allowed
variable construction rules. The list of available rules is found in the field
``kh.all_construction_rules``
``kh.ALL_CONSTRUCTION_RULES``
"""
# Imports
import os
Expand Down
39 changes: 0 additions & 39 deletions khiops/samples/samples_sklearn.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,6 @@
"# Train the classifier\n",
"khc.fit(X_train, y_train)\n",
"\n",
"# Show the feature importance info\n",
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
"print(f\"Features selected : {khc.n_features_used_}\")\n",
"print(\"Top 3 used features\")\n",
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
"print(\"---\")\n",
"\n",
"# Predict the classes on the test dataset\n",
"y_test_pred = khc.predict(X_test)\n",
"print(\"Predicted classes (first 10):\")\n",
Expand Down Expand Up @@ -198,14 +190,6 @@
"# Train the classifier\n",
"khc.fit(X_train, y_train)\n",
"\n",
"# Show the feature importance info\n",
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
"print(f\"Features selected : {khc.n_features_used_}\")\n",
"print(\"Top 3 used features\")\n",
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
"print(\"---\")\n",
"\n",
"# Predict the classes on the test dataset\n",
"y_test_pred = khc.predict(X_test)\n",
"print(\"Predicted classes (first 10):\")\n",
Expand Down Expand Up @@ -345,14 +329,6 @@
"khc = KhiopsClassifier(n_trees=0)\n",
"khc.fit(X_train, y_train)\n",
"\n",
"# Show the feature importance info\n",
"print(f\"Features evaluated: {khc.n_features_evaluated_}\")\n",
"print(f\"Features selected : {khc.n_features_used_}\")\n",
"print(\"Top 3 used features\")\n",
"for i, feature in enumerate(khc.feature_used_names_[:3]):\n",
" print(f\"{feature} - Importance: {khc.feature_used_importances_[i][2]}\")\n",
"print(\"---\")\n",
"\n",
"# Predict the class on the test dataset\n",
"y_test_pred = khc.predict(X_test)\n",
"print(\"Predicted classes (first 10):\")\n",
Expand Down Expand Up @@ -629,14 +605,6 @@
"# Train the regressor\n",
"khr.fit(X_train, y_train)\n",
"\n",
"# Show the feature importance info\n",
"print(f\"Features evaluated: {khr.n_features_evaluated_}\")\n",
"print(f\"Features selected : {khr.n_features_used_}\")\n",
"print(\"Top 3 used features\")\n",
"for i, feature in enumerate(khr.feature_used_names_[:3]):\n",
" print(f\"{feature} - Importance: {khr.feature_used_importances_[i][2]}\")\n",
"print(\"---\")\n",
"\n",
"# Predict the values on the test dataset\n",
"y_test_pred = khr.predict(X_test)\n",
"print(\"Predicted values for 'age' (first 10):\")\n",
Expand Down Expand Up @@ -796,13 +764,6 @@
"khe = KhiopsEncoder(n_features=10)\n",
"khe.fit(X, y)\n",
"\n",
"# Show the feature importance info\n",
"print(f\"Features evaluated: {khe.n_features_evaluated_}\")\n",
"print(\"Top 3 evaluated features\")\n",
"for i, feature in enumerate(khe.feature_evaluated_names_[:3]):\n",
" print(f\"{feature} - Level: {khe.feature_evaluated_importances_[i]}\")\n",
"print(\"---\")\n",
"\n",
"# Transform the train dataset\n",
"print(\"Encoded feature names:\")\n",
"print(khe.feature_names_out_)\n",
Expand Down
39 changes: 0 additions & 39 deletions khiops/samples/samples_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,6 @@ def khiops_classifier():
# Train the classifier
khc.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khc.n_features_evaluated_}")
print(f"Features selected : {khc.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khc.feature_used_names_[:3]):
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
print("---")

# Predict the classes on the test dataset
y_test_pred = khc.predict(X_test)
print("Predicted classes (first 10):")
Expand Down Expand Up @@ -181,14 +173,6 @@ def khiops_classifier_text():
# Train the classifier
khc.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khc.n_features_evaluated_}")
print(f"Features selected : {khc.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khc.feature_used_names_[:3]):
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
print("---")

# Predict the classes on the test dataset
y_test_pred = khc.predict(X_test)
print("Predicted classes (first 10):")
Expand Down Expand Up @@ -310,14 +294,6 @@ def khiops_classifier_multitable_snowflake():
khc = KhiopsClassifier(n_trees=0)
khc.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khc.n_features_evaluated_}")
print(f"Features selected : {khc.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khc.feature_used_names_[:3]):
print(f"{feature} - Importance: {khc.feature_used_importances_[i][2]}")
print("---")

# Predict the class on the test dataset
y_test_pred = khc.predict(X_test)
print("Predicted classes (first 10):")
Expand Down Expand Up @@ -550,14 +526,6 @@ def khiops_regressor():
# Train the regressor
khr.fit(X_train, y_train)

# Show the feature importance info
print(f"Features evaluated: {khr.n_features_evaluated_}")
print(f"Features selected : {khr.n_features_used_}")
print("Top 3 used features")
for i, feature in enumerate(khr.feature_used_names_[:3]):
print(f"{feature} - Importance: {khr.feature_used_importances_[i][2]}")
print("---")

# Predict the values on the test dataset
y_test_pred = khr.predict(X_test)
print("Predicted values for 'age' (first 10):")
Expand Down Expand Up @@ -696,13 +664,6 @@ def khiops_encoder_multitable_snowflake():
khe = KhiopsEncoder(n_features=10)
khe.fit(X, y)

# Show the feature importance info
print(f"Features evaluated: {khe.n_features_evaluated_}")
print("Top 3 evaluated features")
for i, feature in enumerate(khe.feature_evaluated_names_[:3]):
print(f"{feature} - Level: {khe.feature_evaluated_importances_[i]}")
print("---")

# Transform the train dataset
print("Encoded feature names:")
print(khe.feature_names_out_)
Expand Down
Loading
Loading