Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
### Added
- (`core`) Dictionary API support for dictionary, variable and variable block
comments, and dictionary and variable block internal comments.
- (`core`) Dictionary `Rule` class and supporting API for adding and getting
rules to / from variables and variable blocks.
- (`core`) Dictionary `Rule` class and supporting API for serializing `Rule` instances.
- (`core`) New way to add a variable to a dictionary using a complete specification.
- (`sklearn`) `Text` Khiops type support at the estimator level.

Expand Down
3 changes: 0 additions & 3 deletions doc/notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@ scenario_prologue: str, default ""
force_ansi_scenario : bool, default ``False``
*Advanced* If True the internal scenario generated by Khiops will force characters such as
accented ones to be decoded with the UTF8->ANSI khiops transformation.
batch_mode : bool, default ``True``
*Deprecated* Will be removed in Khiops 11. If ``True`` activates batch mode (command line option
``-b`` of the desktop app).

.. _core-api-input-types:

Expand Down
8 changes: 4 additions & 4 deletions doc/samples/samples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ Samples
third_dictionary.add_variable_from_spec(
name="computed",
type="Numerical",
rule=kh.Rule("Ceil", kh.Rule("Product", 3, kh.Rule("Random()"))),
rule=str(kh.Rule("Ceil", kh.Rule("Product", 3, kh.Rule("Random")))),
Comment thread
tramora marked this conversation as resolved.
)

# Add the variables used in a multi-table context in the first dictionary.
Expand Down Expand Up @@ -649,15 +649,15 @@ Samples

# Create fold indexing rule and set it on `fold_index_variable`
fold_index_variable = dictionary.get_variable("FoldIndex")
fold_index_variable.set_rule(
kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random()"))),
fold_index_variable.rule = str(
kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random"))),
Comment thread
tramora marked this conversation as resolved.
)

# Add variables that indicate if the instance is in the train dataset:
for fold_index in range(1, fold_number + 1):
name = "IsInTrainDataset" + str(fold_index)
dictionary.add_variable_from_spec(name=name, type="Numerical", used=False)
dictionary.get_variable(name).set_rule(
dictionary.get_variable(name).rule = str(
kh.Rule("NEQ", fold_index_variable, fold_index),
)

Expand Down
199 changes: 122 additions & 77 deletions khiops/core/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ def _quote_value(value):


def _check_name(name):
"""Ensures the variable name is consistent
with the Khiops core name constraints
"""Ensures the variable name is consistent with the Khiops core name constraints

Plain string or bytes are both accepted as input. The Khiops core forbids a name:

Plain string or bytes are both accepted as input.
The Khiops core forbids a name
- with a length outside the [1,128] interval
- containing a simple (Unix) line feed (\n)
- with leading or trailing spaces.
This function must check at least these constraints.

This function must check these constraints.

Parameters
----------
Expand Down Expand Up @@ -857,8 +857,8 @@ def add_variable_from_spec(
Object type. Ignored if variable type not in ["Entity", "Table"].
structure_type : str, optional
Structure type. Ignored if variable type is not "Structure".
rule : `Rule`, optional
Variable rule.
rule : str, optional
String representation of a variable rule.
meta_data : dict, optional
A Python dictionary which holds the metadata specification.
The dictionary keys are str. The values can be str, bool, float or int.
Expand Down Expand Up @@ -906,8 +906,8 @@ def add_variable_from_spec(
type_error_message("structure_type", structure_type, "string-like")
)
if rule is not None:
if not isinstance(rule, Rule):
raise TypeError(type_error_message("rule", rule, Rule))
if not isinstance(rule, str):
raise TypeError(type_error_message("rule", rule, str))

# Variable initialization
variable = Variable()
Expand All @@ -923,7 +923,7 @@ def add_variable_from_spec(
if structure_type is not None:
variable.structure_type = structure_type
if rule is not None:
variable.set_rule(rule)
variable.rule = str(rule)
self.add_variable(variable)

def remove_variable(self, variable_name):
Expand Down Expand Up @@ -1363,34 +1363,6 @@ def full_type(self):
full_type += f"({self.structure_type})"
return full_type

def get_rule(self):
"""Gets the rule of the variable

Returns
-------
`Rule`
A `Rule` instance created as a verbatim rule from the ``rule``
attribute of the variable.
"""
return Rule(verbatim=self.rule, is_reference=self.is_reference_rule())

def set_rule(self, rule):
"""Sets a rule on a specified variable in the dictionary

Parameters
----------
rule : `Rule`
The rule to be set on the variable.

Raises
------
`TypeError`
If ``rule`` is not of type `Rule`.
"""
if not isinstance(rule, Rule):
raise TypeError(type_error_message("rule", rule, Rule))
self.rule = repr(rule)

def write(self, writer):
"""Writes the domain to a file writer in ``.kdic`` format

Expand Down Expand Up @@ -1576,39 +1548,6 @@ def get_value(self, key):
"""
return self.meta_data.get_value(key)

def get_rule(self):
"""Gets the rule of the variable block

Returns
-------
`Rule`
A `Rule` instance created as a verbatim rule from the ``rule``
attribute of the variable block.
"""
return Rule(verbatim=self.rule)

def set_rule(self, rule):
"""Sets a rule on a specified variable block in the dictionary

Parameters
----------
rule : `Rule`
The rule to be set on the variable block.

Raises
------
`TypeError`
If ``rule`` is not of type `Rule`.

`ValueError`
If ``rule`` is a reference rule.
"""
if not isinstance(rule, Rule):
raise TypeError(type_error_message("rule", rule, Rule))
if rule.is_reference:
raise ValueError("Cannot set reference rule on a variable block")
self.rule = repr(rule)

def write(self, writer):
"""Writes the variable block to a file writer in ``.kdic`` format

Expand Down Expand Up @@ -1664,6 +1603,17 @@ def write(self, writer):
class Rule:
"""A rule of a variable or variable block in a Khiops dictionary

This object is a convenience feature which eases rule creation and
serialization, especially in complex cases (rule operands which are
variables or rules themselves, sometimes upper-scoped). A `Rule` instance
must be converted to `str` before setting it in a `Variable` or
`VariableBlock` instance.

`Rule` instances can be created either from full operand specifications, or
from verbatim rules. The latter is useful when the rule is retrieved from an
existing variable or variable block and is used as an operand in another
rule.

Parameters
----------
name_and_operands : tuple
Expand Down Expand Up @@ -1710,6 +1660,91 @@ class Rule:

.. note::
This attribute cannot be changed on a `Rule` instance.

Examples
--------
- basic rule, with variables as operands:
- verbatim:
.. code-block::

Product(PetalLength, PetalWidth)

- object construction:
.. highlight:: python
.. code-block:: python

petal_length_var = kh.Variable()
petal_length_var.name = "PetalLength"
petal_length_var.type = "Numerical"
petal_width_var = kh.Variable()
petal_width_var.name = "PetalWidth"
petal_width_var.type = "Numerical"
rule = kh.Rule("Product", petal_length_var, petal_width_var)

- multi-table rule:
- verbatim:
.. code-block::

TableCount(
TableSelection(
Vehicles,
EQ(PassengerNumber, 1)
)
)

- object construction:
.. highlight:: python
.. code-block:: python

vehicles_var = accidents_dictionary.get_variable("Vehicles")
passenger_number_var = vehicles_dictionary.get_variable(
"PassengerNumber"
)
rule = kh.Rule(
"TableCount",
kh.Rule(
"TableSelection",
vehicles_var,
kh.Rule("EQ", passenger_number_var, 1)
)
)

- multi-table rule with upper-scoped operands (advanced usage):
- verbatim:
.. code-block::

TableSelection(
Vehicles,
EQ(
PassengerNumber,
.TableMax(Vehicles, PassengerNumber)
)
)

- object construction:
.. highlight:: python
.. code-block:: python

vehicles_var = accidents_dictionary.get_variable("Vehicles")
passenger_number_var = vehicles_dictionary.get_variable(
"PassengerNumber"
)
rule = kh.Rule(
"TableSelection",
vehicles_var,
kh.Rule(
"EQ",
passenger_number_var,
kh.upper_scope(
kh.Rule(
"TableMax",
vehicles_var,
passenger_number_var
)
)
)
)

"""

def __init__(self, *name_and_operands, verbatim=None, is_reference=False):
Expand Down Expand Up @@ -1792,6 +1827,13 @@ def copy(self):
def write(self, writer):
"""Writes the rule to a file writer in the ``.kdic`` format

This method ensures proper `Rule` serialization, automatically handling:

- back-quote recoding in variable names
- double-quote recoding in categorical constants
- missing data (``inf``, ``-inf``, ``NaN``) serialization as ``#Missing``
- upper-scope operator serialization as ``.``

Parameters
----------
writer : `.KhiopsOutputWriter`
Expand All @@ -1805,9 +1847,9 @@ def write(self, writer):
raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))

# Write standard rule
rule_pattern = r"^[A-Z]([a-zA-Z]*)\(?.*\)?$"
rule_regex = re.compile(rule_pattern)
bytes_rule_regex = re.compile(bytes(rule_pattern, encoding="ascii"))
rule_name_pattern = r"^[A-Z]([a-zA-Z]*)$"
rule_name_regex = re.compile(rule_name_pattern)
bytes_rule_name_regex = re.compile(bytes(rule_name_pattern, encoding="ascii"))
if self.operands:
if self.is_reference:
writer.write("[")
Expand Down Expand Up @@ -1838,11 +1880,14 @@ def write(self, writer):
# Write no-operand rule
elif (
isinstance(self.name, str)
and rule_regex.match(self.name)
and rule_name_regex.match(self.name)
or isinstance(self.name, bytes)
and bytes_rule_regex.match(self.name)
and bytes_rule_name_regex.match(self.name)
):
writer.write(self.name)
writer.write(_format_name(self.name))

# Add parentheses automatically
Comment thread
tramora marked this conversation as resolved.
writer.write("()")
# Write verbatim-given rule
elif self._verbatim:
writer.write(self._verbatim)
Expand Down
8 changes: 4 additions & 4 deletions khiops/samples/samples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
"third_dictionary.add_variable_from_spec(\n",
" name=\"computed\",\n",
" type=\"Numerical\",\n",
" rule=kh.Rule(\"Ceil\", kh.Rule(\"Product\", 3, kh.Rule(\"Random()\"))),\n",
" rule=str(kh.Rule(\"Ceil\", kh.Rule(\"Product\", 3, kh.Rule(\"Random\")))),\n",
")\n",
"\n",
"# Add the variables used in a multi-table context in the first dictionary.\n",
Expand Down Expand Up @@ -869,15 +869,15 @@
"\n",
"# Create fold indexing rule and set it on `fold_index_variable`\n",
"fold_index_variable = dictionary.get_variable(\"FoldIndex\")\n",
"fold_index_variable.set_rule(\n",
" kh.Rule(\"Ceil\", kh.Rule(\"Product\", fold_number, kh.Rule(\"Random()\"))),\n",
"fold_index_variable.rule = str(\n",
" kh.Rule(\"Ceil\", kh.Rule(\"Product\", fold_number, kh.Rule(\"Random\"))),\n",
")\n",
"\n",
"# Add variables that indicate if the instance is in the train dataset:\n",
"for fold_index in range(1, fold_number + 1):\n",
" name = \"IsInTrainDataset\" + str(fold_index)\n",
" dictionary.add_variable_from_spec(name=name, type=\"Numerical\", used=False)\n",
" dictionary.get_variable(name).set_rule(\n",
" dictionary.get_variable(name).rule = str(\n",
" kh.Rule(\"NEQ\", fold_index_variable, fold_index),\n",
" )\n",
"\n",
Expand Down
8 changes: 4 additions & 4 deletions khiops/samples/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def create_dictionary_domain():
third_dictionary.add_variable_from_spec(
name="computed",
type="Numerical",
rule=kh.Rule("Ceil", kh.Rule("Product", 3, kh.Rule("Random()"))),
rule=str(kh.Rule("Ceil", kh.Rule("Product", 3, kh.Rule("Random")))),
)

# Add the variables used in a multi-table context in the first dictionary.
Expand Down Expand Up @@ -724,15 +724,15 @@ def train_predictor_with_cross_validation():

# Create fold indexing rule and set it on `fold_index_variable`
fold_index_variable = dictionary.get_variable("FoldIndex")
fold_index_variable.set_rule(
kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random()"))),
fold_index_variable.rule = str(
kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random"))),
)

# Add variables that indicate if the instance is in the train dataset:
for fold_index in range(1, fold_number + 1):
name = "IsInTrainDataset" + str(fold_index)
dictionary.add_variable_from_spec(name=name, type="Numerical", used=False)
dictionary.get_variable(name).set_rule(
dictionary.get_variable(name).rule = str(
kh.Rule("NEQ", fold_index_variable, fold_index),
)

Expand Down
Loading
Loading