Skip to content

Commit f156330

Browse files
committed
Add Khiops dictionary rule API support
- add a `Rule` class - add `{Variable,VariableBlock}.{get,set}_rule` methods - add an `upper_scope` function which applies the upper-scope operator `.` to `Variable` and `Rule` instances and to upper-scoped instances of these.
1 parent c8fbe9e commit f156330

File tree

5 files changed

+596
-7
lines changed

5 files changed

+596
-7
lines changed

doc/samples/samples.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -655,17 +655,23 @@ Samples
655655
fold_index_variable.name = "FoldIndex"
656656
fold_index_variable.type = "Numerical"
657657
fold_index_variable.used = False
658-
fold_index_variable.rule = "Ceil(Product(" + str(fold_number) + ", Random()))"
659658
dictionary.add_variable(fold_index_variable)
660659
660+
# Create fold indexing rule and set it on `fold_index_variable`
661+
dictionary.get_variable(fold_index_variable.name).set_rule(
662+
kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random()"))),
663+
)
664+
661665
# Add variables that indicate if the instance is in the train dataset:
662666
for fold_index in range(1, fold_number + 1):
663667
is_in_train_dataset_variable = kh.Variable()
664668
is_in_train_dataset_variable.name = "IsInTrainDataset" + str(fold_index)
665669
is_in_train_dataset_variable.type = "Numerical"
666670
is_in_train_dataset_variable.used = False
667-
is_in_train_dataset_variable.rule = "NEQ(FoldIndex, " + str(fold_index) + ")"
668671
dictionary.add_variable(is_in_train_dataset_variable)
672+
dictionary.get_variable(is_in_train_dataset_variable.name).set_rule(
673+
kh.Rule("NEQ", fold_index_variable, fold_index),
674+
)
669675
670676
# Print dictionary with fold variables
671677
print("Dictionary file with fold variables")

khiops/core/dictionary.py

Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
1414
"""
1515
import io
16+
import math
1617
import os
1718
import re
1819
import warnings
@@ -989,8 +990,10 @@ class Variable:
989990
rule : str
990991
Derivation rule or external table reference. Set to "" if there is no
991992
rule associated to this variable. Examples:
993+
992994
- standard rule: "Sum(Var1, Var2)"
993995
- reference rule: "[TableName]"
996+
994997
variable_block : `VariableBlock`
995998
Block to which the variable belongs. Not set if the variable does not belong to
996999
a block.
@@ -1182,6 +1185,27 @@ def full_type(self):
11821185
full_type += f"({self.structure_type})"
11831186
return full_type
11841187

1188+
def get_rule(self):
    """Builds a `Rule` from the ``rule`` attribute of this variable

    The ``rule`` attribute is used as the name of a new `Rule` that has no
    operands.

    Returns
    -------
    `Rule`
        A new rule whose name is the ``rule`` attribute of this variable.

    Raises
    ------
    `TypeError`
        If the ``rule`` attribute is not string-like.
    `ValueError`
        If the ``rule`` attribute is empty, that is, if the variable has no
        rule.
    """
    rule_text = self.rule
    return Rule(rule_text)
1191+
1192+
def set_rule(self, rule):
    """Stores the serialization of a `Rule` as the rule of this variable

    Parameters
    ----------
    rule : `Rule`
        The rule whose ``repr`` is assigned to the ``rule`` attribute.

    Raises
    ------
    `TypeError`
        If ``rule`` is not a `Rule` instance.
    """
    # Only Rule instances know how to serialize themselves
    if isinstance(rule, Rule):
        self.rule = repr(rule)
        return
    raise TypeError(type_error_message("rule", rule, Rule))
1208+
11851209
def write(self, writer):
11861210
"""Writes the domain to a file writer in ``.kdic`` format
11871211
@@ -1357,6 +1381,27 @@ def get_value(self, key):
13571381
"""
13581382
return self.meta_data.get_value(key)
13591383

1384+
def get_rule(self):
    """Builds a `Rule` from the ``rule`` attribute of this variable block

    The ``rule`` attribute is used as the name of a new `Rule` that has no
    operands.

    Returns
    -------
    `Rule`
        A new rule whose name is the ``rule`` attribute of this block.

    Raises
    ------
    `TypeError`
        If the ``rule`` attribute is not string-like.
    `ValueError`
        If the ``rule`` attribute is empty, that is, if the block has no rule.
    """
    block_rule_text = self.rule
    return Rule(block_rule_text)
1387+
1388+
def set_rule(self, rule):
    """Stores the serialization of a `Rule` as the rule of this variable block

    Parameters
    ----------
    rule : `Rule`
        The rule whose ``repr`` is assigned to the ``rule`` attribute.

    Raises
    ------
    `TypeError`
        If ``rule`` is not a `Rule` instance.
    """
    # Only Rule instances know how to serialize themselves
    if isinstance(rule, Rule):
        self.rule = repr(rule)
        return
    raise TypeError(type_error_message("rule", rule, Rule))
1404+
13601405
def write(self, writer):
13611406
"""Writes the variable block to a file writer in ``.kdic`` format
13621407
@@ -1409,6 +1454,222 @@ def write(self, writer):
14091454
writer.writeln("")
14101455

14111456

1457+
class Rule:
    """A rule of a variable in a Khiops dictionary

    Parameters
    ----------
    name : str or bytes
        Name or verbatim of the rule. It is interpreted as the verbatim
        representation of an entire rule if and only if:

        - it starts with an uppercase ASCII letter (typically an
          UpperCamelCase rule name, optionally followed by a parenthesized
          block (...)) and spans a single line
        - ``operands`` is empty

    operands : tuple of operands
        Each operand can have one of the following types:

        - str
        - bytes
        - int
        - float
        - `Variable`
        - `Rule`
        - upper-scoped `Variable`
        - upper-scoped `Rule`

    is_reference : bool, default ``False``
        If set to ``True``, then the rule is serialized as a reference rule:
        ``Rule(Operand1, Operand2, ...)`` is serialized as
        ``[Operand1, Operand2, ...]``.

    Raises
    ------
    `TypeError`
        If ``name`` is not string-like, if an operand has an unsupported type
        or if ``is_reference`` is not a bool.
    `ValueError`
        If ``name`` is empty and ``is_reference`` is ``False``.

    Attributes
    ----------
    name : str or bytes
        Name of the rule.
    operands : tuple of operands
        Each operand has one of the following types:

        - str
        - bytes
        - int
        - float
        - `Variable`
        - `Rule`
        - upper-scoped `Variable`
        - upper-scoped `Rule`

    is_reference : bool
        The reference status of the rule.

        .. note::
            This attribute cannot be changed on a `Rule` instance.
    """

    # Patterns recognizing a rule given verbatim through ``name``; compiled
    # once for the class instead of on every call to ``write``
    _verbatim_pattern = r"^[A-Z]([a-zA-Z]*)\(?.*\)?$"
    _verbatim_regex = re.compile(_verbatim_pattern)
    _verbatim_bytes_regex = re.compile(bytes(_verbatim_pattern, encoding="ascii"))

    def __init__(self, name, *operands, is_reference=False):
        """See class docstring"""
        # Check input parameters
        if not is_string_like(name):
            raise TypeError(type_error_message("name", name, "string-like"))
        for operand in operands:
            if not is_string_like(operand) and not isinstance(
                operand, (int, float, Variable, Rule, _ScopedOperand)
            ):
                raise TypeError(
                    type_error_message(
                        f"Operand '{operand}'",
                        operand,
                        "string-like",
                        int,
                        float,
                        Variable,
                        Rule,
                        "upper-scoped Variable",
                        "upper-scoped Rule",
                    )
                )
        if not isinstance(is_reference, bool):
            raise TypeError(type_error_message("is_reference", is_reference, bool))
        # The name of a reference rule is not serialized, so it may be empty
        if not is_reference and not name:
            raise ValueError("'name' must be a non-empty string")

        # Initialize attributes
        self.name = name
        self.operands = operands
        self._is_reference = is_reference

    @property
    def is_reference(self):
        """bool : Whether the rule is serialized as a reference rule"""
        return self._is_reference

    def __repr__(self):
        """Returns the serialization produced by `write` as a str"""
        stream = io.BytesIO()
        writer = KhiopsOutputWriter(stream)
        self.write(writer)
        return str(stream.getvalue(), encoding="utf8", errors="replace")

    def copy(self):
        """Copies this rule instance

        The copy shares the operand objects with this instance and keeps its
        reference status.

        Returns
        -------
        `Rule`
            A copy of this instance
        """
        # The reference status must be forwarded explicitly: it is keyword-only
        # and defaults to False, which would turn a reference rule into a
        # standard one
        return Rule(self.name, *self.operands, is_reference=self.is_reference)

    def write(self, writer):
        """Writes the rule to a file writer in the ``.kdic`` format

        Parameters
        ----------
        writer : `.KhiopsOutputWriter`
            Output writer.

        Raises
        ------
        `TypeError`
            If ``writer`` is not a `.KhiopsOutputWriter`.

        .. note::
            If ``self.is_reference`` is set, then ``self.name`` is not
            included in the serialization.

        .. note::
            If ``self.operands`` is empty and ``self.name`` does not match the
            verbatim rule pattern, then nothing is written.
        """
        # Check the type of the writer
        if not isinstance(writer, KhiopsOutputWriter):
            raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))

        # Write standard rule
        if self.operands:
            if self.is_reference:
                writer.write("[")
            else:
                writer.write(_format_name(self.name))
                writer.write("(")

            # Write operand, according to its type
            # Variable operands have their name written only
            for index, operand in enumerate(self.operands):
                if index > 0:
                    writer.write(", ")
                if isinstance(operand, (Rule, _ScopedOperand)):
                    operand.write(writer)
                elif isinstance(operand, Variable):
                    writer.write(_format_name(operand.name))
                elif is_string_like(operand):
                    writer.write(_quote_value(operand))
                elif isinstance(operand, float) and not math.isfinite(operand):
                    # NaN and infinite values are written as the missing value
                    writer.write("#Missing")
                # int or finite float cases
                else:
                    writer.write(str(operand))

            if self.is_reference:
                writer.write("]")
            else:
                writer.write(")")
        # Write verbatim-given rule
        elif (
            isinstance(self.name, str)
            and self._verbatim_regex.match(self.name)
            or isinstance(self.name, bytes)
            and self._verbatim_bytes_regex.match(self.name)
        ):
            writer.write(self.name)
1618+
1619+
class _ScopedOperand:
    """An operand to which the upper-scope operator ``.`` is applied

    Instances are serialized as ``.`` followed by the serialization of the
    wrapped operand; wrapping an instance again adds one more ``.``.

    Parameters
    ----------
    operand : `Variable`, `Rule` or `_ScopedOperand`
        The operand that is upper-scoped.

    Raises
    ------
    `TypeError`
        If ``operand`` is not a `Variable`, `Rule` or `_ScopedOperand`.
    """

    def __init__(self, operand):
        """See class docstring"""
        # Explicit check instead of an assert: asserts are removed under -O,
        # and isinstance accepts the same subclasses as `upper_scope` does
        if not isinstance(operand, (Variable, Rule, _ScopedOperand)):
            raise TypeError(
                type_error_message(
                    "operand", operand, Variable, Rule, "upper-scoped Variable or Rule"
                )
            )
        self.operand = operand

    def write(self, writer):
        """Writes the upper-scoped operand to a file writer

        Parameters
        ----------
        writer : `.KhiopsOutputWriter`
            Output writer.

        Raises
        ------
        `TypeError`
            If ``writer`` is not a `.KhiopsOutputWriter`.
        """
        if not isinstance(writer, KhiopsOutputWriter):
            raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))
        writer.write(".")
        # Variable operands have their name written only
        if isinstance(self.operand, Variable):
            writer.write(_format_name(self.operand.name))
        else:
            self.operand.write(writer)

    def __repr__(self):
        """Returns the serialization produced by `write` as a str"""
        stream = io.BytesIO()
        writer = KhiopsOutputWriter(stream)
        self.write(writer)
        return str(stream.getvalue(), encoding="utf8", errors="replace")
1641+
1642+
1643+
def upper_scope(operand):
    """Wraps an operand so that it is written with the upper-scope operator ``.``

    Parameters
    ----------
    operand : `Variable`, `Rule`, upper-scoped `Variable` or upper-scoped `Rule`
        The operand to upper-scope.

    Returns
    -------
    upper-scoped operand
        The operand with the upper-scope operator ``.`` applied to it, as it
        would be in a rule of the ``.kdic`` dictionary language.

    Raises
    ------
    `TypeError`
        If ``operand`` is not a `Variable`, a `Rule`, an upper-scoped
        `Variable` or an upper-scoped `Rule`.
    """
    scopable_types = (Variable, Rule, _ScopedOperand)
    if isinstance(operand, scopable_types):
        return _ScopedOperand(operand)
    raise TypeError(
        type_error_message(
            "operand", operand, Variable, Rule, "upper-scoped Variable or Rule"
        )
    )
1671+
1672+
14121673
class MetaData:
14131674
"""A metadata container for a dictionary, a variable or variable block
14141675

khiops/samples/samples.ipynb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -875,17 +875,23 @@
875875
"fold_index_variable.name = \"FoldIndex\"\n",
876876
"fold_index_variable.type = \"Numerical\"\n",
877877
"fold_index_variable.used = False\n",
878-
"fold_index_variable.rule = \"Ceil(Product(\" + str(fold_number) + \", Random()))\"\n",
879878
"dictionary.add_variable(fold_index_variable)\n",
880879
"\n",
880+
"# Create fold indexing rule and set it on `fold_index_variable`\n",
881+
"dictionary.get_variable(fold_index_variable.name).set_rule(\n",
882+
" kh.Rule(\"Ceil\", kh.Rule(\"Product\", fold_number, kh.Rule(\"Random()\"))),\n",
883+
")\n",
884+
"\n",
881885
"# Add variables that indicate if the instance is in the train dataset:\n",
882886
"for fold_index in range(1, fold_number + 1):\n",
883887
" is_in_train_dataset_variable = kh.Variable()\n",
884888
" is_in_train_dataset_variable.name = \"IsInTrainDataset\" + str(fold_index)\n",
885889
" is_in_train_dataset_variable.type = \"Numerical\"\n",
886890
" is_in_train_dataset_variable.used = False\n",
887-
" is_in_train_dataset_variable.rule = \"NEQ(FoldIndex, \" + str(fold_index) + \")\"\n",
888891
" dictionary.add_variable(is_in_train_dataset_variable)\n",
892+
" dictionary.get_variable(is_in_train_dataset_variable.name).set_rule(\n",
893+
" kh.Rule(\"NEQ\", fold_index_variable, fold_index),\n",
894+
" )\n",
889895
"\n",
890896
"# Print dictionary with fold variables\n",
891897
"print(\"Dictionary file with fold variables\")\n",

khiops/samples/samples.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,17 +728,23 @@ def train_predictor_with_cross_validation():
728728
fold_index_variable.name = "FoldIndex"
729729
fold_index_variable.type = "Numerical"
730730
fold_index_variable.used = False
731-
fold_index_variable.rule = "Ceil(Product(" + str(fold_number) + ", Random()))"
732731
dictionary.add_variable(fold_index_variable)
733732

733+
# Create fold indexing rule and set it on `fold_index_variable`
734+
dictionary.get_variable(fold_index_variable.name).set_rule(
735+
kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random()"))),
736+
)
737+
734738
# Add variables that indicate if the instance is in the train dataset:
735739
for fold_index in range(1, fold_number + 1):
736740
is_in_train_dataset_variable = kh.Variable()
737741
is_in_train_dataset_variable.name = "IsInTrainDataset" + str(fold_index)
738742
is_in_train_dataset_variable.type = "Numerical"
739743
is_in_train_dataset_variable.used = False
740-
is_in_train_dataset_variable.rule = "NEQ(FoldIndex, " + str(fold_index) + ")"
741744
dictionary.add_variable(is_in_train_dataset_variable)
745+
dictionary.get_variable(is_in_train_dataset_variable.name).set_rule(
746+
kh.Rule("NEQ", fold_index_variable, fold_index),
747+
)
742748

743749
# Print dictionary with fold variables
744750
print("Dictionary file with fold variables")

0 commit comments

Comments
 (0)