Skip to content

Commit cd65017

Browse files
committed
Add Khiops dictionary rule API support
- add a `Rule` class which supports: - standard rules - reference rules - rules provided as verbatims - add `{Variable,VariableBlock}.{get,set}_rule` methods - add an `upper_scope` function which applies the upper-scope operand `.` to `Variable` and `Rule` instances and to upper-scoped instances of these.
1 parent c8fbe9e commit cd65017

File tree

5 files changed

+667
-9
lines changed

5 files changed

+667
-9
lines changed

doc/samples/samples.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -655,17 +655,23 @@ Samples
655655
fold_index_variable.name = "FoldIndex"
656656
fold_index_variable.type = "Numerical"
657657
fold_index_variable.used = False
658-
fold_index_variable.rule = "Ceil(Product(" + str(fold_number) + ", Random()))"
659658
dictionary.add_variable(fold_index_variable)
660659
660+
# Create fold indexing rule and set it on `fold_index_variable`
661+
dictionary.get_variable(fold_index_variable.name).set_rule(
662+
kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random()"))),
663+
)
664+
661665
# Add variables that indicate if the instance is in the train dataset:
662666
for fold_index in range(1, fold_number + 1):
663667
is_in_train_dataset_variable = kh.Variable()
664668
is_in_train_dataset_variable.name = "IsInTrainDataset" + str(fold_index)
665669
is_in_train_dataset_variable.type = "Numerical"
666670
is_in_train_dataset_variable.used = False
667-
is_in_train_dataset_variable.rule = "NEQ(FoldIndex, " + str(fold_index) + ")"
668671
dictionary.add_variable(is_in_train_dataset_variable)
672+
dictionary.get_variable(is_in_train_dataset_variable.name).set_rule(
673+
kh.Rule("NEQ", fold_index_variable, fold_index),
674+
)
669675
670676
# Print dictionary with fold variables
671677
print("Dictionary file with fold variables")

khiops/core/dictionary.py

Lines changed: 307 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
1414
"""
1515
import io
16+
import math
1617
import os
1718
import re
1819
import warnings
@@ -989,8 +990,10 @@ class Variable:
989990
rule : str
990991
Derivation rule or external table reference. Set to "" if there is no
991992
rule associated to this variable. Examples:
993+
992994
- standard rule: "Sum(Var1, Var2)"
993995
- reference rule: "[TableName]"
996+
994997
variable_block : `VariableBlock`
995998
Block to which the variable belongs. Not set if the variable does not belong to
996999
a block.
@@ -1158,11 +1161,11 @@ def is_reference_rule(self):
11581161
"""
11591162
if self.rule:
11601163
if isinstance(self.rule, str):
1161-
if self.rule[0] == "[":
1164+
if self.rule.startswith("[") and self.rule.endswith("]"):
11621165
return True
11631166
else:
11641167
assert isinstance(self.rule, bytes)
1165-
if self.rule[0] == b"[":
1168+
if self.rule.startswith(b"[") and self.rule.endswith(b"]"):
11661169
return True
11671170
return False
11681171

@@ -1182,6 +1185,34 @@ def full_type(self):
11821185
full_type += f"({self.structure_type})"
11831186
return full_type
11841187

1188+
def get_rule(self):
    """Builds a `Rule` object from the ``rule`` attribute of the variable

    Returns
    -------
    `Rule`
        A verbatim `Rule` instance wrapping the current ``rule`` attribute of
        the variable. Its reference status is the one reported by
        `is_reference_rule`.
    """
    verbatim_rule = self.rule
    reference_status = self.is_reference_rule()
    return Rule(verbatim=verbatim_rule, is_reference=reference_status)
1198+
1199+
def set_rule(self, rule):
    """Stores the serialized form of a rule in the variable

    Parameters
    ----------
    rule : `Rule`
        The rule to be set on the variable. Its ``repr`` is what gets stored in
        the ``rule`` attribute.

    Raises
    ------
    `TypeError`
        If ``rule`` is not of type `Rule`.
    """
    if isinstance(rule, Rule):
        # The ``rule`` attribute holds the textual serialization of the rule
        self.rule = repr(rule)
        return
    raise TypeError(type_error_message("rule", rule, Rule))
1215+
11851216
def write(self, writer):
11861217
"""Writes the domain to a file writer in ``.kdic`` format
11871218
@@ -1357,6 +1388,39 @@ def get_value(self, key):
13571388
"""
13581389
return self.meta_data.get_value(key)
13591390

1391+
def get_rule(self):
    """Builds a `Rule` object from the ``rule`` attribute of the variable block

    Returns
    -------
    `Rule`
        A verbatim `Rule` instance wrapping the current ``rule`` attribute of
        the variable block.
    """
    block_rule = Rule(verbatim=self.rule)
    return block_rule
1401+
1402+
def set_rule(self, rule):
    """Stores the serialized form of a rule in the variable block

    Parameters
    ----------
    rule : `Rule`
        The rule to be set on the variable block. Its ``repr`` is what gets
        stored in the ``rule`` attribute.

    Raises
    ------
    `TypeError`
        If ``rule`` is not of type `Rule`.

    `ValueError`
        If ``rule`` is a reference rule.
    """
    if isinstance(rule, Rule):
        # Reference rules are rejected for variable blocks
        if rule.is_reference:
            raise ValueError("Cannot set reference rule on a variable block")
        self.rule = repr(rule)
    else:
        raise TypeError(type_error_message("rule", rule, Rule))
1423+
13601424
def write(self, writer):
13611425
"""Writes the variable block to a file writer in ``.kdic`` format
13621426
@@ -1409,6 +1473,247 @@ def write(self, writer):
14091473
writer.writeln("")
14101474

14111475

1476+
class Rule:
    """A rule of a variable or variable block in a Khiops dictionary

    Parameters
    ----------
    name_and_operands : tuple
        Each tuple member can have one of the following types:

        - str
        - bytes
        - int
        - float
        - `Variable`
        - `Rule`
        - upper-scoped `Variable`
        - upper-scoped `Rule`

        The first element of the ``name_and_operands`` tuple is the name of the
        rule and must be str or bytes and non-empty for a standard rule, i.e. if
        ``is_reference`` is not set.
    verbatim : str or bytes, optional
        Verbatim representation of an entire rule. If set, then
        ``name_and_operands`` must be empty.
    is_reference : bool, default ``False``
        If set to ``True``, then the rule is serialized as a reference rule:
        ``Rule(Operand1, Operand2, ...)`` is serialized as
        ``[Operand1, Operand2, ...]``.

    Attributes
    ----------
    name : str or bytes or ``None``
        Name of the rule. It is ``None`` for reference rules and for rules given
        as verbatim.
    operands : tuple of operands
        Each operand has one of the following types:

        - str
        - bytes
        - int
        - float
        - `Variable`
        - `Rule`
        - upper-scoped `Variable`
        - upper-scoped `Rule`

    is_reference : bool
        The reference status of the rule.

        .. note::
            This attribute cannot be changed on a `Rule` instance.
    """

    # Pattern of the rule names that are written as-is when the rule has no
    # operands. Compiled once here instead of on each call to `write`.
    _NO_OPERAND_RULE_PATTERN = r"^[A-Z]([a-zA-Z]*)\(?.*\)?$"
    _NO_OPERAND_RULE_REGEX = re.compile(_NO_OPERAND_RULE_PATTERN)
    _NO_OPERAND_BYTES_RULE_REGEX = re.compile(
        bytes(_NO_OPERAND_RULE_PATTERN, encoding="ascii")
    )

    def __init__(self, *name_and_operands, verbatim=None, is_reference=False):
        """See class docstring"""
        # Check input parameters and initialize rule fragments accordingly
        if not isinstance(is_reference, bool):
            raise TypeError(type_error_message("is_reference", is_reference, bool))

        # Rule provided as name plus operands
        if verbatim is None:
            if not name_and_operands:
                raise ValueError("A name must be provided to a standard rule")
            if is_reference:
                # Reference rules have no name: every positional is an operand
                self.name = None
                self.operands = name_and_operands
            else:
                name, *operands = name_and_operands
                if not is_string_like(name):
                    raise TypeError(type_error_message("name", name, "string-like"))
                if not name:
                    raise ValueError("'name' must be a non-empty string")
                self.name = name
                self.operands = operands
        # Rule provided as verbatim
        else:
            if not is_string_like(verbatim):
                raise TypeError(type_error_message("verbatim", verbatim, "string-like"))
            if not verbatim:
                raise ValueError("'verbatim' must be a non-empty string")
            if name_and_operands:
                raise ValueError(
                    "Rule name and operands must not be provided for verbatim rules"
                )
            self.name = None
            self.operands = ()

        # Check operand types
        for operand in self.operands:
            if not is_string_like(operand) and not isinstance(
                operand, (int, float, Variable, Rule, _ScopedOperand)
            ):
                raise TypeError(
                    type_error_message(
                        f"Operand '{operand}'",
                        operand,
                        "string-like",
                        int,
                        float,
                        Variable,
                        Rule,
                        "upper-scoped Variable",
                        "upper-scoped Rule",
                    )
                )

        # Initialize private attributes
        self._verbatim = verbatim
        self._is_reference = is_reference

    @property
    def is_reference(self):
        """bool : The reference status of the rule (read-only)"""
        return self._is_reference

    def __repr__(self):
        # The representation is the serialization produced by `write`
        stream = io.BytesIO()
        writer = KhiopsOutputWriter(stream)
        self.write(writer)
        return str(stream.getvalue(), encoding="utf8", errors="replace")

    def copy(self):
        """Copies this rule instance

        The copy is built with the same construction mode as this instance
        (verbatim, reference or standard), so the verbatim text and the
        reference status are preserved.

        Returns
        -------
        `Rule`
            A copy of this instance.
        """
        # Verbatim rules have neither name nor operands: rebuild from the text
        if self._verbatim is not None:
            return Rule(verbatim=self._verbatim, is_reference=self.is_reference)
        # Reference rules have no name: all positionals are operands
        if self.is_reference:
            return Rule(*self.operands, is_reference=True)
        return Rule(self.name, *self.operands)

    def write(self, writer):
        """Writes the rule to a file writer in the ``.kdic`` format

        Parameters
        ----------
        writer : `.KhiopsOutputWriter`
            Output writer.

        Raises
        ------
        `TypeError`
            If ``writer`` is not of type `.KhiopsOutputWriter`.

        .. note::
            ``self.name`` is not included in the serialization of reference rules.
        """
        # Check the type of the writer
        if not isinstance(writer, KhiopsOutputWriter):
            raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))

        # Write rule with operands (standard or reference)
        if self.operands:
            if self.is_reference:
                writer.write("[")
            else:
                writer.write(_format_name(self.name))
                writer.write("(")

            # Write operand, according to its type
            # Variable operands have their name written only
            for i, operand in enumerate(self.operands):
                if isinstance(operand, (Rule, _ScopedOperand)):
                    operand.write(writer)
                elif isinstance(operand, Variable):
                    writer.write(_format_name(operand.name))
                elif is_string_like(operand):
                    writer.write(_quote_value(operand))
                elif isinstance(operand, float) and not math.isfinite(operand):
                    # Non-finite floats (NaN, +/-inf) are written as missing
                    writer.write("#Missing")
                # int or finite float cases
                else:
                    writer.write(str(operand))
                if i < len(self.operands) - 1:
                    writer.write(", ")
            if self.is_reference:
                writer.write("]")
            else:
                writer.write(")")
        # Write no-operand rule: the name is written as-is
        elif (
            isinstance(self.name, str)
            and self._NO_OPERAND_RULE_REGEX.match(self.name)
            or isinstance(self.name, bytes)
            and self._NO_OPERAND_BYTES_RULE_REGEX.match(self.name)
        ):
            writer.write(self.name)
        # Write verbatim-given rule
        elif self._verbatim:
            writer.write(self._verbatim)
1661+
1662+
1663+
class _ScopedOperand:
    """Operand to which the upper-scope operator ``.`` is applied

    Instances are meant to be created via `upper_scope`, which validates the
    operand type and raises `TypeError`; the assertions here are internal
    sanity checks only.

    Parameters
    ----------
    operand : `Variable`, `Rule` or `_ScopedOperand`
        The operand being upper-scoped. Wrapping a `_ScopedOperand` applies the
        operator once more.
    """

    def __init__(self, operand):
        # Use isinstance, as `upper_scope` does, so that subclasses of the
        # supported types accepted there are accepted here as well
        assert isinstance(operand, (Variable, Rule, _ScopedOperand)), (
            type_error_message(
                "operand", operand, Variable, Rule, "upper-scoped Variable or Rule"
            )
        )
        self.operand = operand

    def write(self, writer):
        """Writes ``.`` followed by the serialization of the wrapped operand

        Parameters
        ----------
        writer : `.KhiopsOutputWriter`
            Output writer.
        """
        assert isinstance(writer, KhiopsOutputWriter), type_error_message(
            "writer", writer, KhiopsOutputWriter
        )
        writer.write(".")
        # Variables are written by name only; other operands write themselves
        if isinstance(self.operand, Variable):
            writer.write(_format_name(self.operand.name))
        else:
            self.operand.write(writer)

    def __repr__(self):
        # The representation is the serialization produced by `write`
        stream = io.BytesIO()
        writer = KhiopsOutputWriter(stream)
        self.write(writer)
        return str(stream.getvalue(), encoding="utf8", errors="replace")
1685+
1686+
1687+
def upper_scope(operand):
    """Wraps an operand so that it is serialized with the upper-scope operator ``.``

    Parameters
    ----------
    operand : `Variable`, `Rule`, upper-scoped `Variable` or upper-scoped `Rule`
        Operand to be upper-scoped.

    Raises
    ------
    `TypeError`
        If the type of ``operand`` is not `Variable`, `Rule`, upper-scoped `Variable`
        or upper-scoped `Rule`.

    Returns
    -------
    upper-scoped operand
        The upper-scoped operand, as if the upper-scope operator ``.`` were
        applied to an operand in a rule in the ``.kdic`` dictionary language.

    """
    scopable_types = (Variable, Rule, _ScopedOperand)
    if isinstance(operand, scopable_types):
        return _ScopedOperand(operand)
    raise TypeError(
        type_error_message(
            "operand", operand, Variable, Rule, "upper-scoped Variable or Rule"
        )
    )
1715+
1716+
14121717
class MetaData:
14131718
"""A metadata container for a dictionary, a variable or variable block
14141719

0 commit comments

Comments
 (0)