Skip to content

Commit 1929371

Browse files
committed
Add Khiops dictionary rule API support
- add a `Rule` class - add `Dictionary.set_{variable,variable_block}_rule` and `Dictionary.get_{variable,variable_block}_rule` methods - add an `upper_scope` `Variable` and `Rule` method which prepends a "." to the serialization of these objects
1 parent 2490905 commit 1929371

File tree

2 files changed

+499
-1
lines changed

2 files changed

+499
-1
lines changed

khiops/core/dictionary.py

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
1414
"""
1515
import io
16+
import math
1617
import os
1718
import re
1819
import warnings
@@ -80,6 +81,30 @@ def _quote_value(value):
8081
return quoted_value
8182

8283

84+
class _ScopedOperand:
    """Wrapper that serializes a `Variable` or `Rule` operand in the upper scope

    The wrapped operand is written prefixed with a ``.``: a variable is written
    as its formatted name, a rule is written through its own ``write`` method.

    Parameters
    ----------
    operand : `Variable` or `Rule`
        The operand to be written with the upper-scope prefix.

    Raises
    ------
    `TypeError`
        If ``operand`` is neither a `Variable` nor a `Rule`.
    """

    def __init__(self, operand):
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with ``-O``; ``isinstance`` also accepts subclasses
        if not isinstance(operand, (Variable, Rule)):
            raise TypeError(type_error_message("operand", operand, Variable, Rule))
        self.operand = operand

    def write(self, writer):
        """Writes the operand, prefixed with ``.``, to a writer

        Parameters
        ----------
        writer : `.KhiopsOutputWriter`
            Output writer.

        Raises
        ------
        `TypeError`
            If ``writer`` is not a `.KhiopsOutputWriter`.
        """
        if not isinstance(writer, KhiopsOutputWriter):
            raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))

        # The upper-scope marker comes first, then the operand itself
        writer.write(".")
        if isinstance(self.operand, Variable):
            # Only the name of a variable operand is written
            writer.write(_format_name(self.operand.name))
        else:
            self.operand.write(writer)

    def __repr__(self):
        # Serialize to an in-memory stream and decode the result
        stream = io.BytesIO()
        writer = KhiopsOutputWriter(stream)
        self.write(writer)
        return str(stream.getvalue(), encoding="utf8", errors="replace")
106+
107+
83108
class DictionaryDomain(KhiopsJSONObject):
84109
"""Main class containing the information of a Khiops dictionary file
85110
@@ -880,6 +905,131 @@ def remove_variable_block(
880905

881906
return removed_block
882907

908+
def set_variable_rule(self, variable_name, rule):
    """Attaches a rule to one of the variables of this dictionary

    The rule is stored on the variable in its serialized form, that is the
    result of ``repr(rule)``.

    Parameters
    ----------
    variable_name : str
        Name of the variable that receives the rule.
    rule : `Rule`
        The rule to attach to the variable named ``variable_name``.

    Raises
    ------
    `TypeError`
        If ``variable_name`` is not string-like.
        If ``rule`` is not of type `Rule`.

    `ValueError`
        If ``variable_name`` is empty.

    `KeyError`
        If no variable of name ``variable_name`` exists in the dictionary
    """
    # Validate the arguments: the name first, then the rule
    if not is_string_like(variable_name):
        name_error = type_error_message(
            "variable_name", variable_name, "string-like"
        )
        raise TypeError(name_error)
    if not variable_name:
        raise ValueError("'variable_name' must not be empty")
    if not isinstance(rule, Rule):
        rule_error = type_error_message("rule", rule, Rule)
        raise TypeError(rule_error)

    # Serialize the rule before looking the variable up, then store the text
    serialized_rule = repr(rule)
    target_variable = self.get_variable(variable_name)
    target_variable.rule = serialized_rule
939+
940+
def set_variable_block_rule(self, variable_block_name, rule):
    """Attaches a rule to one of the variable blocks of this dictionary

    The rule is stored on the variable block in its serialized form, that is
    the result of ``repr(rule)``.

    Parameters
    ----------
    variable_block_name : str
        Name of the variable block that receives the rule.
    rule : `Rule`
        The rule to attach to the variable block named
        ``variable_block_name``.

    Raises
    ------
    `TypeError`
        If ``variable_block_name`` is not string-like.
        If ``rule`` is not of type `Rule`.

    `ValueError`
        If ``variable_block_name`` is empty.

    `KeyError`
        If no variable block of name ``variable_block_name`` exists in the
        dictionary
    """
    # Validate the arguments: the name first, then the rule
    if not is_string_like(variable_block_name):
        name_error = type_error_message(
            "variable_block_name", variable_block_name, "string-like"
        )
        raise TypeError(name_error)
    if not variable_block_name:
        raise ValueError("'variable_block_name' must not be empty")
    if not isinstance(rule, Rule):
        rule_error = type_error_message("rule", rule, Rule)
        raise TypeError(rule_error)

    # Serialize the rule before looking the block up, then store the text
    serialized_rule = repr(rule)
    target_block = self.get_variable_block(variable_block_name)
    target_block.rule = serialized_rule
975+
976+
def get_variable_rule(self, variable_name):
    """Gets the `Rule` of a specified variable

    The rule is rebuilt from the rule text stored on the variable. A stored
    reference rule (text enclosed in square brackets) is returned as a `Rule`
    named after the bracketed content, so that serializing the returned rule
    yields the stored text again instead of a doubly-bracketed one.

    Parameters
    ----------
    variable_name : str
        Name of the variable the rule is set on.

    Returns
    -------
    `Rule`
        The rule built from the rule text of the variable.

    Raises
    ------
    `TypeError`
        If ``variable_name`` is not string-like.

    `ValueError`
        If ``variable_name`` is empty.
        If the variable has no rule (its rule text is empty).

    `KeyError`
        If no variable of name ``variable_name`` exists in the dictionary
    """
    if not is_string_like(variable_name):
        raise TypeError(
            type_error_message("variable_name", variable_name, "string-like")
        )
    if not variable_name:
        raise ValueError("'variable_name' must not be empty")

    rule_text = self.get_variable(variable_name).rule

    # An empty rule text means "no rule": say so explicitly rather than letting
    # the `Rule` constructor complain about its 'name' parameter
    if isinstance(rule_text, (str, bytes)) and not rule_text:
        raise ValueError(f"Variable '{variable_name}' has no rule")

    # A `Rule` without operands whose name is not a verbatim rule is written
    # as "[name]": unwrap a stored reference to avoid producing "[[name]]"
    if isinstance(rule_text, str):
        is_reference = rule_text.startswith("[") and rule_text.endswith("]")
    elif isinstance(rule_text, bytes):
        is_reference = rule_text.startswith(b"[") and rule_text.endswith(b"]")
    else:
        is_reference = False
    if is_reference and len(rule_text) > 2:
        rule_text = rule_text[1:-1]

    return Rule(name=rule_text)
1002+
1003+
def get_variable_block_rule(self, variable_block_name):
    """Gets the `Rule` of a specified variable block

    The rule is rebuilt from the rule text stored on the variable block. A
    stored text enclosed in square brackets is returned as a `Rule` named
    after the bracketed content, so that serializing the returned rule yields
    the stored text again instead of a doubly-bracketed one.

    Parameters
    ----------
    variable_block_name : str
        Name of the variable block the rule is set on.

    Returns
    -------
    `Rule`
        The rule built from the rule text of the variable block.

    Raises
    ------
    `TypeError`
        If ``variable_block_name`` is not string-like.

    `ValueError`
        If ``variable_block_name`` is empty.
        If the variable block has no rule (its rule text is empty).

    `KeyError`
        If no variable block of name ``variable_block_name`` exists in the
        dictionary
    """
    if not is_string_like(variable_block_name):
        raise TypeError(
            type_error_message(
                "variable_block_name", variable_block_name, "string-like"
            )
        )
    if not variable_block_name:
        raise ValueError("'variable_block_name' must not be empty")

    rule_text = self.get_variable_block(variable_block_name).rule

    # An empty rule text means "no rule": say so explicitly rather than letting
    # the `Rule` constructor complain about its 'name' parameter
    if isinstance(rule_text, (str, bytes)) and not rule_text:
        raise ValueError(f"Variable block '{variable_block_name}' has no rule")

    # A `Rule` without operands whose name is not a verbatim rule is written
    # as "[name]": unwrap a bracketed text to avoid producing "[[name]]"
    if isinstance(rule_text, str):
        is_reference = rule_text.startswith("[") and rule_text.endswith("]")
    elif isinstance(rule_text, bytes):
        is_reference = rule_text.startswith(b"[") and rule_text.endswith(b"]")
    else:
        is_reference = False
    if is_reference and len(rule_text) > 2:
        rule_text = rule_text[1:-1]

    return Rule(name=rule_text)
1032+
8831033
def is_key_variable(self, variable):
8841034
"""Returns ``True`` if a variable belongs to this dictionary's key
8851035
@@ -989,8 +1139,10 @@ class Variable:
9891139
rule : str
9901140
Derivation rule or external table reference. Set to "" if there is no
9911141
rule associated to this variable. Examples:
1142+
9921143
- standard rule: "Sum(Var1, Var2)"
9931144
- reference rule: "[TableName]"
1145+
9941146
variable_block : `VariableBlock`
9951147
Block to which the variable belongs. Not set if the variable does not belong to
9961148
a block.
@@ -1078,6 +1230,10 @@ def __str__(self):
10781230
self.write(writer)
10791231
return str(stream.getvalue(), encoding="utf8", errors="replace")
10801232

1233+
def upper_scope(self):
    """Wraps this variable so that it is serialized in the upper scope

    Returns
    -------
    `_ScopedOperand`
        A wrapper around this instance whose serialization is prefixed with
        a ``.``.
    """
    scoped_operand = _ScopedOperand(self)
    return scoped_operand
1236+
10811237
def copy(self):
10821238
"""Copies this variable instance
10831239
@@ -1402,6 +1558,164 @@ def write(self, writer):
14021558
writer.writeln("")
14031559

14041560

1561+
class Rule:
    """A rule of a variable in a Khiops dictionary

    Parameters
    ----------
    name : str or bytes
        Name or verbatim of the rule. It is interpreted as the verbatim
        representation of an entire rule if and only if:

        - it starts with an UpperCamelCase string, followed by a
          parenthesized block (...)
        - ``operands`` is empty

        It is interpreted as a reference rule if and only if:

        - the first condition above does *not* apply
        - the second condition above applies

    operands : tuple of operands
        Each operand can have one of the following types:

        - str
        - bytes
        - int
        - float
        - ``Variable``
        - ``Rule``
        - upper-scoped ``Variable`` or ``Rule``, as returned by their
          ``upper_scope`` method

        If no operand is specified, then the rule is:

        - a standard rule if ``name`` is the verbatim representation of an
          entire rule
        - a reference rule if ``name`` does not satisfy the condition above

    Attributes
    ----------
    name : str or bytes
        Name of the rule.
    operands : tuple of operands
        Each operand has one of the following types:

        - str
        - bytes
        - int
        - float
        - ``Variable``
        - ``Rule``
        - upper-scoped ``Variable`` or ``Rule``

    Raises
    ------
    `TypeError`
        If ``name`` is not string-like or if an operand has an unsupported
        type.
    `ValueError`
        If ``name`` is empty.
    """

    # Pattern of a verbatim rule: a capitalized alphabetic name followed by a
    # parenthesized block. Compiled once here rather than on every `write`
    # call (`write` recurses on nested rules).
    _VERBATIM_RULE_PATTERN = r"^[A-Z]([a-zA-Z]*)\(.*\)"
    _VERBATIM_RULE_REGEX = re.compile(_VERBATIM_RULE_PATTERN)
    _VERBATIM_RULE_BYTES_REGEX = re.compile(
        bytes(_VERBATIM_RULE_PATTERN, encoding="ascii")
    )

    def __init__(self, name, *operands):
        """See class docstring"""
        # Check input parameters
        if not is_string_like(name):
            raise TypeError(type_error_message("name", name, "string-like"))
        for operand in operands:
            if not is_string_like(operand) and not isinstance(
                operand, (int, float, Variable, Rule, _ScopedOperand)
            ):
                raise TypeError(
                    type_error_message(
                        f"Operand '{operand}'",
                        operand,
                        "string-like",
                        int,
                        float,
                        Variable,
                        Rule,
                        "upper-scoped Variable",
                        "upper-scoped Rule",
                    )
                )
        if not name:
            raise ValueError("'name' must be a non-empty string")

        # Initialize attributes
        self.name = name
        self.operands = operands

    def __repr__(self):
        # The representation is the serialized form of the rule
        stream = io.BytesIO()
        writer = KhiopsOutputWriter(stream)
        self.write(writer)
        return str(stream.getvalue(), encoding="utf8", errors="replace")

    def upper_scope(self):
        """Adds the '.' upper-scope prefix to the serialization of the operand."""
        return _ScopedOperand(self)

    def copy(self):
        """Copies this rule instance

        The copy is built from the same name and the same operand objects: the
        operands themselves are not copied.

        Returns
        -------
        `Rule`
            A copy of this instance
        """
        return Rule(self.name, *self.operands)

    def write(self, writer):
        """Writes the rule to a file writer in the ``.kdic`` format

        String and bytes operands are written between double quotes, with any
        embedded double quote doubled. Non-finite float operands are written
        as ``#Missing``.

        Parameters
        ----------
        writer : `.KhiopsOutputWriter`
            Output writer.

        Raises
        ------
        `TypeError`
            If ``writer`` is not a `.KhiopsOutputWriter`.
        """
        # Check the type of the writer
        if not isinstance(writer, KhiopsOutputWriter):
            raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))

        # Write standard rule
        if self.operands:
            if isinstance(self.name, str):
                writer.write(f"{_format_name(self.name)}(")
            else:
                assert isinstance(self.name, bytes)
                # Write the bytes name as-is, as the reference-rule branch
                # does: decoding it as ASCII would fail on non-ASCII names
                writer.write(_format_name(self.name))
                writer.write("(")

            # Write operand, according to its type
            # Variable operands have their name written only
            last_index = len(self.operands) - 1
            for i, operand in enumerate(self.operands):
                if isinstance(operand, (Rule, _ScopedOperand)):
                    operand.write(writer)
                elif isinstance(operand, Variable):
                    writer.write(_format_name(operand.name))
                elif isinstance(operand, str):
                    # Double the embedded quotes so the constant stays
                    # well-delimited
                    escaped_operand = operand.replace('"', '""')
                    writer.write(f'"{escaped_operand}"')
                elif isinstance(operand, bytes):
                    writer.write('"')
                    writer.write(operand.replace(b'"', b'""'))
                    writer.write('"')
                elif isinstance(operand, float) and not math.isfinite(operand):
                    writer.write("#Missing")
                # int or finite float cases
                else:
                    writer.write(str(operand))
                if i < last_index:
                    writer.write(", ")
            writer.write(")")
        # Write verbatim-given rule
        elif isinstance(self.name, str) and self._VERBATIM_RULE_REGEX.match(
            self.name
        ):
            writer.write(self.name)
        elif isinstance(
            self.name, bytes
        ) and self._VERBATIM_RULE_BYTES_REGEX.match(self.name):
            writer.write(self.name)
        # Write rule as a reference rule
        else:
            if isinstance(self.name, str):
                writer.write(f"[{self.name}]")
            else:
                assert isinstance(self.name, bytes)
                writer.write("[")
                writer.write(self.name)
                writer.write("]")
1717+
1718+
14051719
class MetaData:
14061720
"""A metadata container for a dictionary, a variable or variable block
14071721

0 commit comments

Comments
 (0)