Skip to content

Commit 99cb53a

Browse files
committed
Add support for byte-formatted Rule
1 parent 0957174 commit 99cb53a

2 files changed

Lines changed: 82 additions & 15 deletions

File tree

khiops/core/dictionary.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,19 @@ def __init__(self, operand):
8888
)
8989
self.operand = operand
9090

91-
def __repr__(self):
92-
stream = io.BytesIO()
93-
writer = KhiopsOutputWriter(stream)
91+
def write(self, writer):
92+
assert isinstance(writer, KhiopsOutputWriter), type_error_message(
93+
"writer", writer, KhiopsOutputWriter
94+
)
9495
if isinstance(self.operand, Variable):
9596
writer.write(_format_name(self.operand.name))
9697
else:
9798
self.operand.write(writer)
99+
100+
def __repr__(self):
101+
stream = io.BytesIO()
102+
writer = KhiopsOutputWriter(stream)
103+
self.write(writer)
98104
return "." + str(stream.getvalue(), encoding="utf8", errors="replace")
99105

100106

@@ -1556,7 +1562,7 @@ class Rule:
15561562
15571563
Parameters
15581564
----------
1559-
name : str
1565+
name : str or bytes
15601566
Name or verbatim of the rule. It is interpreted as the verbatim
15611567
representation of an entire rule if and only if:
15621568
@@ -1573,6 +1579,7 @@ class Rule:
15731579
Each operand can have one of the following types:
15741580
15751581
- str
1582+
- bytes
15761583
- int
15771584
- float
15781585
- ``Variable``
@@ -1586,12 +1593,13 @@ class Rule:
15861593
15871594
Attributes
15881595
----------
1589-
name : str
1596+
name : str or bytes
15901597
Name of the rule.
15911598
operands : tuple of operands
15921599
Each operand has one of the following types:
15931600
15941601
- str
1602+
- bytes
15951603
- int
15961604
- float
15971605
- ``Variable``
@@ -1601,17 +1609,17 @@ class Rule:
16011609
def __init__(self, name, *operands):
16021610
"""See class docstring"""
16031611
# Check input parameters
1604-
if not isinstance(name, str):
1605-
raise TypeError(type_error_message("name", name, str))
1612+
if not is_string_like(name):
1613+
raise TypeError(type_error_message("name", name, "string-like"))
16061614
for operand in operands:
1607-
if not isinstance(
1608-
operand, (str, int, float, Variable, Rule, _ScopedOperand)
1615+
if not is_string_like(operand) and not isinstance(
1616+
operand, (int, float, Variable, Rule, _ScopedOperand)
16091617
):
16101618
raise TypeError(
16111619
type_error_message(
16121620
f"Operand '{operand}'",
16131621
operand,
1614-
str,
1622+
"string-like",
16151623
int,
16161624
float,
16171625
Variable,
@@ -1660,9 +1668,15 @@ def write(self, writer):
16601668
raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))
16611669

16621670
# Write standard rule
1663-
rule_regex = re.compile(r"^[A-Z]([a-zA-Z]*)\(.*\)")
1671+
rule_pattern = r"^[A-Z]([a-zA-Z]*)\(.*\)"
1672+
rule_regex = re.compile(rule_pattern)
1673+
bytes_rule_regex = re.compile(bytes(rule_pattern, encoding="ascii"))
16641674
if self.operands:
1665-
writer.write(f"{_format_name(self.name)}(")
1675+
if isinstance(self.name, str):
1676+
writer.write(f"{_format_name(self.name)}(")
1677+
else:
1678+
assert isinstance(self.name, bytes)
1679+
writer.write(f"{_format_name(self.name).decode('ascii')}(")
16661680

16671681
# Write operand, according to its type
16681682
# Variable operands have their name written only
@@ -1673,6 +1687,8 @@ def write(self, writer):
16731687
writer.write(_format_name(operand.name))
16741688
elif isinstance(operand, str):
16751689
writer.write(f'"{operand}"')
1690+
elif isinstance(operand, bytes):
1691+
writer.write(f'"{operand.decode("ascii")}"')
16761692
elif isinstance(operand, float) and not math.isfinite(operand):
16771693
writer.write("#Missing")
16781694
# int, finite float or _ScopedOperand cases
@@ -1682,11 +1698,17 @@ def write(self, writer):
16821698
writer.write(", ")
16831699
writer.write(")")
16841700
# Write verbatim-given rule
1685-
elif rule_regex.match(self.name):
1701+
elif isinstance(self.name, str) and rule_regex.match(self.name):
16861702
writer.write(self.name)
1703+
elif isinstance(self.name, bytes) and bytes_rule_regex.match(self.name):
1704+
writer.write(self.name.decode(encoding="ascii"))
16871705
# Write rule as a reference rule
16881706
else:
1689-
writer.write(f"[{self.name}]")
1707+
if isinstance(self.name, str):
1708+
writer.write(f"[{self.name}]")
1709+
else:
1710+
assert isinstance(self.name, bytes)
1711+
writer.write(f"[{self.name.decode('ascii')}]")
16901712

16911713

16921714
class MetaData:

tests/test_core.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1975,25 +1975,36 @@ def test_dictionary_rule_construction(self):
19751975
"""Tests the Rule construction and serialization"""
19761976
rule_verbatims = [
19771977
"SomeRule()",
1978+
b"SomeRule()",
19781979
'SomeRule("some_operand", 2)',
1980+
b'SomeRule("some_operand", 2)',
19791981
'SomeRule("some_operand", 2, SomeVariable)',
19801982
'SomeRule("some_operand", 2, .SomeVariable)',
1983+
b'SomeRule("some_operand", 2, .SomeVariable)',
19811984
'SomeRule("some_operand", #Missing)',
1985+
b'SomeRule("some_operand", #Missing)',
19821986
'SomeRule("some_operand", 2, SomeEmbeddedRule("some_other_operand"))',
1987+
b'SomeRule("some_operand", 2, SomeEmbeddedRule("some_other_operand"))',
19831988
'SomeRule("some_operand", 2, .SomeEmbeddedRule("some_other_operand"))',
19841989
(
19851990
'SomeRule("some_operand", 2, SomeEmbeddedRule("some_other_operand", '
19861991
'SomeOtherRule("some_embedded_operand", #Missing, 3)))'
19871992
),
19881993
"[SomeReferenceRule]",
1994+
b"[SomeReferenceRule]",
19891995
]
19901996

19911997
rules = [
19921998
[kh.Rule("SomeRule()")],
1999+
[kh.Rule(b"SomeRule()")],
19932000
[
19942001
kh.Rule("SomeRule", "some_operand", 2),
19952002
kh.Rule('SomeRule("some_operand", 2)'),
19962003
],
2004+
[
2005+
kh.Rule(b"SomeRule", b"some_operand", 2),
2006+
kh.Rule(b'SomeRule("some_operand", 2)'),
2007+
],
19972008
[
19982009
kh.Rule('SomeRule("some_operand", 2, SomeVariable)'),
19992010
kh.Rule(
@@ -2015,12 +2026,28 @@ def test_dictionary_rule_construction(self):
20152026
).upper_scope(),
20162027
),
20172028
],
2029+
[
2030+
kh.Rule(
2031+
b"SomeRule",
2032+
b"some_operand",
2033+
2,
2034+
kh.Variable(
2035+
json_data={"name": b"SomeVariable", "type": b"Categorical"}
2036+
).upper_scope(),
2037+
),
2038+
],
20182039
[
20192040
kh.Rule("SomeRule", "some_operand", math.nan),
20202041
kh.Rule("SomeRule", "some_operand", float("inf")),
20212042
kh.Rule("SomeRule", "some_operand", float("-inf")),
20222043
kh.Rule('SomeRule("some_operand", #Missing)'),
20232044
],
2045+
[
2046+
kh.Rule(b"SomeRule", b"some_operand", math.nan),
2047+
kh.Rule(b"SomeRule", b"some_operand", float("inf")),
2048+
kh.Rule(b"SomeRule", b"some_operand", float("-inf")),
2049+
kh.Rule(b'SomeRule("some_operand", #Missing)'),
2050+
],
20242051
[
20252052
kh.Rule(
20262053
"SomeRule",
@@ -2035,6 +2062,20 @@ def test_dictionary_rule_construction(self):
20352062
)
20362063
),
20372064
],
2065+
[
2066+
kh.Rule(
2067+
b"SomeRule",
2068+
b"some_operand",
2069+
2,
2070+
kh.Rule(b"SomeEmbeddedRule", b"some_other_operand"),
2071+
),
2072+
kh.Rule(
2073+
(
2074+
b'SomeRule("some_operand", 2, '
2075+
b'SomeEmbeddedRule("some_other_operand"))'
2076+
)
2077+
),
2078+
],
20382079
[
20392080
kh.Rule(
20402081
"SomeRule",
@@ -2060,7 +2101,11 @@ def test_dictionary_rule_construction(self):
20602101

20612102
for rule_list, rule_verbatim in zip(rules, rule_verbatims):
20622103
for rule in rule_list:
2063-
self.assertEqual(repr(rule), rule_verbatim)
2104+
if isinstance(rule_verbatim, str):
2105+
self.assertEqual(repr(rule), rule_verbatim)
2106+
else:
2107+
self.assertTrue(isinstance(rule_verbatim, bytes))
2108+
self.assertEqual(bytes(repr(rule), encoding="ascii"), rule_verbatim)
20642109

20652110
def test_dictionary_extract_data_paths(self):
20662111
"""Tests the extract_data_paths Dictionary method"""

0 commit comments

Comments
 (0)