diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dd925d1..2ee9de5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ ### Added - (`core`) Dictionary API support for dictionary, variable and variable block comments, and dictionary and variable block internal comments. +- (`core`) Dictionary `Rule` class and supporting API for adding and getting + rules to / from variables and variable blocks. - (`sklearn`) `Text` Khiops type support at the estimator level. ### Fixed diff --git a/doc/samples/samples.rst b/doc/samples/samples.rst index 333f04dd..60dcaded 100644 --- a/doc/samples/samples.rst +++ b/doc/samples/samples.rst @@ -655,17 +655,23 @@ Samples fold_index_variable.name = "FoldIndex" fold_index_variable.type = "Numerical" fold_index_variable.used = False - fold_index_variable.rule = "Ceil(Product(" + str(fold_number) + ", Random()))" dictionary.add_variable(fold_index_variable) + # Create fold indexing rule and set it on `fold_index_variable` + dictionary.get_variable(fold_index_variable.name).set_rule( + kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random()"))), + ) + # Add variables that indicate if the instance is in the train dataset: for fold_index in range(1, fold_number + 1): is_in_train_dataset_variable = kh.Variable() is_in_train_dataset_variable.name = "IsInTrainDataset" + str(fold_index) is_in_train_dataset_variable.type = "Numerical" is_in_train_dataset_variable.used = False - is_in_train_dataset_variable.rule = "NEQ(FoldIndex, " + str(fold_index) + ")" dictionary.add_variable(is_in_train_dataset_variable) + dictionary.get_variable(is_in_train_dataset_variable.name).set_rule( + kh.Rule("NEQ", fold_index_variable, fold_index), + ) # Print dictionary with fold variables print("Dictionary file with fold variables") diff --git a/khiops/core/api.py b/khiops/core/api.py index e9eb08ef..f90eb293 100644 --- a/khiops/core/api.py +++ b/khiops/core/api.py @@ -757,9 +757,11 @@ def train_predictor( Maximum number of text features to 
construct. text_features : str, default "words" Type of the text features. Can be either one of: + - "words": sequences of non-space characters - "ngrams": sequences of bytes - "tokens": user-defined + max_trees : int, default 10 Maximum number of trees to construct. max_pairs : int, default 0 @@ -788,8 +790,10 @@ def train_predictor( Maximum number of variable parts produced by preprocessing methods. If equal to 0 it is automatically calculated. Special default values for unsupervised analysis: + - If ``discretization_method`` is "EqualWidth" or "EqualFrequency": 10 - If ``grouping_method`` is "BasicGrouping": 10 + ... : See :ref:`core-api-common-params`. @@ -1181,9 +1185,11 @@ def train_recoder( Maximum number of text features to construct. text_features : str, default "words" Type of the text features. Can be either one of: + - "words": sequences of non-space characters - "ngrams": sequences of bytes - "tokens": user-defined + max_trees : int, default 10 Maximum number of trees to construct. max_pairs : int, default 0 @@ -1210,13 +1216,16 @@ def train_recoder( If ``True`` keeps initial numerical variables. categorical_recoding_method : str Type of recoding for categorical variables. Types available: + - "part Id" (default): An id for the interval/group - "part label": A label for the interval/group - "0-1 binarization": A 0's and 1's coding the interval/group id - "conditional info": Conditional information of the interval/group - "none": Keeps the variable as-is + numerical_recoding_method : str Type of recoding recoding for numerical variables. Types available: + - "part Id" (default): An id for the interval/group - "part label": A label for the interval/group - "0-1 binarization": A 0's and 1's coding the interval/group id @@ -1226,13 +1235,16 @@ def train_recoder( - "rank normalization": mean normalized rank (between 0 and 1) of the instances - "none": Keeps the variable as-is + pairs_recoding_method : str Type of recoding for bivariate variables. 
Types available: + - "part Id" (default): An id for the interval/group - "part label": A label for the interval/group - "0-1 binarization": A 0's and 1's coding the interval/group id - "conditional info": Conditional information of the interval/group - "none": Keeps the variable as-is + discretization_method : str, default "MODL" Name of the discretization method in case of unsupervised analysis. Its valid values are: "MODL", "EqualWidth", "EqualFrequency" or "none". @@ -1245,8 +1257,10 @@ def train_recoder( Maximum number of variable parts produced by preprocessing methods. If equal to 0 it is automatically calculated. Special default values for unsupervised analysis: + - If ``discretization_method`` is "EqualWidth" or "EqualFrequency": 10 - If ``grouping_method`` is "BasicGrouping": 10 + ... : See :ref:`core-api-common-params`. @@ -1254,6 +1268,7 @@ def train_recoder( ------- tuple A 2-tuple containing: + - The path of the JSON file report of the process - The path of the dictionary containing the recoding model diff --git a/khiops/core/dictionary.py b/khiops/core/dictionary.py index 007c6118..3a1618f1 100644 --- a/khiops/core/dictionary.py +++ b/khiops/core/dictionary.py @@ -13,6 +13,7 @@ """ import io +import math import os import re import warnings @@ -50,7 +51,7 @@ def _format_name(name): # Python isalnum is not used because of utf-8 encoding (accentuated chars # are considered alphanumeric) # Return original name if is an identifier, otherwise between backquotes - identifier_pattern = r"^[a-zA-Z][a-zA-Z0-9_]*" + identifier_pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*" str_identifier_regex = re.compile(identifier_pattern) bytes_identifier_regex = re.compile(bytes(identifier_pattern, encoding="ascii")) if isinstance(name, str): @@ -989,8 +990,10 @@ class Variable: rule : str Derivation rule or external table reference. Set to "" if there is no rule associated to this variable. 
def is_reference_rule(self):
    """Tells whether the variable's rule is a reference rule

    A reference rule is a rule whose textual form is enclosed in square
    brackets, for example ``[TableName]``.

    Returns
    -------
    bool
        ``True`` if ``rule`` is a non-empty str or bytes value that starts
        with ``[`` and ends with ``]``, ``False`` otherwise.
    """
    rule = self.rule

    # No rule at all (empty string, empty bytes or None)
    if not rule:
        return False

    # Text rule: compare against str brackets
    if isinstance(rule, str):
        return rule.startswith("[") and rule.endswith("]")

    # Any other non-empty rule is expected to be bytes
    assert isinstance(rule, bytes)
    return rule.startswith(b"[") and rule.endswith(b"]")
class Rule:
    """A rule of a variable or variable block in a Khiops dictionary

    A rule is built in one of three ways:

    - standard rule: ``Rule("Name", operand1, operand2, ...)``
    - reference rule: ``Rule(operand1, operand2, ..., is_reference=True)``
    - verbatim rule: ``Rule(verbatim="Name(operand1, ...)")``

    Parameters
    ----------
    name_and_operands : tuple
        Each tuple member can have one of the following types:

        - str
        - bytes
        - int
        - float
        - `Variable`
        - `Rule`
        - upper-scoped `Variable`
        - upper-scoped `Rule`

        The first element of the ``name_and_operands`` tuple is the name of the
        rule and must be str or bytes and non-empty for a standard rule, i.e. if
        ``is_reference`` is not set.
    verbatim : str or bytes, optional
        Verbatim representation of an entire rule. If set, then
        ``name_and_operands`` must be empty.
    is_reference : bool, default ``False``
        If set to ``True``, then the rule is serialized as a reference rule:
        ``Rule(Operand1, Operand2, ...)`` is serialized as
        ``[Operand1, Operand2, ...]``.

    Attributes
    ----------
    name : str or bytes or ``None``
        Name of the rule. It is ``None`` for reference rules and for verbatim
        rules.
    operands : tuple of operands
        Each operand has one of the following types:

        - str
        - bytes
        - int
        - float
        - `Variable`
        - `Rule`
        - upper-scoped `Variable`
        - upper-scoped `Rule`

        It is empty for verbatim rules.
    is_reference : bool
        The reference status of the rule.

        .. note::
            This attribute cannot be changed on a `Rule` instance.

    Raises
    ------
    `TypeError`
        If ``is_reference`` is not a bool, if the rule name or ``verbatim`` is
        not string-like, or if an operand has an unsupported type.
    `ValueError`
        If neither a name/operand nor ``verbatim`` is provided, if the name or
        ``verbatim`` is empty, or if both ``verbatim`` and
        ``name_and_operands`` are provided.
    """

    # Shape of a no-operand rule name that is written as-is (e.g. "Random()").
    # Compiled once at class creation instead of on every `write` call.
    _NO_OPERAND_RULE_PATTERN = r"^[A-Z]([a-zA-Z]*)\(?.*\)?$"
    _STR_NO_OPERAND_RULE_REGEX = re.compile(_NO_OPERAND_RULE_PATTERN)
    _BYTES_NO_OPERAND_RULE_REGEX = re.compile(
        bytes(_NO_OPERAND_RULE_PATTERN, encoding="ascii")
    )

    def __init__(self, *name_and_operands, verbatim=None, is_reference=False):
        """See class docstring"""
        # Check input parameters and initialize rule fragments accordingly
        if not isinstance(is_reference, bool):
            raise TypeError(type_error_message("is_reference", is_reference, bool))

        # Rule provided as name plus operands
        if verbatim is None:
            if not name_and_operands:
                raise ValueError("A name must be provided to a standard rule")
            if is_reference:
                # Reference rules have no name: every argument is an operand
                self.name = None
                self.operands = tuple(name_and_operands)
            else:
                name, *operands = name_and_operands
                if not is_string_like(name):
                    raise TypeError(type_error_message("name", name, "string-like"))
                if not name:
                    raise ValueError("'name' must be a non-empty string")
                self.name = name
                # Stored as a tuple, as documented, for every kind of rule
                self.operands = tuple(operands)
        # Rule provided as verbatim
        else:
            if not is_string_like(verbatim):
                raise TypeError(type_error_message("verbatim", verbatim, "string-like"))
            if not verbatim:
                raise ValueError("'verbatim' must be a non-empty string")
            if name_and_operands:
                raise ValueError(
                    "Rule name and operands must not be provided for verbatim rules"
                )
            self.name = None
            self.operands = ()

        # Check operand types
        for operand in self.operands:
            if not is_string_like(operand) and not isinstance(
                operand, (int, float, Variable, Rule, _ScopedOperand)
            ):
                raise TypeError(
                    type_error_message(
                        f"Operand '{operand}'",
                        operand,
                        "string-like",
                        int,
                        float,
                        Variable,
                        Rule,
                        "upper-scoped Variable",
                        "upper-scoped Rule",
                    )
                )

        # Initialize private attributes
        self._verbatim = verbatim
        self._is_reference = is_reference

    @property
    def is_reference(self):
        """bool : Reference status of the rule (read-only)"""
        return self._is_reference

    def __repr__(self):
        # The representation is the ``.kdic`` serialization of the rule
        stream = io.BytesIO()
        writer = KhiopsOutputWriter(stream)
        self.write(writer)
        return str(stream.getvalue(), encoding="utf8", errors="replace")

    def copy(self):
        """Copies this rule instance

        The copy is built with the same construction mode as this instance
        (verbatim, reference or standard). Operands are not copied themselves:
        the new rule refers to the same operand objects.

        Returns
        -------
        `Rule`
            A copy of this instance.
        """
        # Verbatim rules have neither name nor operands
        if self._verbatim is not None:
            return Rule(verbatim=self._verbatim, is_reference=self._is_reference)

        # Reference rules have no name: all constructor arguments are operands
        if self._is_reference:
            return Rule(*self.operands, is_reference=True)

        # Standard rule
        return Rule(self.name, *self.operands)

    @staticmethod
    def _write_operand(writer, operand):
        """Writes a single operand according to its type"""
        if isinstance(operand, (Rule, _ScopedOperand)):
            operand.write(writer)
        # Variable operands have their name written only
        elif isinstance(operand, Variable):
            writer.write(_format_name(operand.name))
        elif is_string_like(operand):
            writer.write(_quote_value(operand))
        # NaN and infinite floats are written as the missing value
        elif isinstance(operand, float) and not math.isfinite(operand):
            writer.write("#Missing")
        # int or finite float cases
        else:
            writer.write(str(operand))

    def write(self, writer):
        """Writes the rule to a file writer in the ``.kdic`` format

        Parameters
        ----------
        writer : `.KhiopsOutputWriter`
            Output writer.

        Raises
        ------
        `TypeError`
            If ``writer`` is not a `.KhiopsOutputWriter`.

        .. note::
            ``self.name`` is not included in the serialization of reference rules.

        .. note::
            A rule without operands is written as its bare name only if the name
            starts with an uppercase ASCII letter; otherwise the verbatim text,
            if any, is written, and nothing is written when there is none.
        """
        # Check the type of the writer
        if not isinstance(writer, KhiopsOutputWriter):
            raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))

        # Write rule with operands (standard or reference)
        if self.operands:
            if self.is_reference:
                closing_delimiter = "]"
                writer.write("[")
            else:
                closing_delimiter = ")"
                writer.write(_format_name(self.name))
                writer.write("(")
            for i, operand in enumerate(self.operands):
                if i > 0:
                    writer.write(", ")
                self._write_operand(writer, operand)
            writer.write(closing_delimiter)
        # Write no-operand rule
        elif (
            isinstance(self.name, str)
            and self._STR_NO_OPERAND_RULE_REGEX.match(self.name)
            or isinstance(self.name, bytes)
            and self._BYTES_NO_OPERAND_RULE_REGEX.match(self.name)
        ):
            writer.write(self.name)
        # Write verbatim-given rule
        elif self._verbatim:
            writer.write(self._verbatim)
def upper_scope(operand):
    """Applies the upper-scope operator ``.`` to an operand

    Parameters
    ----------
    operand : `Variable`, `Rule`, upper-scoped `Variable` or upper-scoped `Rule`
        Operand that is upper-scoped.

    Raises
    ------
    `TypeError`
        If the type of ``operand`` is not `Variable`, `Rule`, upper-scoped `Variable`
        or upper-scoped `Rule`.

    Returns
    -------
    upper-scoped operand
        The upper-scoped operand, as if the upper-scope operator ``.`` were
        applied to an operand in a rule in the ``.kdic`` dictionary language.

    """
    # Only variables, rules and already upper-scoped operands can be scoped;
    # scoping an upper-scoped operand stacks one more ``.`` on it
    scopable_types = (Variable, Rule, _ScopedOperand)
    if isinstance(operand, scopable_types):
        return _ScopedOperand(operand)

    raise TypeError(
        type_error_message(
            "operand", operand, Variable, Rule, "upper-scoped Variable or Rule"
        )
    )
"\n", "# Add variables that indicate if the instance is in the train dataset:\n", "for fold_index in range(1, fold_number + 1):\n", " is_in_train_dataset_variable = kh.Variable()\n", " is_in_train_dataset_variable.name = \"IsInTrainDataset\" + str(fold_index)\n", " is_in_train_dataset_variable.type = \"Numerical\"\n", " is_in_train_dataset_variable.used = False\n", - " is_in_train_dataset_variable.rule = \"NEQ(FoldIndex, \" + str(fold_index) + \")\"\n", " dictionary.add_variable(is_in_train_dataset_variable)\n", + " dictionary.get_variable(is_in_train_dataset_variable.name).set_rule(\n", + " kh.Rule(\"NEQ\", fold_index_variable, fold_index),\n", + " )\n", "\n", "# Print dictionary with fold variables\n", "print(\"Dictionary file with fold variables\")\n", diff --git a/khiops/samples/samples.py b/khiops/samples/samples.py index 57babefe..b44e338f 100644 --- a/khiops/samples/samples.py +++ b/khiops/samples/samples.py @@ -728,17 +728,23 @@ def train_predictor_with_cross_validation(): fold_index_variable.name = "FoldIndex" fold_index_variable.type = "Numerical" fold_index_variable.used = False - fold_index_variable.rule = "Ceil(Product(" + str(fold_number) + ", Random()))" dictionary.add_variable(fold_index_variable) + # Create fold indexing rule and set it on `fold_index_variable` + dictionary.get_variable(fold_index_variable.name).set_rule( + kh.Rule("Ceil", kh.Rule("Product", fold_number, kh.Rule("Random()"))), + ) + # Add variables that indicate if the instance is in the train dataset: for fold_index in range(1, fold_number + 1): is_in_train_dataset_variable = kh.Variable() is_in_train_dataset_variable.name = "IsInTrainDataset" + str(fold_index) is_in_train_dataset_variable.type = "Numerical" is_in_train_dataset_variable.used = False - is_in_train_dataset_variable.rule = "NEQ(FoldIndex, " + str(fold_index) + ")" dictionary.add_variable(is_in_train_dataset_variable) + dictionary.get_variable(is_in_train_dataset_variable.name).set_rule( + kh.Rule("NEQ", 
fold_index_variable, fold_index), + ) # Print dictionary with fold variables print("Dictionary file with fold variables") diff --git a/tests/test_core.py b/tests/test_core.py index 361f7bb2..6a8f92fe 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8,6 +8,7 @@ import glob import io import json +import math import os import shutil import tempfile @@ -1878,8 +1879,9 @@ def test_dictionary_accessors(self): dictionary_copy.get_variable_block(block.name) # Set the block as non-native add, and remove it - block.rule = "SomeBlockCreatingRule()" dictionary_copy.add_variable_block(block) + block_rule = kh.Rule("SomeBlockCreatingRule()") + dictionary_copy.get_variable_block(block.name).set_rule(block_rule) self.assertEqual(block, dictionary_copy.get_variable_block(block.name)) removed_block = dictionary_copy.remove_variable_block( block.name, @@ -1923,6 +1925,339 @@ def test_dictionary_accessors(self): removed_value = variable_block.meta_data.remove_key("SomeKey") self.assertEqual(removed_value, "SomeValue") + # Test rule getters / setters + dictionary_copy = dictionary.copy() + for variable_index, variable_name in enumerate( + [variable.name for variable in dictionary_copy.variables] + ): + some_rule = kh.Rule( + "SomeRuleForVariable" + variable_index * "i", + "an_operand", + 2, + kh.Rule("SomeEmbeddedRule()"), + ) + dictionary_copy.get_variable(variable_name).set_rule(some_rule) + self.assertEqual( + dictionary_copy.get_variable(variable_name).rule, + repr(some_rule), + ) + self.assertEqual( + repr(dictionary_copy.get_variable(variable_name).get_rule()), + repr(some_rule), + ) + some_reference_rule = kh.Rule( + "some_reference_operand_for_variable" + variable_index * "i", + kh.Variable( + json_data={ + "name": "SomeReferenceVariable" + variable_index * "i", + "type": "Categorical", + } + ), + is_reference=True, + ) + dictionary_copy.get_variable(variable_name).set_rule( + some_reference_rule + ) + self.assertEqual( + 
def test_dictionary_rule_construction(self):
    """Tests the Rule construction and serialization

    The test is data-driven: ``rule_verbatims`` holds the expected
    serializations and ``rules`` holds, at the same index, a list of `Rule`
    instances that must all serialize (via ``repr``) to that verbatim.
    For bytes verbatims, the ``repr`` is encoded to UTF-8 before comparison.
    """
    # Expected serializations, one per entry of `rules` below
    # NOTE(review): this list has 26 entries whereas `rules` has 25, so the
    # last verbatim (the bytes one containing \xe9) is never compared because
    # zip() stops at the shortest input -- confirm whether this is intended.
    rule_verbatims = [
        "SomeRule",
        b"SomeRule",
        'SomeRule("some_operand", 2)',
        b'SomeRule("some_operand", 2)',
        'SomeRule("some""operand", 2)',
        b'SomeRule("some""operand", 2)',
        'SomeRule("some_operand", 2, SomeVariable)',
        'SomeRule("some_operand", 2, `Some#Variable`)',
        b'SomeRule("some_operand", 2, `Some#Variable`)',
        'SomeRule("some_operand", 2, `Some``Variable`)',
        b'SomeRule("some_operand", 2, `Some``Variable`)',
        'SomeRule("some_operand", 2, .SomeVariable)',
        b'SomeRule("some_operand", 2, .SomeVariable)',
        'SomeRule("some_operand", 2, ..SomeVariable)',
        'SomeRule("some_operand", #Missing)',
        b'SomeRule("some_operand", #Missing)',
        'SomeRule("some_operand", 2, SomeEmbeddedRule("some_other_operand"))',
        b'SomeRule("some_operand", 2, SomeEmbeddedRule("some_other_operand"))',
        'SomeRule("some_operand", 2, .SomeEmbeddedRule("some_other_operand"))',
        'SomeRule("some_operand", 2, ..SomeEmbeddedRule("some_other_operand"))',
        (
            'SomeRule("some_operand", 2, SomeEmbeddedRule("some_other_operand", '
            'SomeOtherRule("some_embedded_operand", #Missing, 3)))'
        ),
        '["some_reference_operand"]',
        b'["some_reference_operand"]',
        '["some_reference_operand", SomeReferenceVariable]',
        b'["some_reference_operand", SomeReferenceVariable]',
        (
            b'SomeRule("som\xe9_operand", 2, '
            b'SomeEmbeddedRule("som\xe9_other_operand"))'
        ),
    ]

    # Rules to serialize: each inner list groups the rules that share the
    # expected verbatim found at the same index in `rule_verbatims`
    rules = [
        # No-operand rules
        [kh.Rule("SomeRule")],
        [kh.Rule(b"SomeRule")],
        # String and integer operands, structured and verbatim forms
        [
            kh.Rule("SomeRule", "some_operand", 2),
            kh.Rule(verbatim='SomeRule("some_operand", 2)'),
        ],
        [
            kh.Rule(b"SomeRule", b"some_operand", 2),
            kh.Rule(verbatim=b'SomeRule("some_operand", 2)'),
        ],
        # Double quote inside a string operand (expected doubled on output)
        [
            kh.Rule("SomeRule", 'some"operand', 2),
        ],
        [
            kh.Rule(b"SomeRule", b'some"operand', 2),
        ],
        # Variable operands: expected to be written by name
        [
            kh.Rule(verbatim='SomeRule("some_operand", 2, SomeVariable)'),
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.Variable(
                    json_data={"name": "SomeVariable", "type": "Categorical"}
                ),
            ),
        ],
        # Variable names that are not identifiers (expected between backquotes)
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.Variable(
                    json_data={"name": "Some#Variable", "type": "Categorical"}
                ),
            )
        ],
        [
            kh.Rule(
                b"SomeRule",
                b"some_operand",
                2,
                kh.Variable(
                    json_data={"name": b"Some#Variable", "type": b"Categorical"}
                ),
            )
        ],
        # Backquote inside a variable name (expected doubled on output)
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.Variable(
                    json_data={"name": "Some`Variable", "type": "Categorical"}
                ),
            )
        ],
        [
            kh.Rule(
                b"SomeRule",
                b"some_operand",
                2,
                kh.Variable(
                    json_data={"name": b"Some`Variable", "type": b"Categorical"}
                ),
            )
        ],
        # Upper-scoped variable operands (one "." per upper_scope call)
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.upper_scope(
                    kh.Variable(
                        json_data={"name": "SomeVariable", "type": "Categorical"}
                    )
                ),
            ),
        ],
        [
            kh.Rule(
                b"SomeRule",
                b"some_operand",
                2,
                kh.upper_scope(
                    kh.Variable(
                        json_data={"name": b"SomeVariable", "type": b"Categorical"}
                    )
                ),
            ),
        ],
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.upper_scope(
                    kh.upper_scope(
                        kh.Variable(
                            json_data={
                                "name": "SomeVariable",
                                "type": "Categorical",
                            }
                        )
                    )
                ),
            ),
        ],
        # Non-finite floats (expected to be written as #Missing)
        [
            kh.Rule("SomeRule", "some_operand", math.nan),
            kh.Rule("SomeRule", "some_operand", float("inf")),
            kh.Rule("SomeRule", "some_operand", float("-inf")),
            kh.Rule(verbatim='SomeRule("some_operand", #Missing)'),
        ],
        [
            kh.Rule(b"SomeRule", b"some_operand", math.nan),
            kh.Rule(b"SomeRule", b"some_operand", float("inf")),
            kh.Rule(b"SomeRule", b"some_operand", float("-inf")),
            kh.Rule(verbatim=b'SomeRule("some_operand", #Missing)'),
        ],
        # Embedded rule operands
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.Rule("SomeEmbeddedRule", "some_other_operand"),
            ),
            kh.Rule(
                verbatim=(
                    'SomeRule("some_operand", 2, '
                    'SomeEmbeddedRule("some_other_operand"))'
                ),
            ),
        ],
        [
            kh.Rule(
                b"SomeRule",
                b"some_operand",
                2,
                kh.Rule(b"SomeEmbeddedRule", b"some_other_operand"),
            ),
            # NOTE(review): unlike the str counterpart above, the bytes text
            # is passed positionally (i.e. as the rule name), not through
            # ``verbatim=`` -- confirm whether this is intended.
            kh.Rule(
                (
                    b'SomeRule("some_operand", 2, '
                    b'SomeEmbeddedRule("some_other_operand"))'
                )
            ),
        ],
        # Upper-scoped embedded rules
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.upper_scope(kh.Rule("SomeEmbeddedRule", "some_other_operand")),
            ),
        ],
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.upper_scope(
                    kh.upper_scope(
                        kh.Rule("SomeEmbeddedRule", "some_other_operand")
                    )
                ),
            ),
        ],
        # Two levels of embedded rules
        [
            kh.Rule(
                "SomeRule",
                "some_operand",
                2,
                kh.Rule(
                    "SomeEmbeddedRule",
                    "some_other_operand",
                    kh.Rule("SomeOtherRule", "some_embedded_operand", math.inf, 3),
                ),
            )
        ],
        # Reference rules: every positional argument is an operand
        [kh.Rule("some_reference_operand", is_reference=True)],
        [kh.Rule(b"some_reference_operand", is_reference=True)],
        [
            kh.Rule(
                "some_reference_operand",
                kh.Variable(
                    json_data={
                        "name": "SomeReferenceVariable",
                        "type": "Categorical",
                    }
                ),
                is_reference=True,
            )
        ],
        [
            kh.Rule(
                b"some_reference_operand",
                kh.Variable(
                    json_data={
                        "name": b"SomeReferenceVariable",
                        "type": "Categorical",
                    }
                ),
                is_reference=True,
            )
        ],
    ]

    # Check that every rule serializes to the verbatim at the same index;
    # bytes verbatims are compared against the UTF-8 encoding of the repr
    for rule_list, rule_verbatim in zip(rules, rule_verbatims):
        for rule in rule_list:
            if isinstance(rule_verbatim, str):
                self.assertEqual(repr(rule), rule_verbatim)
            else:
                self.assertTrue(isinstance(rule_verbatim, bytes))
                self.assertEqual(bytes(repr(rule), encoding="utf8"), rule_verbatim)