Skip to content

Commit b86df6c

Browse files
author
Thierry RAMORASOAVINA
committed
Add a way to add a variable to a dictionary using a complete specification
1 parent 1b0b3dc commit b86df6c

6 files changed

Lines changed: 309 additions & 83 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
comments, and dictionary and variable block internal comments.
1414
- (`core`) Dictionary `Rule` class and supporting API for adding and getting
1515
rules to / from variables and variable blocks.
16+
- (`core`) New way to add a variable to a dictionary using a complete specification.
1617
- (`sklearn`) `Text` Khiops type support at the estimator level.
1718

1819
### Fixed

doc/samples/samples.rst

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -79,40 +79,28 @@ Samples
7979
{"name": "ts", "type": "TimestampTZ"},
8080
]
8181
for var_spec in simple_variables:
82-
var = kh.Variable()
83-
var.name = var_spec["name"]
84-
var.type = var_spec["type"]
85-
root_dictionary.add_variable(var)
82+
root_dictionary.add_variable_from_spec(name=var_spec["name"], type=var_spec["type"])
8683
8784
# Create a second dictionary
8885
second_dictionary = kh.Dictionary(
8986
json_data={"name": "Service", "key": ["Id", "id_product"]}
9087
)
91-
second_dictionary.add_variable(
92-
kh.Variable(json_data={"name": "Id", "type": "Categorical"})
93-
)
94-
second_dictionary.add_variable(
95-
kh.Variable(json_data={"name": "id_product", "type": "Categorical"})
96-
)
88+
second_dictionary.add_variable_from_spec(name="Id", type="Categorical")
89+
second_dictionary.add_variable_from_spec(name="id_product", type="Categorical")
90+
9791
# Create a third dictionary
9892
third_dictionary = kh.Dictionary(json_data={"name": "Address", "key": ["Id"]})
99-
third_dictionary.add_variable(
100-
kh.Variable(json_data={"name": "StreetNumber", "type": "Numerical"})
101-
)
102-
third_dictionary.add_variable(
103-
kh.Variable(json_data={"name": "StreetName", "type": "Categorical"})
104-
)
105-
third_dictionary.add_variable(
106-
kh.Variable(json_data={"name": "id_city", "type": "Categorical"})
107-
)
93+
third_dictionary.add_variable_from_spec(name="StreetNumber", type="Numerical")
94+
third_dictionary.add_variable_from_spec(name="StreetName", type="Categorical")
95+
third_dictionary.add_variable_from_spec(name="id_city", type="Categorical")
10896
10997
# Add the variables used in a multi-table context in the first dictionary.
11098
# They link the root dictionary to the additional ones
111-
root_dictionary.add_variable(
112-
kh.Variable(json_data={"name": "Services", "type": "Table(Service)"})
99+
root_dictionary.add_variable_from_spec(
100+
name="Services", type="Table", object_type="Service"
113101
)
114-
root_dictionary.add_variable(
115-
kh.Variable(json_data={"name": "Address", "type": "Entity(Address)"})
102+
root_dictionary.add_variable_from_spec(
103+
name="Address", type="Entity", object_type="Address"
116104
)
117105
118106
# Create a DictionaryDomain (set of dictionaries)

khiops/core/dictionary.py

Lines changed: 191 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
from khiops.core.exceptions import KhiopsJSONError
2424
from khiops.core.internals.common import (
2525
deprecation_message,
26+
is_dict_like,
27+
is_list_like,
2628
is_string_like,
2729
type_error_message,
2830
)
@@ -43,25 +45,17 @@ def _format_name(name):
4345
4446
Otherwise, it returns the name between backquoted (backquotes within are doubled)
4547
"""
46-
# Check that the type of name is string or bytes
47-
if not is_string_like(name):
48-
raise TypeError(type_error_message("name", name, "string-like"))
48+
is_valid_identifier = _check_name(name)
4949

50-
# Check if the name is an identifier
51-
# Python isalnum is not used because of utf-8 encoding (accentuated chars
52-
# are considered alphanumeric)
5350
# Return original name if is an identifier, otherwise between backquotes
54-
identifier_pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*"
55-
str_identifier_regex = re.compile(identifier_pattern)
56-
bytes_identifier_regex = re.compile(bytes(identifier_pattern, encoding="ascii"))
5751
if isinstance(name, str):
58-
if str_identifier_regex.fullmatch(name) is not None:
52+
if is_valid_identifier:
5953
formatted_name = name
6054
else:
6155
formatted_name = "`" + name.replace("`", "``") + "`"
6256
else:
6357
assert isinstance(name, bytes)
64-
if bytes_identifier_regex.fullmatch(name) is not None:
58+
if is_valid_identifier:
6559
formatted_name = name
6660
else:
6761
formatted_name = b"`" + name.replace(b"`", b"``") + b"`"
@@ -81,6 +75,59 @@ def _quote_value(value):
8175
return quoted_value
8276

8377

78+
def _check_name(name):
    """Checks whether a variable name is a plain identifier

    A plain identifier starts with an ASCII letter or an underscore and is followed
    by at most 127 ASCII letters, digits or underscores (total length in [1, 128]).
    Plain string or bytes are both accepted as input.

    Please note the Khiops core forbids a name

    - with a length outside the [1, 128] interval
    - containing a simple (Unix) newline character
    - with leading and trailing spaces

    A name accepted by this function satisfies all these constraints.

    Parameters
    ----------
    name : str or bytes
        The name to be checked.

    Returns
    -------
    bool
        ``True`` if the whole name is a plain identifier, ``False`` otherwise.

    Raises
    ------
    `TypeError`
        If ``name`` is not string-like.
    """
    # Check that the type of name is string or bytes
    if not is_string_like(name):
        raise TypeError(type_error_message("name", name, "string-like"))

    # Python isalnum is not used because of utf-8 encoding (accentuated chars
    # are considered alphanumeric)
    # The whole name must match the pattern: `fullmatch` is used rather than
    # `match` with a `$` anchor because `$` also matches just before a trailing
    # newline, which would wrongly accept names such as "abc\n"
    identifier_pattern = r"[a-zA-Z_][a-zA-Z0-9_]{0,127}"
    if isinstance(name, str):
        return re.fullmatch(identifier_pattern, name) is not None
    return re.fullmatch(identifier_pattern.encode("ascii"), name) is not None
102+
103+
104+
def _is_valid_type(type_str):
    """Tells whether ``type_str`` is a known variable type

    A type is known if it is native, relational (object) or one of the
    internal types "TextList" and "Structure".
    """
    if _is_native_type(type_str) or _is_object_type(type_str):
        return True

    # Internal types
    return type_str in ("TextList", "Structure")
111+
112+
113+
def _is_native_type(type_str):
114+
"""Checks whether the type is native (not internal or relational)"""
115+
return type_str in [
116+
"Categorical",
117+
"Numerical",
118+
"Time",
119+
"Date",
120+
"Timestamp",
121+
"TimestampTZ",
122+
"Text",
123+
]
124+
125+
126+
def _is_object_type(type_str):
127+
"""Checks whether the type is an object one (relational)"""
128+
return type_str in ["Entity", "Table"]
129+
130+
84131
class DictionaryDomain(KhiopsJSONObject):
85132
"""Main class containing the information of a Khiops dictionary file
86133
@@ -769,6 +816,136 @@ def add_variable(self, variable):
769816
self.variables.append(variable)
770817
self._variables_by_name[variable.name] = variable
771818

819+
def add_variable_from_spec(
    self,
    name,
    type,
    label=None,
    used=True,
    object_type=None,
    structure_type=None,
    rule=None,
    meta_data=None,
):
    """Adds a variable to this dictionary using a complete specification

    Parameters
    ----------
    name : str
        Variable name. It must be a plain identifier: an ASCII letter or an
        underscore followed by at most 127 ASCII letters, digits or underscores.
    type : str
        Variable type, See `Variable`
    label : str, optional
        label of the variable
    used : bool, default ``True``
        usage status of the variable
    object_type : str, optional
        object type.
        Mandatory if the variable type is "Entity" or "Table".
        Must not be set for a native type.
    structure_type : str, optional
        structure type.
        Must not be set for a native type.
    rule : str, optional
        variable rule (in verbatim)
        See `Rule`
    meta_data : dict, optional
        a Python dictionary which holds the metadata specification
        with the following mandatory keys:

        - keys : list
            list of meta-data keys
        - values : list
            list of meta-data values (same size as ``keys``).
            The values can be str, bool, float or int

    Raises
    ------
    `ValueError`
        - If the variable name is empty or does not comply
          with the formatting constraints.
        - If there is already a variable with the same name.
        - If the given variable type is unknown.
        - If a native type is given 'object_type' or 'structure_type'.
        - If an "Entity" or "Table" type is not given 'object_type'.
        - If 'meta_data' lacks the 'keys' or 'values' entries, or if these
          do not have the same size.
    `TypeError`
        - If the variable name is not string-like.
        - If 'meta_data' is not dict-like or its 'keys' or 'values' entries
          are not list-like.
        - If 'label', 'object_type', 'structure_type' or 'rule' is set but
          is not string-like.
    """
    # Check the name
    if not name:
        raise ValueError(
            f"Cannot add to dictionary unnamed variable (name = '{name}')"
        )
    if not _check_name(name):
        raise ValueError(
            f"New variable name '{name}' cannot be accepted "
            "(invalid length or characters)"
        )
    if name in self._variables_by_name:
        raise ValueError(f"Dictionary already has a variable named '{name}'")

    # Check the type and its coherence with the type complements
    if not _is_valid_type(type):
        raise ValueError(f"Invalid type '{type}'")
    if _is_native_type(type) and (object_type or structure_type):
        raise ValueError(
            f"Native type '{type}' cannot have 'object_type' or 'structure_type'"
        )
    if _is_object_type(type) and object_type is None:
        raise ValueError(f"'object_type' must be provided for type '{type}'")

    # Build the variable; it is added to the dictionary only once all the
    # remaining checks have passed
    variable = Variable()
    variable.name = name
    variable.type = type
    variable.used = used

    # Check and set the meta-data
    if meta_data is not None:
        if not is_dict_like(meta_data):
            raise TypeError(type_error_message("meta_data", meta_data, "dict-like"))
        if "keys" not in meta_data or "values" not in meta_data:
            raise ValueError(
                "'meta_data' does not contain the mandatory keys 'keys' and 'values'"
            )
        if not is_list_like(meta_data["keys"]):
            raise TypeError(
                type_error_message("meta_data['keys']", meta_data["keys"], "list-like")
            )
        if not is_list_like(meta_data["values"]):
            raise TypeError(
                type_error_message(
                    "meta_data['values']", meta_data["values"], "list-like"
                )
            )
        if len(meta_data["keys"]) != len(meta_data["values"]):
            raise ValueError("'meta_data' keys and values do not have the same size")
        for key, value in zip(meta_data["keys"], meta_data["values"]):
            variable.meta_data.add_value(key, value)

    # Check and set the optional string attributes
    if label is not None:
        if not is_string_like(label):
            raise TypeError(type_error_message("label", label, "string-like"))
        variable.label = label
    if object_type is not None:
        if not is_string_like(object_type):
            raise TypeError(
                type_error_message("object_type", object_type, "string-like")
            )
        variable.object_type = object_type
    if structure_type is not None:
        if not is_string_like(structure_type):
            raise TypeError(
                type_error_message("structure_type", structure_type, "string-like")
            )
        variable.structure_type = structure_type
    if rule is not None:
        if not is_string_like(rule):
            # Report the offending 'rule' value (not 'structure_type')
            raise TypeError(type_error_message("rule", rule, "string-like"))
        variable.rule = Rule(verbatim=rule)

    self.add_variable(variable)
948+
772949
def remove_variable(self, variable_name):
773950
"""Removes the specified variable from this dictionary
774951
@@ -1058,7 +1235,7 @@ def __init__(self, json_data=None):
10581235
self.type = json_data.get("type")
10591236

10601237
# Initialize complement of the type
1061-
if self.type in ("Entity", "Table"):
1238+
if _is_object_type(self.type):
10621239
self.object_type = json_data.get("objectType")
10631240
elif self.type == "Structure":
10641241
self.structure_type = json_data.get("structureType")
@@ -1072,7 +1249,7 @@ def __init__(self, json_data=None):
10721249
self.meta_data = MetaData(json_meta_data)
10731250

10741251
def __repr__(self):
1075-
"""Returns a human readable string representation"""
1252+
"""Returns a human-readable string representation"""
10761253
return f"Variable ({self.name})"
10771254

10781255
def __str__(self):
@@ -1179,7 +1356,7 @@ def full_type(self):
11791356
basic.
11801357
"""
11811358
full_type = self.type
1182-
if self.type in ("Entity", "Table"):
1359+
if _is_object_type(self.type):
11831360
full_type += f"({self.object_type})"
11841361
elif self.type == "Structure":
11851362
full_type += f"({self.structure_type})"

khiops/samples/samples.ipynb

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -91,40 +91,28 @@
9191
" {\"name\": \"ts\", \"type\": \"TimestampTZ\"},\n",
9292
"]\n",
9393
"for var_spec in simple_variables:\n",
94-
" var = kh.Variable()\n",
95-
" var.name = var_spec[\"name\"]\n",
96-
" var.type = var_spec[\"type\"]\n",
97-
" root_dictionary.add_variable(var)\n",
94+
" root_dictionary.add_variable_from_spec(name=var_spec[\"name\"], type=var_spec[\"type\"])\n",
9895
"\n",
9996
"# Create a second dictionary\n",
10097
"second_dictionary = kh.Dictionary(\n",
10198
" json_data={\"name\": \"Service\", \"key\": [\"Id\", \"id_product\"]}\n",
10299
")\n",
103-
"second_dictionary.add_variable(\n",
104-
" kh.Variable(json_data={\"name\": \"Id\", \"type\": \"Categorical\"})\n",
105-
")\n",
106-
"second_dictionary.add_variable(\n",
107-
" kh.Variable(json_data={\"name\": \"id_product\", \"type\": \"Categorical\"})\n",
108-
")\n",
100+
"second_dictionary.add_variable_from_spec(name=\"Id\", type=\"Categorical\")\n",
101+
"second_dictionary.add_variable_from_spec(name=\"id_product\", type=\"Categorical\")\n",
102+
"\n",
109103
"# Create a third dictionary\n",
110104
"third_dictionary = kh.Dictionary(json_data={\"name\": \"Address\", \"key\": [\"Id\"]})\n",
111-
"third_dictionary.add_variable(\n",
112-
" kh.Variable(json_data={\"name\": \"StreetNumber\", \"type\": \"Numerical\"})\n",
113-
")\n",
114-
"third_dictionary.add_variable(\n",
115-
" kh.Variable(json_data={\"name\": \"StreetName\", \"type\": \"Categorical\"})\n",
116-
")\n",
117-
"third_dictionary.add_variable(\n",
118-
" kh.Variable(json_data={\"name\": \"id_city\", \"type\": \"Categorical\"})\n",
119-
")\n",
105+
"third_dictionary.add_variable_from_spec(name=\"StreetNumber\", type=\"Numerical\")\n",
106+
"third_dictionary.add_variable_from_spec(name=\"StreetName\", type=\"Categorical\")\n",
107+
"third_dictionary.add_variable_from_spec(name=\"id_city\", type=\"Categorical\")\n",
120108
"\n",
121109
"# Add the variables used in a multi-table context in the first dictionary.\n",
122110
"# They link the root dictionary to the additional ones\n",
123-
"root_dictionary.add_variable(\n",
124-
" kh.Variable(json_data={\"name\": \"Services\", \"type\": \"Table(Service)\"})\n",
111+
"root_dictionary.add_variable_from_spec(\n",
112+
" name=\"Services\", type=\"Table\", object_type=\"Service\"\n",
125113
")\n",
126-
"root_dictionary.add_variable(\n",
127-
" kh.Variable(json_data={\"name\": \"Address\", \"type\": \"Entity(Address)\"})\n",
114+
"root_dictionary.add_variable_from_spec(\n",
115+
" name=\"Address\", type=\"Entity\", object_type=\"Address\"\n",
128116
")\n",
129117
"\n",
130118
"# Create a DictionaryDomain (set of dictionaries)\n",

0 commit comments

Comments
 (0)