Skip to content

Commit 8e6db1b

Browse files
author
Thierry RAMORASOAVINA
committed
Add a way to add a variable to a dictionary using a complete specification
1 parent ec50a84 commit 8e6db1b

File tree

6 files changed

+291
-72
lines changed

6 files changed

+291
-72
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
### Added
1212
- (`core`) Dictionary API support for dictionary, variable and variable block
1313
comments, and dictionary and variable block internal comments.
14+
- (`core`) New way to add a variable to a dictionary using a complete specification.
1415
- (`sklearn`) `Text` Khiops type support at the estimator level.
1516

1617
### Fixed

doc/samples/samples.rst

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -79,40 +79,28 @@ Samples
7979
{"name": "ts", "type": "TimestampTZ"},
8080
]
8181
for var_spec in simple_variables:
82-
var = kh.Variable()
83-
var.name = var_spec["name"]
84-
var.type = var_spec["type"]
85-
root_dictionary.add_variable(var)
82+
root_dictionary.add_variable_from_spec(name=var_spec["name"], type=var_spec["type"])
8683
8784
# Create a second dictionary
8885
second_dictionary = kh.Dictionary(
8986
json_data={"name": "Service", "key": ["Id", "id_product"]}
9087
)
91-
second_dictionary.add_variable(
92-
kh.Variable(json_data={"name": "Id", "type": "Categorical"})
93-
)
94-
second_dictionary.add_variable(
95-
kh.Variable(json_data={"name": "id_product", "type": "Categorical"})
96-
)
88+
second_dictionary.add_variable_from_spec(name="Id", type="Categorical")
89+
second_dictionary.add_variable_from_spec(name="id_product", type="Categorical")
90+
9791
# Create a third dictionary
9892
third_dictionary = kh.Dictionary(json_data={"name": "Address", "key": ["Id"]})
99-
third_dictionary.add_variable(
100-
kh.Variable(json_data={"name": "StreetNumber", "type": "Numerical"})
101-
)
102-
third_dictionary.add_variable(
103-
kh.Variable(json_data={"name": "StreetName", "type": "Categorical"})
104-
)
105-
third_dictionary.add_variable(
106-
kh.Variable(json_data={"name": "id_city", "type": "Categorical"})
107-
)
93+
third_dictionary.add_variable_from_spec(name="StreetNumber", type="Numerical")
94+
third_dictionary.add_variable_from_spec(name="StreetName", type="Categorical")
95+
third_dictionary.add_variable_from_spec(name="id_city", type="Categorical")
10896
10997
# Add the variables used in a multi-table context in the first dictionary.
11098
# They link the root dictionary to the additional ones
111-
root_dictionary.add_variable(
112-
kh.Variable(json_data={"name": "Services", "type": "Table(Service)"})
99+
root_dictionary.add_variable_from_spec(
100+
name="Services", type="Table", object_type="Service"
113101
)
114-
root_dictionary.add_variable(
115-
kh.Variable(json_data={"name": "Address", "type": "Entity(Address)"})
102+
root_dictionary.add_variable_from_spec(
103+
name="Address", type="Entity", object_type="Address"
116104
)
117105
118106
# Create a DictionaryDomain (set of dictionaries)

khiops/core/dictionary.py

Lines changed: 196 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,55 @@ def _quote_value(value):
8080
return quoted_value
8181

8282

83+
def _is_a_valid_variable_name(name):
84+
"""Ensures the variable name is valid
85+
86+
Please note the Khiops core forbids a name
87+
- with a length outside the [1,128] interval
88+
- containing a simple (Unix) carriage-return (\n)
89+
- with leading and trailing spaces
90+
(\s in Perl-Compatible-Regular-Expressions syntax).
91+
This function must check at least these constraints
92+
"""
93+
variable_name_regexp = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{1,128}$")
94+
return variable_name_regexp.match(name) is not None
95+
96+
97+
def _is_valid_type(type_str):
98+
"""Checks whether the type is known"""
99+
return type_str in [
100+
"Categorical",
101+
"Numerical",
102+
"Time",
103+
"Date",
104+
"Timestamp",
105+
"TimestampTZ",
106+
"Text",
107+
"TextList",
108+
"Structure",
109+
"Entity",
110+
"Table",
111+
]
112+
113+
114+
def _is_native_type(type_str):
115+
"""Checks whether the type is native (not internal or relational)"""
116+
return type_str in [
117+
"Categorical",
118+
"Numerical",
119+
"Time",
120+
"Date",
121+
"Timestamp",
122+
"TimestampTZ",
123+
"Text",
124+
]
125+
126+
127+
def _is_object_type(type_str):
128+
"""Checks whether the type is an object one (relational)"""
129+
return type_str in ["Entity", "Table"]
130+
131+
83132
class DictionaryDomain(KhiopsJSONObject):
84133
"""Main class containing the information of a Khiops dictionary file
85134
@@ -768,6 +817,113 @@ def add_variable(self, variable):
768817
self.variables.append(variable)
769818
self._variables_by_name[variable.name] = variable
770819

820+
def add_variable_from_spec(
    self,
    name,
    type,
    label=None,
    used=None,
    object_type=None,
    structure_type=None,
    meta_data=None,
):
    """Adds a variable to this dictionary using a complete specification

    All arguments are validated before the variable is created, so the
    dictionary is left untouched when an error is raised.

    Parameters
    ----------
    name : str
        Variable name.
    type : str
        Variable type. See `Variable`.
    label : str, optional
        Label of the variable. If ``None`` the value set by ``Variable()``
        is left unchanged.
    used : bool, optional
        Usage status of the variable. If ``None`` the value set by
        ``Variable()`` is left unchanged.
    object_type : str, optional
        Object type. Mandatory when ``type`` is "Entity" or "Table",
        rejected for native types, ignored for any other type.
    structure_type : str, optional
        Structure type. Rejected for native types, ignored when ``type``
        is not "Structure".
    meta_data : dict, optional
        A Python dictionary which holds the metadata specification.
        When provided, both of the following entries are mandatory:

        - "keys" : list
            list of meta-data keys
        - "values" : list
            list of meta-data values

    Raises
    ------
    `ValueError`
        - If the variable name is empty, is not a string or does not comply
          with the formatting constraints.
        - If there is already a variable with the same name.
        - If the given variable type is unknown.
        - If a native type is given 'object_type' or 'structure_type'.
        - If 'object_type' is missing or empty for an "Entity" or "Table".
        - If 'used', 'label', 'object_type' or 'structure_type' do not have
          the expected Python type.
        - If 'meta_data' is not a dictionary, lacks the "keys" or "values"
          entries, or one of them is not a list.
    """
    # Check the name
    if not name:
        raise ValueError(
            "Cannot add to dictionary unnamed variable " f"(name = '{name}')"
        )
    # The isinstance test keeps the documented ValueError for non-string
    # names instead of a TypeError coming from the regular expression engine
    if not isinstance(name, str) or not _is_a_valid_variable_name(name):
        raise ValueError(
            f"New variable name '{name}' cannot be accepted "
            "(invalid length or characters)"
        )
    if name in self._variables_by_name:
        raise ValueError(f"Dictionary already has a variable named '{name}'")

    # Check the type and the consistency of its complements
    if not _is_valid_type(type):
        raise ValueError(f"Invalid type '{type}'")
    is_object_type = _is_object_type(type)
    is_structure_type = type == "Structure"
    if _is_native_type(type) and (object_type or structure_type):
        raise ValueError(
            f"Native type '{type}' " "cannot have 'object_type' or 'structure_type'"
        )
    # An empty object type is rejected as well: it would produce a full type
    # such as "Table()"
    if is_object_type and (object_type is None or object_type == ""):
        raise ValueError(f"'object_type' must be provided for type '{type}'")

    # Check the Python types of the optional arguments
    if used is not None and not isinstance(used, bool):
        raise ValueError("'used' must be a boolean")
    if meta_data is not None:
        if not isinstance(meta_data, dict):
            raise ValueError("'meta_data' must be a dict")
        if "keys" not in meta_data or "values" not in meta_data:
            raise ValueError(
                "'meta_data' does not contain "
                "the mandatory keys 'keys' and 'values'"
            )
        if not isinstance(meta_data["keys"], list):
            raise ValueError("'meta_data' keys must be a list")
        if not isinstance(meta_data["values"], list):
            raise ValueError("'meta_data' values must be a list")
    if label is not None and not isinstance(label, str):
        raise ValueError("'label' must be a str")
    if object_type is not None and not isinstance(object_type, str):
        raise ValueError("'object_type' must be a str")
    if structure_type is not None and not isinstance(structure_type, str):
        raise ValueError("'structure_type' must be a str")

    # Build the variable; attributes that were not specified keep the values
    # set by the Variable constructor
    variable = Variable()
    variable.name = name
    variable.type = type
    if used is not None:
        variable.used = used
    if meta_data is not None:
        variable.meta_data = MetaData(meta_data)
    if label is not None:
        variable.label = label
    # As documented, type complements are ignored when they do not apply
    if is_object_type:
        variable.object_type = object_type
    if is_structure_type and structure_type is not None:
        variable.structure_type = structure_type

    # Register the variable
    self.variables.append(variable)
    self._variables_by_name[variable.name] = variable
926+
771927
def remove_variable(self, variable_name):
772928
"""Removes the specified variable from this dictionary
773929
@@ -1007,6 +1163,43 @@ class Variable:
10071163
- `samples.create_dictionary_domain()`
10081164
"""
10091165

1166+
# Variable types
1167+
CATEGORICAL_TYPE = "Categorical"
1168+
NUMERICAL_TYPE = "Numerical"
1169+
TIME_TYPE = "Time"
1170+
DATE_TYPE = "Date"
1171+
TIMESTAMP_TYPE = "Timestamp"
1172+
TIMESTAMP_TZ_TYPE = "TimestampTZ"
1173+
TEXT_TYPE = "Text"
1174+
TEXTLIST_TYPE = "TextList"
1175+
STRUCTURE_TYPE = "Structure"
1176+
ENTITY_TYPE = "Entity"
1177+
TABLE_TYPE = "Table"
1178+
__all_types__ = (
1179+
CATEGORICAL_TYPE,
1180+
NUMERICAL_TYPE,
1181+
TIME_TYPE,
1182+
DATE_TYPE,
1183+
TIMESTAMP_TYPE,
1184+
TIMESTAMP_TZ_TYPE,
1185+
TEXT_TYPE,
1186+
TEXTLIST_TYPE,
1187+
STRUCTURE_TYPE,
1188+
ENTITY_TYPE,
1189+
TABLE_TYPE,
1190+
)
1191+
__native_types__ = (
1192+
CATEGORICAL_TYPE,
1193+
NUMERICAL_TYPE,
1194+
TIME_TYPE,
1195+
DATE_TYPE,
1196+
TIMESTAMP_TYPE,
1197+
TIMESTAMP_TZ_TYPE,
1198+
TEXT_TYPE,
1199+
)
1200+
__internal_types__ = (TEXTLIST_TYPE, STRUCTURE_TYPE)
1201+
__object_types__ = (ENTITY_TYPE, TABLE_TYPE)
1202+
10101203
def __init__(self, json_data=None):
10111204
"""See class docstring"""
10121205
# Check the type of json_data
@@ -1055,7 +1248,7 @@ def __init__(self, json_data=None):
10551248
self.type = json_data.get("type")
10561249

10571250
# Initialize complement of the type
1058-
if self.type in ("Entity", "Table"):
1251+
if _is_object_type(self.type):
10591252
self.object_type = json_data.get("objectType")
10601253
elif self.type == "Structure":
10611254
self.structure_type = json_data.get("structureType")
@@ -1069,7 +1262,7 @@ def __init__(self, json_data=None):
10691262
self.meta_data = MetaData(json_meta_data)
10701263

10711264
def __repr__(self):
1072-
"""Returns a human readable string representation"""
1265+
"""Returns a human-readable string representation"""
10731266
return f"Variable ({self.name})"
10741267

10751268
def __str__(self):
@@ -1169,7 +1362,7 @@ def full_type(self):
11691362
basic.
11701363
"""
11711364
full_type = self.type
1172-
if self.type in ("Entity", "Table"):
1365+
if _is_object_type(self.type):
11731366
full_type += f"({self.object_type})"
11741367
elif self.type == "Structure":
11751368
full_type += f"({self.structure_type})"

khiops/samples/samples.ipynb

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -91,40 +91,28 @@
9191
" {\"name\": \"ts\", \"type\": \"TimestampTZ\"},\n",
9292
"]\n",
9393
"for var_spec in simple_variables:\n",
94-
" var = kh.Variable()\n",
95-
" var.name = var_spec[\"name\"]\n",
96-
" var.type = var_spec[\"type\"]\n",
97-
" root_dictionary.add_variable(var)\n",
94+
" root_dictionary.add_variable_from_spec(name=var_spec[\"name\"], type=var_spec[\"type\"])\n",
9895
"\n",
9996
"# Create a second dictionary\n",
10097
"second_dictionary = kh.Dictionary(\n",
10198
" json_data={\"name\": \"Service\", \"key\": [\"Id\", \"id_product\"]}\n",
10299
")\n",
103-
"second_dictionary.add_variable(\n",
104-
" kh.Variable(json_data={\"name\": \"Id\", \"type\": \"Categorical\"})\n",
105-
")\n",
106-
"second_dictionary.add_variable(\n",
107-
" kh.Variable(json_data={\"name\": \"id_product\", \"type\": \"Categorical\"})\n",
108-
")\n",
100+
"second_dictionary.add_variable_from_spec(name=\"Id\", type=\"Categorical\")\n",
101+
"second_dictionary.add_variable_from_spec(name=\"id_product\", type=\"Categorical\")\n",
102+
"\n",
109103
"# Create a third dictionary\n",
110104
"third_dictionary = kh.Dictionary(json_data={\"name\": \"Address\", \"key\": [\"Id\"]})\n",
111-
"third_dictionary.add_variable(\n",
112-
" kh.Variable(json_data={\"name\": \"StreetNumber\", \"type\": \"Numerical\"})\n",
113-
")\n",
114-
"third_dictionary.add_variable(\n",
115-
" kh.Variable(json_data={\"name\": \"StreetName\", \"type\": \"Categorical\"})\n",
116-
")\n",
117-
"third_dictionary.add_variable(\n",
118-
" kh.Variable(json_data={\"name\": \"id_city\", \"type\": \"Categorical\"})\n",
119-
")\n",
105+
"third_dictionary.add_variable_from_spec(name=\"StreetNumber\", type=\"Numerical\")\n",
106+
"third_dictionary.add_variable_from_spec(name=\"StreetName\", type=\"Categorical\")\n",
107+
"third_dictionary.add_variable_from_spec(name=\"id_city\", type=\"Categorical\")\n",
120108
"\n",
121109
"# Add the variables used in a multi-table context in the first dictionary.\n",
122110
"# They link the root dictionary to the additional ones\n",
123-
"root_dictionary.add_variable(\n",
124-
" kh.Variable(json_data={\"name\": \"Services\", \"type\": \"Table(Service)\"})\n",
111+
"root_dictionary.add_variable_from_spec(\n",
112+
" name=\"Services\", type=\"Table\", object_type=\"Service\"\n",
125113
")\n",
126-
"root_dictionary.add_variable(\n",
127-
" kh.Variable(json_data={\"name\": \"Address\", \"type\": \"Entity(Address)\"})\n",
114+
"root_dictionary.add_variable_from_spec(\n",
115+
" name=\"Address\", type=\"Entity\", object_type=\"Address\"\n",
128116
")\n",
129117
"\n",
130118
"# Create a DictionaryDomain (set of dictionaries)\n",

0 commit comments

Comments
 (0)