Skip to content

Commit a4219d5

Browse files
author
Thierry RAMORASOAVINA
committed
Add a way to add a variable to a dictionary using a complete specification
1 parent ec50a84 commit a4219d5

6 files changed

Lines changed: 278 additions & 72 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
### Added
1212
- (`core`) Dictionary API support for dictionary, variable and variable block
1313
comments, and dictionary and variable block internal comments.
14+
- (`core`) New way to add a variable to a dictionary using a complete specification.
1415
- (`sklearn`) `Text` Khiops type support at the estimator level.
1516

1617
### Fixed

doc/samples/samples.rst

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -79,40 +79,28 @@ Samples
7979
{"name": "ts", "type": "TimestampTZ"},
8080
]
8181
for var_spec in simple_variables:
82-
var = kh.Variable()
83-
var.name = var_spec["name"]
84-
var.type = var_spec["type"]
85-
root_dictionary.add_variable(var)
82+
root_dictionary.add_variable_from_spec(name=var_spec["name"], type=var_spec["type"])
8683
8784
# Create a second dictionary
8885
second_dictionary = kh.Dictionary(
8986
json_data={"name": "Service", "key": ["Id", "id_product"]}
9087
)
91-
second_dictionary.add_variable(
92-
kh.Variable(json_data={"name": "Id", "type": "Categorical"})
93-
)
94-
second_dictionary.add_variable(
95-
kh.Variable(json_data={"name": "id_product", "type": "Categorical"})
96-
)
88+
second_dictionary.add_variable_from_spec(name="Id", type="Categorical")
89+
second_dictionary.add_variable_from_spec(name="id_product", type="Categorical")
90+
9791
# Create a third dictionary
9892
third_dictionary = kh.Dictionary(json_data={"name": "Address", "key": ["Id"]})
99-
third_dictionary.add_variable(
100-
kh.Variable(json_data={"name": "StreetNumber", "type": "Numerical"})
101-
)
102-
third_dictionary.add_variable(
103-
kh.Variable(json_data={"name": "StreetName", "type": "Categorical"})
104-
)
105-
third_dictionary.add_variable(
106-
kh.Variable(json_data={"name": "id_city", "type": "Categorical"})
107-
)
93+
third_dictionary.add_variable_from_spec(name="StreetNumber", type="Numerical")
94+
third_dictionary.add_variable_from_spec(name="StreetName", type="Categorical")
95+
third_dictionary.add_variable_from_spec(name="id_city", type="Categorical")
10896
10997
# Add the variables used in a multi-table context in the first dictionary.
11098
# They link the root dictionary to the additional ones
111-
root_dictionary.add_variable(
112-
kh.Variable(json_data={"name": "Services", "type": "Table(Service)"})
99+
root_dictionary.add_variable_from_spec(
100+
name="Services", type="Table", object_type="Service"
113101
)
114-
root_dictionary.add_variable(
115-
kh.Variable(json_data={"name": "Address", "type": "Entity(Address)"})
102+
root_dictionary.add_variable_from_spec(
103+
name="Address", type="Entity", object_type="Address"
116104
)
117105
118106
# Create a DictionaryDomain (set of dictionaries)

khiops/core/dictionary.py

Lines changed: 165 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,55 @@ def _quote_value(value):
8080
return quoted_value
8181

8282

83+
def _is_a_valid_variable_name(name):
84+
"""Ensures the variable name is valid
85+
86+
Please note the Khiops core forbids a name
87+
- with a length outside the [1,128] interval
88+
- containing a simple (Unix) carriage-return (\n)
89+
- with leading and trailing spaces
90+
(\s in Perl-Compatible-Regular-Expressions syntax).
91+
This function must check at least these constraints
92+
"""
93+
variable_name_regexp = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{1,128}$")
94+
return variable_name_regexp.match(name) is not None
95+
96+
97+
def _is_valid_type(type_str):
98+
"""Checks whether the type is known"""
99+
return type_str in [
100+
"Categorical",
101+
"Numerical",
102+
"Time",
103+
"Date",
104+
"Timestamp",
105+
"TimestampTZ",
106+
"Text",
107+
"TextList",
108+
"Structure",
109+
"Entity",
110+
"Table",
111+
]
112+
113+
114+
def _is_native_type(type_str):
115+
"""Checks whether the type is native (not internal or relational)"""
116+
return type_str in [
117+
"Categorical",
118+
"Numerical",
119+
"Time",
120+
"Date",
121+
"Timestamp",
122+
"TimestampTZ",
123+
"Text",
124+
]
125+
126+
127+
def _is_object_type(type_str):
128+
"""Checks whether the type is an object one (relational)"""
129+
return type_str in ["Entity", "Table"]
130+
131+
83132
class DictionaryDomain(KhiopsJSONObject):
84133
"""Main class containing the information of a Khiops dictionary file
85134
@@ -768,6 +817,119 @@ def add_variable(self, variable):
768817
self.variables.append(variable)
769818
self._variables_by_name[variable.name] = variable
770819

820+
def add_variable_from_spec(
    self,
    name,
    type,
    label=None,
    used=None,
    object_type=None,
    structure_type=None,
    meta_data=None,
):
    """Adds a variable to this dictionary using a complete specification

    The variable is only added to the dictionary once every argument has
    been validated: a failed call leaves the dictionary unchanged.

    Parameters
    ----------
    name : str
        Variable name.
    type : str
        Variable base type (for example ``"Categorical"`` or ``"Table"``).
        See `Variable`.
    label : str, optional
        Label of the variable. If ``None`` (default) the label of the newly
        created `Variable` is left untouched.
    used : bool, optional
        Usage status of the variable. If ``None`` (default) the status of the
        newly created `Variable` is left untouched.
    object_type : str, optional
        Object type. Mandatory when ``type`` is ``"Entity"`` or ``"Table"``;
        ignored (not stored) for ``"Structure"`` and ``"TextList"``.
    structure_type : str, optional
        Structure type. Only stored when ``type`` is ``"Structure"``; ignored
        for ``"Entity"``, ``"Table"`` and ``"TextList"``.
    meta_data : dict, optional
        A Python dictionary which holds the metadata specification with the
        following mandatory keys:

        - ``"keys"`` : list of meta-data keys
        - ``"values"`` : list of meta-data values, same length as ``"keys"``

    Raises
    ------
    `ValueError`
        - If the variable name is empty or does not comply
          with the formatting constraints.
        - If there is already a variable with the same name.
        - If the given variable type is unknown.
        - If a native type is given 'object_type' or 'structure_type'.
        - If 'object_type' is missing for an ``"Entity"`` or ``"Table"``.
        - If 'meta_data' lacks the ``"keys"`` or ``"values"`` entry, or if
          these two lists do not have the same size.
    `TypeError`
        - If 'used' is not a boolean.
        - If 'meta_data' is not a dictionary, or if its ``"keys"`` or
          ``"values"`` entry is not a list.
        - If 'label', 'object_type' or 'structure_type' is not a string.
    """
    # Check the name
    if not name:
        raise ValueError(
            "Cannot add to dictionary unnamed variable " f"(name = '{name}')"
        )
    if not _is_a_valid_variable_name(name):
        raise ValueError(
            f"New variable name '{name}' cannot be accepted "
            "(invalid length or characters)"
        )
    if name in self._variables_by_name:
        raise ValueError(f"Dictionary already has a variable named '{name}'")

    # Check the type and its coherence with the type complements
    if not _is_valid_type(type):
        raise ValueError(f"Invalid type '{type}'")
    if _is_native_type(type) and (object_type or structure_type):
        raise ValueError(
            f"Native type '{type}' "
            "cannot have 'object_type' or 'structure_type'"
        )
    if _is_object_type(type) and object_type is None:
        raise ValueError(f"'object_type' must be provided for type '{type}'")

    # Build the variable; it is attached to the dictionary only at the very
    # end so that a validation error below does not leave a partial variable
    variable = Variable()
    variable.name = name
    variable.type = type

    if used is not None:
        if not isinstance(used, bool):
            raise TypeError("'used' must be a boolean")
        variable.used = used

    if meta_data is not None:
        if not isinstance(meta_data, dict):
            raise TypeError("'meta_data' must be a dict")
        if "keys" not in meta_data or "values" not in meta_data:
            raise ValueError(
                "'meta_data' does not contain "
                "the mandatory keys 'keys' and 'values'"
            )
        if not isinstance(meta_data["keys"], list):
            raise TypeError("'meta_data' keys must be a list")
        if not isinstance(meta_data["values"], list):
            raise TypeError("'meta_data' values must be a list")
        if len(meta_data["keys"]) != len(meta_data["values"]):
            raise ValueError(
                "'meta_data' keys and values " "do not have the same size"
            )
        variable.meta_data = MetaData()
        for key, value in zip(meta_data["keys"], meta_data["values"]):
            variable.meta_data.add_value(key, value)

    if label is not None:
        if not isinstance(label, str):
            raise TypeError("'label' must be a str")
        variable.label = label

    # The type complements are always type-checked but only stored when they
    # are meaningful for the variable type, as done by `Variable.__init__`
    if object_type is not None:
        if not isinstance(object_type, str):
            raise TypeError("'object_type' must be a str")
        if _is_object_type(type):
            variable.object_type = object_type
    if structure_type is not None:
        if not isinstance(structure_type, str):
            raise TypeError("'structure_type' must be a str")
        if type == "Structure":
            variable.structure_type = structure_type

    # Everything is valid: register the variable
    self.variables.append(variable)
    self._variables_by_name[variable.name] = variable
932+
771933
def remove_variable(self, variable_name):
772934
"""Removes the specified variable from this dictionary
773935
@@ -1055,7 +1217,7 @@ def __init__(self, json_data=None):
10551217
self.type = json_data.get("type")
10561218

10571219
# Initialize complement of the type
1058-
if self.type in ("Entity", "Table"):
1220+
if _is_object_type(self.type):
10591221
self.object_type = json_data.get("objectType")
10601222
elif self.type == "Structure":
10611223
self.structure_type = json_data.get("structureType")
@@ -1069,7 +1231,7 @@ def __init__(self, json_data=None):
10691231
self.meta_data = MetaData(json_meta_data)
10701232

10711233
def __repr__(self):
1072-
"""Returns a human readable string representation"""
1234+
"""Returns a human-readable string representation"""
10731235
return f"Variable ({self.name})"
10741236

10751237
def __str__(self):
@@ -1169,7 +1331,7 @@ def full_type(self):
11691331
basic.
11701332
"""
11711333
full_type = self.type
1172-
if self.type in ("Entity", "Table"):
1334+
if _is_object_type(self.type):
11731335
full_type += f"({self.object_type})"
11741336
elif self.type == "Structure":
11751337
full_type += f"({self.structure_type})"

khiops/samples/samples.ipynb

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -91,40 +91,28 @@
9191
" {\"name\": \"ts\", \"type\": \"TimestampTZ\"},\n",
9292
"]\n",
9393
"for var_spec in simple_variables:\n",
94-
" var = kh.Variable()\n",
95-
" var.name = var_spec[\"name\"]\n",
96-
" var.type = var_spec[\"type\"]\n",
97-
" root_dictionary.add_variable(var)\n",
94+
" root_dictionary.add_variable_from_spec(name=var_spec[\"name\"], type=var_spec[\"type\"])\n",
9895
"\n",
9996
"# Create a second dictionary\n",
10097
"second_dictionary = kh.Dictionary(\n",
10198
" json_data={\"name\": \"Service\", \"key\": [\"Id\", \"id_product\"]}\n",
10299
")\n",
103-
"second_dictionary.add_variable(\n",
104-
" kh.Variable(json_data={\"name\": \"Id\", \"type\": \"Categorical\"})\n",
105-
")\n",
106-
"second_dictionary.add_variable(\n",
107-
" kh.Variable(json_data={\"name\": \"id_product\", \"type\": \"Categorical\"})\n",
108-
")\n",
100+
"second_dictionary.add_variable_from_spec(name=\"Id\", type=\"Categorical\")\n",
101+
"second_dictionary.add_variable_from_spec(name=\"id_product\", type=\"Categorical\")\n",
102+
"\n",
109103
"# Create a third dictionary\n",
110104
"third_dictionary = kh.Dictionary(json_data={\"name\": \"Address\", \"key\": [\"Id\"]})\n",
111-
"third_dictionary.add_variable(\n",
112-
" kh.Variable(json_data={\"name\": \"StreetNumber\", \"type\": \"Numerical\"})\n",
113-
")\n",
114-
"third_dictionary.add_variable(\n",
115-
" kh.Variable(json_data={\"name\": \"StreetName\", \"type\": \"Categorical\"})\n",
116-
")\n",
117-
"third_dictionary.add_variable(\n",
118-
" kh.Variable(json_data={\"name\": \"id_city\", \"type\": \"Categorical\"})\n",
119-
")\n",
105+
"third_dictionary.add_variable_from_spec(name=\"StreetNumber\", type=\"Numerical\")\n",
106+
"third_dictionary.add_variable_from_spec(name=\"StreetName\", type=\"Categorical\")\n",
107+
"third_dictionary.add_variable_from_spec(name=\"id_city\", type=\"Categorical\")\n",
120108
"\n",
121109
"# Add the variables used in a multi-table context in the first dictionary.\n",
122110
"# They link the root dictionary to the additional ones\n",
123-
"root_dictionary.add_variable(\n",
124-
" kh.Variable(json_data={\"name\": \"Services\", \"type\": \"Table(Service)\"})\n",
111+
"root_dictionary.add_variable_from_spec(\n",
112+
" name=\"Services\", type=\"Table\", object_type=\"Service\"\n",
125113
")\n",
126-
"root_dictionary.add_variable(\n",
127-
" kh.Variable(json_data={\"name\": \"Address\", \"type\": \"Entity(Address)\"})\n",
114+
"root_dictionary.add_variable_from_spec(\n",
115+
" name=\"Address\", type=\"Entity\", object_type=\"Address\"\n",
128116
")\n",
129117
"\n",
130118
"# Create a DictionaryDomain (set of dictionaries)\n",

khiops/samples/samples.py

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -91,40 +91,30 @@ def create_dictionary_domain():
9191
{"name": "ts", "type": "TimestampTZ"},
9292
]
9393
for var_spec in simple_variables:
94-
var = kh.Variable()
95-
var.name = var_spec["name"]
96-
var.type = var_spec["type"]
97-
root_dictionary.add_variable(var)
94+
root_dictionary.add_variable_from_spec(
95+
name=var_spec["name"], type=var_spec["type"]
96+
)
9897

9998
# Create a second dictionary
10099
second_dictionary = kh.Dictionary(
101100
json_data={"name": "Service", "key": ["Id", "id_product"]}
102101
)
103-
second_dictionary.add_variable(
104-
kh.Variable(json_data={"name": "Id", "type": "Categorical"})
105-
)
106-
second_dictionary.add_variable(
107-
kh.Variable(json_data={"name": "id_product", "type": "Categorical"})
108-
)
102+
second_dictionary.add_variable_from_spec(name="Id", type="Categorical")
103+
second_dictionary.add_variable_from_spec(name="id_product", type="Categorical")
104+
109105
# Create a third dictionary
110106
third_dictionary = kh.Dictionary(json_data={"name": "Address", "key": ["Id"]})
111-
third_dictionary.add_variable(
112-
kh.Variable(json_data={"name": "StreetNumber", "type": "Numerical"})
113-
)
114-
third_dictionary.add_variable(
115-
kh.Variable(json_data={"name": "StreetName", "type": "Categorical"})
116-
)
117-
third_dictionary.add_variable(
118-
kh.Variable(json_data={"name": "id_city", "type": "Categorical"})
119-
)
107+
third_dictionary.add_variable_from_spec(name="StreetNumber", type="Numerical")
108+
third_dictionary.add_variable_from_spec(name="StreetName", type="Categorical")
109+
third_dictionary.add_variable_from_spec(name="id_city", type="Categorical")
120110

121111
# Add the variables used in a multi-table context in the first dictionary.
122112
# They link the root dictionary to the additional ones
123-
root_dictionary.add_variable(
124-
kh.Variable(json_data={"name": "Services", "type": "Table(Service)"})
113+
root_dictionary.add_variable_from_spec(
114+
name="Services", type="Table", object_type="Service"
125115
)
126-
root_dictionary.add_variable(
127-
kh.Variable(json_data={"name": "Address", "type": "Entity(Address)"})
116+
root_dictionary.add_variable_from_spec(
117+
name="Address", type="Entity", object_type="Address"
128118
)
129119

130120
# Create a DictionaryDomain (set of dictionaries)

0 commit comments

Comments
 (0)