Skip to content

Commit 2e7cdc5

Browse files
author
Thierry RAMORASOAVINA
committed
Add a way to add a variable to a dictionary using a complete specification
1 parent ec50a84 commit 2e7cdc5

File tree

6 files changed

+190
-5
lines changed

6 files changed

+190
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
### Added
1212
- (`core`) Dictionary API support for dictionary, variable and variable block
1313
comments, and dictionary and variable block internal comments.
14+
- (`core`) `Dictionary.add_variable_from_spec` method to add a variable to a dictionary from a complete specification.
1415
- (`sklearn`) `Text` Khiops type support at the estimator level.
1516

1617
### Fixed

doc/samples/samples.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ Samples
8383
var.name = var_spec["name"]
8484
var.type = var_spec["type"]
8585
root_dictionary.add_variable(var)
86+
# another way to add a variable
87+
root_dictionary.add_variable_from_spec(name="other_ts", type="TimestampTZ")
8688
8789
# Create a second dictionary
8890
second_dictionary = kh.Dictionary(

khiops/core/dictionary.py

Lines changed: 137 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,88 @@ def get_variable_block(self, variable_block_name):
738738
"""
739739
return self._variable_blocks_by_name[variable_block_name]
740740

741+
def add_variable_from_spec(self, name, type, **variable_spec):
    """Adds a variable to this dictionary using a complete specification

    All the checks are performed before the dictionary is modified: when an
    exception is raised the dictionary is left untouched.

    Parameters
    ----------
    name : str
        Variable name.
    type : str
        Variable type, one of the values of ``Variable.__all_types__``.
        See `Variable`.
    variable_spec : dict
        Additional specification for the variable. A key that is not given
        leaves the corresponding attribute at the value set by the
        `Variable` constructor.
        The recognized keys and their corresponding values are currently:

        - label: str, optional -
          label of the variable
        - used: bool, optional -
          usage status of the variable
        - object_type: str, optional -
          object type (mandatory if ``type`` is "Entity" or "Table";
          rejected for a native type; ignored for any other type)
        - structure_type: str, optional -
          structure type (only used if ``type`` is "Structure";
          rejected for a native type; ignored for any other type)
        - meta_data: dict, optional -
          a Python dictionary which holds the metadata specification

    Raises
    ------
    `ValueError`
        If the variable name is empty or does not comply with the name
        format rule.
        If there is already a variable with the same name.
        If the given variable type is unknown.
        If a native type is given 'object_type' or 'structure_type'.
        If an object type ("Entity", "Table") is not given 'object_type'.
        If 'used' is not a boolean.
        If 'meta_data' is not a dictionary.
    """
    # NOTE: the ``type`` parameter shadows the builtin of the same name; it is
    # kept because it is part of the keyword interface used by callers.

    # Check the variable name
    if not name:
        raise ValueError(
            "Cannot add to dictionary unnamed variable " f"(name = '{name}')"
        )
    if not Variable._is_a_valid_variable_name(name):
        raise ValueError(
            f"New variable name '{name}' cannot be accepted "
            "(invalid length or characters)"
        )
    if name in self._variables_by_name:
        raise ValueError(f"Dictionary already has a variable named '{name}'")

    # Check the variable type and the coherence of its complement
    if type not in Variable.__all_types__:
        raise ValueError(
            f"Invalid type '{type}', "
            f"not in the allowed list {Variable.__all_types__}"
        )
    if type in Variable.__native_types__:
        if "object_type" in variable_spec or "structure_type" in variable_spec:
            raise ValueError(
                f"Native type '{type}' "
                "cannot have 'object_type' or 'structure_type'"
            )
    is_object_type = type in Variable.__object_types__
    if is_object_type and "object_type" not in variable_spec:
        raise ValueError(f"'object_type' must be provided for type '{type}'")

    # Check the optional fields that have a mandatory Python type
    if "used" in variable_spec and not isinstance(variable_spec["used"], bool):
        raise ValueError("'used' must be a boolean")
    if "meta_data" in variable_spec and not isinstance(
        variable_spec["meta_data"], dict
    ):
        raise ValueError("'meta_data' must be a dict")

    # Build the variable; only the keys that were given override the values
    # set by the Variable constructor
    variable = Variable()
    variable.name = name
    variable.type = type
    if "used" in variable_spec:
        variable.used = variable_spec["used"]
    if "meta_data" in variable_spec:
        variable.meta_data = MetaData(variable_spec["meta_data"])
    if "label" in variable_spec:
        variable.label = variable_spec["label"]

    # Set only the type complement matching the variable type, any other one
    # is ignored as stated in the docstring
    if is_object_type:
        variable.object_type = variable_spec["object_type"]
    elif type == Variable.STRUCTURE_TYPE and "structure_type" in variable_spec:
        variable.structure_type = variable_spec["structure_type"]

    # Register the variable in the list and in the name index
    self.variables.append(variable)
    self._variables_by_name[variable.name] = variable
822+
741823
def add_variable(self, variable):
742824
"""Adds a variable to this dictionary
743825
@@ -1007,6 +1089,43 @@ class Variable:
10071089
- `samples.create_dictionary_domain()`
10081090
"""
10091091

1092+
# Variables types
1093+
CATEGORICAL_TYPE = "Categorical"
1094+
NUMERICAL_TYPE = "Numerical"
1095+
TIME_TYPE = "Time"
1096+
DATE_TYPE = "Date"
1097+
TIMESTAMP_TYPE = "Timestamp"
1098+
TIMESTAMP_TZ_TYPE = "TimestampTZ"
1099+
TEXT_TYPE = "Text"
1100+
TEXTLIST_TYPE = "TextList"
1101+
STRUCTURE_TYPE = "Structure"
1102+
ENTITY_TYPE = "Entity"
1103+
TABLE_TYPE = "Table"
1104+
__all_types__ = (
1105+
CATEGORICAL_TYPE,
1106+
NUMERICAL_TYPE,
1107+
TIME_TYPE,
1108+
DATE_TYPE,
1109+
TIMESTAMP_TYPE,
1110+
TIMESTAMP_TZ_TYPE,
1111+
TEXT_TYPE,
1112+
TEXTLIST_TYPE,
1113+
STRUCTURE_TYPE,
1114+
ENTITY_TYPE,
1115+
TABLE_TYPE,
1116+
)
1117+
__native_types__ = (
1118+
CATEGORICAL_TYPE,
1119+
NUMERICAL_TYPE,
1120+
TIME_TYPE,
1121+
DATE_TYPE,
1122+
TIMESTAMP_TYPE,
1123+
TIMESTAMP_TZ_TYPE,
1124+
TEXT_TYPE,
1125+
)
1126+
__internal_types__ = (TEXTLIST_TYPE, STRUCTURE_TYPE)
1127+
__object_types__ = (ENTITY_TYPE, TABLE_TYPE)
1128+
10101129
def __init__(self, json_data=None):
10111130
"""See class docstring"""
10121131
# Check the type of json_data
@@ -1055,9 +1174,9 @@ def __init__(self, json_data=None):
10551174
self.type = json_data.get("type")
10561175

10571176
# Initialize complement of the type
1058-
if self.type in ("Entity", "Table"):
1177+
if self.type in Variable.__object_types__:
10591178
self.object_type = json_data.get("objectType")
1060-
elif self.type == "Structure":
1179+
elif self.type == Variable.STRUCTURE_TYPE:
10611180
self.structure_type = json_data.get("structureType")
10621181

10631182
# Initialize derivation rule
@@ -1069,7 +1188,7 @@ def __init__(self, json_data=None):
10691188
self.meta_data = MetaData(json_meta_data)
10701189

10711190
def __repr__(self):
1072-
"""Returns a human readable string representation"""
1191+
"""Returns a human-readable string representation"""
10731192
return f"Variable ({self.name})"
10741193

10751194
def __str__(self):
@@ -1078,6 +1197,19 @@ def __str__(self):
10781197
self.write(writer)
10791198
return str(stream.getvalue(), encoding="utf8", errors="replace")
10801199

1200+
@staticmethod
def _is_a_valid_variable_name(name):
    """Checks whether a variable name is valid

    Please note the Khiops core forbids a name

    - with a length outside the [1,128] interval
    - containing a simple (Unix) carriage-return (\\n)
    - with leading and trailing spaces
      (\\s in Perl-Compatible-Regular-Expressions syntax).

    This function must check at least these constraints. The rule applied
    here is stricter: the name must start with an ASCII letter and contain
    only ASCII letters, digits and underscores, for a total length of 1 to
    128 characters.

    Parameters
    ----------
    name : str
        The candidate variable name.

    Returns
    -------
    bool
        ``True`` if the name is valid, ``False`` otherwise (including when
        ``name`` is not a string).
    """
    # A non-string cannot be a name: answer False instead of letting the
    # regular expression engine raise a TypeError
    if not isinstance(name, str):
        return False

    # - one leading letter + at most 127 other characters: length in [1,128]
    # - fullmatch is used because '$' would accept a trailing '\n'
    return re.fullmatch(r"[a-zA-Z][a-zA-Z0-9_]{0,127}", name) is not None
10811213
def copy(self):
10821214
"""Copies this variable instance
10831215
@@ -1169,9 +1301,9 @@ def full_type(self):
11691301
basic.
11701302
"""
11711303
full_type = self.type
1172-
if self.type in ("Entity", "Table"):
1304+
if self.type in Variable.__object_types__:
11731305
full_type += f"({self.object_type})"
1174-
elif self.type == "Structure":
1306+
elif self.type == Variable.STRUCTURE_TYPE:
11751307
full_type += f"({self.structure_type})"
11761308
return full_type
11771309

khiops/samples/samples.ipynb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@
9595
" var.name = var_spec[\"name\"]\n",
9696
" var.type = var_spec[\"type\"]\n",
9797
" root_dictionary.add_variable(var)\n",
98+
"# another way to add a variable\n",
99+
"root_dictionary.add_variable_from_spec(name=\"other_ts\", type=\"TimestampTZ\")\n",
98100
"\n",
99101
"# Create a second dictionary\n",
100102
"second_dictionary = kh.Dictionary(\n",

khiops/samples/samples.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ def create_dictionary_domain():
9595
var.name = var_spec["name"]
9696
var.type = var_spec["type"]
9797
root_dictionary.add_variable(var)
98+
# another way to add a variable
99+
root_dictionary.add_variable_from_spec(name="other_ts", type="TimestampTZ")
98100

99101
# Create a second dictionary
100102
second_dictionary = kh.Dictionary(

tests/test_core.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,6 +1836,52 @@ def test_dictionary_accessors(self):
18361836
variable.name = ""
18371837
with self.assertRaises(ValueError):
18381838
dictionary_copy.add_variable(variable)
1839+
# Thoroughly test Dictionary.add_variable_from_spec
1840+
# (using a part of the exception message to ensure
1841+
# the actual error is raised)
1842+
# empty name is forbidden
1843+
with self.assertRaisesRegex(ValueError, "unnamed"):
1844+
dictionary.add_variable_from_spec(name="", type="Categorical")
1845+
# too long name is forbidden
1846+
with self.assertRaisesRegex(ValueError, "cannot be accepted"):
1847+
dictionary.add_variable_from_spec(
1848+
name="A" * 200, type="Categorical"
1849+
)
1850+
# the name must not contain forbidden characters
1851+
with self.assertRaisesRegex(ValueError, "cannot be accepted"):
1852+
dictionary.add_variable_from_spec(
1853+
name="ALLFDLFDFDLLL\t", type="Categorical"
1854+
)
1855+
# successful adding
1856+
dictionary.add_variable_from_spec(name="label", type="Categorical")
1857+
# duplicate name is forbidden
1858+
with self.assertRaisesRegex(ValueError, "already"):
1859+
dictionary.add_variable_from_spec(name="label", type="Numerical")
1860+
# type must be recognized
1861+
with self.assertRaisesRegex(ValueError, "Invalid type"):
1862+
dictionary.add_variable_from_spec(
1863+
name="fresh_one", type="Unknowntype"
1864+
)
1865+
# native types cannot accept object_type or structure_type
1866+
with self.assertRaisesRegex(ValueError, "Native type"):
1867+
dictionary.add_variable_from_spec(
1868+
name="fresh_one", type="Numerical", object_type="X"
1869+
)
1870+
# an object type (Entity, Table) requires the 'object_type' parameter
1871+
with self.assertRaisesRegex(
1872+
ValueError, "'object_type' must be provided"
1873+
):
1874+
dictionary.add_variable_from_spec(name="fresh_one", type="Entity")
1875+
# used must be a boolean
1876+
with self.assertRaisesRegex(ValueError, "boolean"):
1877+
dictionary.add_variable_from_spec(
1878+
name="fresh_one", type="Numerical", used="True"
1879+
)
1880+
# meta data must be valid
1881+
with self.assertRaisesRegex(ValueError, "meta_data"):
1882+
dictionary.add_variable_from_spec(
1883+
name="fresh_one", type="Categorical", meta_data="str"
1884+
)
18391885

18401886
# Test Dictionary variable block accessors
18411887
# Create a simple block

0 commit comments

Comments
 (0)