@@ -80,6 +80,55 @@ def _quote_value(value):
8080 return quoted_value
8181
8282
# Compiled once at import time: a letter or an underscore followed by at most
# 127 letters, digits or underscores, i.e. 1 to 128 characters overall.
_VARIABLE_NAME_REGEXP = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]{0,127}")


def _is_a_valid_variable_name(name):
    r"""Checks whether the variable name is valid

    Please note the Khiops core forbids a name
    - with a length outside the [1,128] interval
    - containing a simple (Unix) carriage-return (\n)
    - with leading and trailing spaces
    (\s in Perl-Compatible-Regular-Expressions syntax).
    This function must check at least these constraints.

    The rule enforced here is stricter than those constraints: the whole name
    must be made of ASCII letters, digits and underscores, must not start with
    a digit, and must be 1 to 128 characters long.

    Parameters
    ----------
    name : str
        Candidate variable name.

    Returns
    -------
    bool
        ``True`` if ``name`` is a string satisfying the rule above, ``False``
        otherwise (including when ``name`` is not a `str`).
    """
    # A str pattern cannot be applied to a non-str object (it would raise a
    # TypeError), so anything else is simply reported as invalid
    if not isinstance(name, str):
        return False

    # fullmatch is used instead of match + "$" because "$" also matches just
    # before a trailing "\n", which would let names ending with "\n" through
    return _VARIABLE_NAME_REGEXP.fullmatch(name) is not None
95+
96+
def _is_valid_type(type_str):
    """Tells whether ``type_str`` is one of the known variable type names

    Parameters
    ----------
    type_str : str
        Type name to look up. The comparison is exact (case-sensitive).

    Returns
    -------
    bool
        ``True`` if ``type_str`` is a known type name, ``False`` otherwise.
    """
    known_type_names = (
        "Categorical",
        "Numerical",
        "Time",
        "Date",
        "Timestamp",
        "TimestampTZ",
        "Text",
        "TextList",
        "Structure",
        "Entity",
        "Table",
    )
    return type_str in known_type_names
112+
113+
def _is_native_type(type_str):
    """Tells whether ``type_str`` names a native variable type

    Native types are the ones that are neither internal nor relational.

    Parameters
    ----------
    type_str : str
        Type name to look up. The comparison is exact (case-sensitive).

    Returns
    -------
    bool
        ``True`` if ``type_str`` is a native type name, ``False`` otherwise.
    """
    native_type_names = (
        "Categorical",
        "Numerical",
        "Time",
        "Date",
        "Timestamp",
        "TimestampTZ",
        "Text",
    )
    return type_str in native_type_names
125+
126+
def _is_object_type(type_str):
    """Tells whether ``type_str`` names an object (relational) variable type

    Parameters
    ----------
    type_str : str
        Type name to look up. The comparison is exact (case-sensitive).

    Returns
    -------
    bool
        ``True`` if ``type_str`` is "Entity" or "Table", ``False`` otherwise.
    """
    object_type_names = ("Entity", "Table")
    return type_str in object_type_names
130+
131+
83132class DictionaryDomain (KhiopsJSONObject ):
84133 """Main class containing the information of a Khiops dictionary file
85134
@@ -768,6 +817,113 @@ def add_variable(self, variable):
768817 self .variables .append (variable )
769818 self ._variables_by_name [variable .name ] = variable
770819
def add_variable_from_spec(
    self,
    name,
    type,
    label=None,
    used=None,
    object_type=None,
    structure_type=None,
    meta_data=None,
):
    """Adds a variable to this dictionary using a complete specification

    All the arguments are validated before the dictionary is modified: when
    an exception is raised the dictionary is left unchanged.

    Parameters
    ----------
    name : str
        Variable name
    type : str
        Variable type
        See `Variable`
    label : str, optional
        Label of the variable. If ``None`` (default) the label of the newly
        created `Variable` is left untouched.
    used : bool, optional
        Usage status of the variable. If ``None`` (default) the usage status
        of the newly created `Variable` is left untouched.
    object_type : str, optional
        Object type (``None`` by default). Mandatory if ``type`` is "Entity"
        or "Table"; not stored on the variable for any other type.
    structure_type : str, optional
        Structure type (``None`` by default). Not stored on the variable
        if ``type`` is not "Structure".
    meta_data : dict, optional
        A Python dictionary which holds the metadata specification.
        When given, it must contain both of the following keys:
            - keys : list
                list of meta-data keys
            - values : list
                list of meta-data values

    Raises
    ------
    `ValueError`
        - If the variable name is empty or does not comply
          with the formatting constraints.
        - If there is already a variable with the same name.
        - If the given variable type is unknown.
        - If a native type is given 'object_type' or 'structure_type'.
        - If 'object_type' is missing for an "Entity" or "Table" variable.
        - If 'used' is not a boolean.
        - If 'meta_data' is not a dictionary, lacks the 'keys' or 'values'
          entries, or if one of these entries is not a list.
        - If 'label', 'object_type' or 'structure_type' is not a string.
    """
    # Check the variable name
    if not name:
        raise ValueError(
            "Cannot add to dictionary unnamed variable " f"(name = '{name}')"
        )
    if not _is_a_valid_variable_name(name):
        raise ValueError(
            f"New variable name '{name}' cannot be accepted "
            "(invalid length or characters)"
        )
    if name in self._variables_by_name:
        raise ValueError(f"Dictionary already has a variable named '{name}'")

    # Check the type and its coherence with the type complements
    if not _is_valid_type(type):
        raise ValueError(f"Invalid type '{type}'")
    is_object_variable = _is_object_type(type)
    if _is_native_type(type) and (object_type or structure_type):
        raise ValueError(
            f"Native type '{type}' "
            "cannot have 'object_type' or 'structure_type'"
        )
    if is_object_variable and object_type is None:
        raise ValueError(f"'object_type' must be provided for type '{type}'")

    # Check the optional arguments
    if used is not None and not isinstance(used, bool):
        raise ValueError("'used' must be a boolean")
    if meta_data is not None:
        if not isinstance(meta_data, dict):
            raise ValueError("'meta_data' must be a dict")
        if "keys" not in meta_data or "values" not in meta_data:
            raise ValueError(
                "'meta_data' does not contain "
                "the mandatory keys 'keys' and 'values'"
            )
        if not isinstance(meta_data["keys"], list):
            raise ValueError("'meta_data' keys must be a list")
        if not isinstance(meta_data["values"], list):
            raise ValueError("'meta_data' values must be a list")
    if label is not None and not isinstance(label, str):
        raise ValueError("'label' must be a str")
    if object_type is not None and not isinstance(object_type, str):
        raise ValueError("'object_type' must be a str")
    if structure_type is not None and not isinstance(structure_type, str):
        raise ValueError("'structure_type' must be a str")

    # Build the variable; the optional fields are set only when specified
    variable = Variable()
    variable.name = name
    variable.type = type
    if used is not None:
        variable.used = used
    if meta_data is not None:
        variable.meta_data = MetaData(meta_data)
    if label is not None:
        variable.label = label

    # Store only the type complement that is meaningful for the variable type:
    # an irrelevant complement is ignored instead of being attached to a
    # variable whose type cannot use it
    if object_type is not None and is_object_variable:
        variable.object_type = object_type
    if structure_type is not None and type == "Structure":
        variable.structure_type = structure_type

    # Register the variable
    self.variables.append(variable)
    self._variables_by_name[variable.name] = variable
926+
771927 def remove_variable (self , variable_name ):
772928 """Removes the specified variable from this dictionary
773929
@@ -1007,6 +1163,43 @@ class Variable:
10071163 - `samples.create_dictionary_domain()`
10081164 """
10091165
# Names of the variable types
CATEGORICAL_TYPE = "Categorical"
NUMERICAL_TYPE = "Numerical"
TIME_TYPE = "Time"
DATE_TYPE = "Date"
TIMESTAMP_TYPE = "Timestamp"
TIMESTAMP_TZ_TYPE = "TimestampTZ"
TEXT_TYPE = "Text"
TEXTLIST_TYPE = "TextList"
STRUCTURE_TYPE = "Structure"
ENTITY_TYPE = "Entity"
TABLE_TYPE = "Table"

# Type families
__native_types__ = (
    CATEGORICAL_TYPE,
    NUMERICAL_TYPE,
    TIME_TYPE,
    DATE_TYPE,
    TIMESTAMP_TYPE,
    TIMESTAMP_TZ_TYPE,
    TEXT_TYPE,
)
__internal_types__ = (TEXTLIST_TYPE, STRUCTURE_TYPE)
__object_types__ = (ENTITY_TYPE, TABLE_TYPE)

# All the types: the native ones, then the internal ones, then the object ones
__all_types__ = __native_types__ + __internal_types__ + __object_types__
1202+
10101203 def __init__ (self , json_data = None ):
10111204 """See class docstring"""
10121205 # Check the type of json_data
@@ -1055,7 +1248,7 @@ def __init__(self, json_data=None):
10551248 self .type = json_data .get ("type" )
10561249
10571250 # Initialize complement of the type
1058- if self .type in ( "Entity" , "Table" ):
1251+ if _is_object_type ( self .type ):
10591252 self .object_type = json_data .get ("objectType" )
10601253 elif self .type == "Structure" :
10611254 self .structure_type = json_data .get ("structureType" )
@@ -1069,7 +1262,7 @@ def __init__(self, json_data=None):
10691262 self .meta_data = MetaData (json_meta_data )
10701263
def __repr__(self):
    """Returns a human-readable string representation

    The representation is the text "Variable" followed by the name of the
    variable between parentheses.
    """
    representation = f"Variable ({self.name})"
    return representation
10741267
10751268 def __str__ (self ):
@@ -1169,7 +1362,7 @@ def full_type(self):
11691362 basic.
11701363 """
11711364 full_type = self .type
1172- if self .type in ( "Entity" , "Table" ):
1365+ if _is_object_type ( self .type ):
11731366 full_type += f"({ self .object_type } )"
11741367 elif self .type == "Structure" :
11751368 full_type += f"({ self .structure_type } )"
0 commit comments