2323from khiops .core .exceptions import KhiopsJSONError
2424from khiops .core .internals .common import (
2525 deprecation_message ,
26+ is_dict_like ,
27+ is_list_like ,
2628 is_string_like ,
2729 type_error_message ,
2830)
@@ -43,25 +45,17 @@ def _format_name(name):
4345
4446 Otherwise, it returns the name between backquoted (backquotes within are doubled)
4547 """
46- # Check that the type of name is string or bytes
47- if not is_string_like (name ):
48- raise TypeError (type_error_message ("name" , name , "string-like" ))
48+ is_valid_identifier = _check_name (name )
4949
50- # Check if the name is an identifier
51- # Python isalnum is not used because of utf-8 encoding (accentuated chars
52- # are considered alphanumeric)
5350 # Return original name if is an identifier, otherwise between backquotes
54- identifier_pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*"
55- str_identifier_regex = re .compile (identifier_pattern )
56- bytes_identifier_regex = re .compile (bytes (identifier_pattern , encoding = "ascii" ))
5751 if isinstance (name , str ):
58- if str_identifier_regex . fullmatch ( name ) is not None :
52+ if is_valid_identifier :
5953 formatted_name = name
6054 else :
6155 formatted_name = "`" + name .replace ("`" , "``" ) + "`"
6256 else :
6357 assert isinstance (name , bytes )
64- if bytes_identifier_regex . fullmatch ( name ) is not None :
58+ if is_valid_identifier :
6559 formatted_name = name
6660 else :
6761 formatted_name = b"`" + name .replace (b"`" , b"``" ) + b"`"
@@ -81,6 +75,62 @@ def _quote_value(value):
8175 return quoted_value
8276
8377
def _check_name(name):
    r"""Checks whether a variable name is a valid (extended) identifier

    Plain string or bytes are both accepted as input.

    Please note the Khiops core forbids a name
    - with a length outside the [1,128] interval
    - containing a simple (Unix) line feed (\n)
    - with leading and trailing spaces
      (\s in Perl-Compatible-Regular-Expressions syntax).
    This function must check at least these constraints.

    Parameters
    ----------
    name : str or bytes
        The candidate variable name.

    Returns
    -------
    bool
        ``True`` if the *whole* name is made of 1 to 128 characters taken from
        ASCII letters, digits, underscore and the 0x80-0xFF range, and does not
        start with a digit. ``False`` otherwise.

    Raises
    ------
    `TypeError`
        If ``name`` is not string-like.
    """
    # Check that the type of name is string or bytes
    if not is_string_like(name):
        raise TypeError(type_error_message("name", name, "string-like"))

    # Accentuated characters (between 128 and 255) are also accepted
    # BUT NOT the greek ones
    # The pattern carries no anchors: ``fullmatch`` below requires the entire
    # input to be consumed. A trailing ``$`` used with ``match`` would also
    # accept a name ending with a newline, which the Khiops core forbids.
    extended_identifier_pattern = r"[a-zA-Z_\x80-\xFF][a-zA-Z0-9_\x80-\xFF]{0,127}"

    if isinstance(name, str):
        return re.fullmatch(extended_identifier_pattern, name) is not None

    assert isinstance(name, bytes)
    # The raw pattern only contains ASCII characters (the \x80-\xFF range is
    # spelled with escape sequences), so it can be encoded as-is for bytes
    bytes_identifier_pattern = bytes(extended_identifier_pattern, encoding="ascii")
    return re.fullmatch(bytes_identifier_pattern, name) is not None
105+
106+
def _is_valid_type(type_str):
    """Tells whether ``type_str`` is a known variable type

    A type is known when it is native, relational (object) or one of the
    internal types.
    """
    if _is_native_type(type_str):
        return True
    if _is_object_type(type_str):
        return True

    # Remaining candidates: the internal types
    internal_types = ("TextList", "Structure")
    return type_str in internal_types
114+
115+
116+ def _is_native_type (type_str ):
117+ """Checks whether the type is native (not internal or relational)"""
118+ return type_str in [
119+ "Categorical" ,
120+ "Numerical" ,
121+ "Time" ,
122+ "Date" ,
123+ "Timestamp" ,
124+ "TimestampTZ" ,
125+ "Text" ,
126+ ]
127+
128+
129+ def _is_object_type (type_str ):
130+ """Checks whether the type is an object one (relational)"""
131+ return type_str in ["Entity" , "Table" ]
132+
133+
84134class DictionaryDomain (KhiopsJSONObject ):
85135 """Main class containing the information of a Khiops dictionary file
86136
@@ -769,6 +819,137 @@ def add_variable(self, variable):
769819 self .variables .append (variable )
770820 self ._variables_by_name [variable .name ] = variable
771821
def add_variable_from_spec(
    self,
    name,
    type,
    label=None,
    used=True,
    object_type=None,
    structure_type=None,
    rule=None,
    meta_data=None,
):
    """Adds a variable to this dictionary using a complete specification

    Parameters
    ----------
    name : str
        Variable name
    type : str
        Variable type, See `Variable`
    label : str, optional
        Label of the variable.
    used : bool, default ``True``
        Usage status of the variable.
    object_type : str, optional
        Object type. Mandatory if the variable type is in ["Entity", "Table"],
        forbidden for native types.
    structure_type : str, optional
        Structure type. Forbidden for native types.
    rule : str, optional
        Variable rule (in verbatim).
    meta_data : dict, optional
        A Python dictionary which holds the metadata specification
        with the following mandatory keys:
            - keys : list
                list of meta-data keys
            - values : list
                list of meta-data values, of the same size as ``keys``.
                The values can be str, bool, float or int.

    Raises
    ------
    `ValueError`
        - If the variable name is empty or does not comply
          with the formatting constraints.
        - If there is already a variable with the same name.
        - If the given variable type is unknown.
        - If a native type is given 'object_type' or 'structure_type'.
        - If an object type is not given 'object_type'.
        - If 'meta_data' lacks the 'keys' or 'values' entries, or if these
          entries do not have the same size.
    `TypeError`
        - If the variable name is not string-like.
        - If 'meta_data' is not dict-like, or if its 'keys' or 'values'
          entries are not list-like.
        - If 'label', 'object_type', 'structure_type' or 'rule' is given
          and is not string-like.
    """
    # Checks on the name
    if not name:
        raise ValueError(
            "Cannot add to dictionary unnamed variable " f"(name = '{name}')"
        )
    if not _check_name(name):
        raise ValueError(
            f"New variable name '{name}' cannot be accepted "
            "(invalid length or characters)"
        )
    if name in self._variables_by_name:
        raise ValueError(f"Dictionary already has a variable named '{name}'")

    # Checks on the type and its complement
    if not _is_valid_type(type):
        raise ValueError(f"Invalid type '{type}'")
    if _is_native_type(type) and (object_type or structure_type):
        raise ValueError(
            f"Native type '{type}' "
            "cannot have 'object_type' or 'structure_type'"
        )
    if _is_object_type(type) and object_type is None:
        raise ValueError(f"'object_type' must be provided for type '{type}'")

    # Checks on the metadata specification
    if meta_data is not None:
        if not is_dict_like(meta_data):
            raise TypeError(type_error_message("meta_data", meta_data, "dict-like"))
        if "keys" not in meta_data or "values" not in meta_data:
            raise ValueError(
                "'meta_data' does not contain "
                "the mandatory keys 'keys' and 'values'"
            )
        for entry in ("keys", "values"):
            if not is_list_like(meta_data[entry]):
                raise TypeError(
                    type_error_message(
                        f"meta_data['{entry}']", meta_data[entry], "list-like"
                    )
                )
        if len(meta_data["keys"]) != len(meta_data["values"]):
            raise ValueError(
                "'meta_data' keys and values " "do not have the same size"
            )

    # Checks on the optional string-like arguments
    # Each argument is reported under its own name and with its own value
    optional_string_args = (
        ("label", label),
        ("object_type", object_type),
        ("structure_type", structure_type),
        ("rule", rule),
    )
    for arg_name, arg_value in optional_string_args:
        if arg_value is not None and not is_string_like(arg_value):
            raise TypeError(type_error_message(arg_name, arg_value, "string-like"))

    # Variable initialization
    variable = Variable()
    variable.name = name
    variable.type = type
    variable.used = used
    if meta_data is not None:
        for key, value in zip(meta_data["keys"], meta_data["values"]):
            variable.meta_data.add_value(key, value)
    if label is not None:
        variable.label = label
    if object_type is not None:
        variable.object_type = object_type
    if structure_type is not None:
        variable.structure_type = structure_type
    if rule is not None:
        variable.rule = Rule(verbatim=rule)
    self.add_variable(variable)
952+
772953 def remove_variable (self , variable_name ):
773954 """Removes the specified variable from this dictionary
774955
@@ -1058,7 +1239,7 @@ def __init__(self, json_data=None):
10581239 self .type = json_data .get ("type" )
10591240
10601241 # Initialize complement of the type
1061- if self .type in ( "Entity" , "Table" ):
1242+ if _is_object_type ( self .type ):
10621243 self .object_type = json_data .get ("objectType" )
10631244 elif self .type == "Structure" :
10641245 self .structure_type = json_data .get ("structureType" )
@@ -1072,7 +1253,7 @@ def __init__(self, json_data=None):
10721253 self .meta_data = MetaData (json_meta_data )
10731254
10741255 def __repr__ (self ):
1075- """Returns a human readable string representation"""
1256+ """Returns a human- readable string representation"""
10761257 return f"Variable ({ self .name } )"
10771258
10781259 def __str__ (self ):
@@ -1179,7 +1360,7 @@ def full_type(self):
11791360 basic.
11801361 """
11811362 full_type = self .type
1182- if self .type in ( "Entity" , "Table" ):
1363+ if _is_object_type ( self .type ):
11831364 full_type += f"({ self .object_type } )"
11841365 elif self .type == "Structure" :
11851366 full_type += f"({ self .structure_type } )"
0 commit comments