2323from khiops .core .exceptions import KhiopsJSONError
2424from khiops .core .internals .common import (
2525 deprecation_message ,
26+ is_dict_like ,
27+ is_list_like ,
2628 is_string_like ,
2729 type_error_message ,
2830)
@@ -43,25 +45,17 @@ def _format_name(name):
4345
4446 Otherwise, it returns the name between backquoted (backquotes within are doubled)
4547 """
46- # Check that the type of name is string or bytes
47- if not is_string_like (name ):
48- raise TypeError (type_error_message ("name" , name , "string-like" ))
48+ is_valid_identifier = _check_name (name )
4949
50- # Check if the name is an identifier
51- # Python isalnum is not used because of utf-8 encoding (accentuated chars
52- # are considered alphanumeric)
5350 # Return original name if is an identifier, otherwise between backquotes
54- identifier_pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*"
55- str_identifier_regex = re .compile (identifier_pattern )
56- bytes_identifier_regex = re .compile (bytes (identifier_pattern , encoding = "ascii" ))
5751 if isinstance (name , str ):
58- if str_identifier_regex . fullmatch ( name ) is not None :
52+ if is_valid_identifier :
5953 formatted_name = name
6054 else :
6155 formatted_name = "`" + name .replace ("`" , "``" ) + "`"
6256 else :
6357 assert isinstance (name , bytes )
64- if bytes_identifier_regex . fullmatch ( name ) is not None :
58+ if is_valid_identifier :
6559 formatted_name = name
6660 else :
6761 formatted_name = b"`" + name .replace (b"`" , b"``" ) + b"`"
@@ -81,6 +75,59 @@ def _quote_value(value):
8175 return quoted_value
8276
8377
def _check_name(name):
    """Checks whether a variable name is a valid plain identifier

    Plain strings and bytes are both accepted as input.

    A name is reported as valid only if it is made of 1 to 128 ASCII letters,
    digits or underscores and does not start with a digit. This rule is stricter
    than, and therefore covers, the constraints stated for the Khiops core, which
    forbids a name:

    - with a length outside the [1, 128] interval
    - containing a line-feed (newline) character
    - with leading or trailing whitespace

    Parameters
    ----------
    name : str or bytes
        The candidate variable name.

    Returns
    -------
    bool
        ``True`` if the whole name is a valid identifier, ``False`` otherwise.

    Raises
    ------
    `TypeError`
        If ``name`` is not string-like.
    """
    # Check that the type of name is string or bytes
    if not is_string_like(name):
        raise TypeError(type_error_message("name", name, "string-like"))

    # Python isalnum is not used because of utf-8 encoding (accentuated chars
    # are considered alphanumeric)
    identifier_pattern = r"[a-zA-Z_][a-zA-Z0-9_]{0,127}"

    # fullmatch is required: with match(), a "$" anchor also matches just before
    # a trailing newline, so a name such as "abc\n" would be wrongly accepted
    if isinstance(name, str):
        return re.fullmatch(identifier_pattern, name) is not None
    return re.fullmatch(identifier_pattern.encode("ascii"), name) is not None
102+
103+
def _is_valid_type(type_str):
    """Tells whether the type is a known one (native, relational or internal)"""
    if _is_native_type(type_str) or _is_object_type(type_str):
        return True

    # Internal types
    return type_str in ("TextList", "Structure")
111+
112+
113+ def _is_native_type (type_str ):
114+ """Checks whether the type is native (not internal or relational)"""
115+ return type_str in [
116+ "Categorical" ,
117+ "Numerical" ,
118+ "Time" ,
119+ "Date" ,
120+ "Timestamp" ,
121+ "TimestampTZ" ,
122+ "Text" ,
123+ ]
124+
125+
126+ def _is_object_type (type_str ):
127+ """Checks whether the type is an object one (relational)"""
128+ return type_str in ["Entity" , "Table" ]
129+
130+
84131class DictionaryDomain (KhiopsJSONObject ):
85132 """Main class containing the information of a Khiops dictionary file
86133
@@ -769,6 +816,136 @@ def add_variable(self, variable):
769816 self .variables .append (variable )
770817 self ._variables_by_name [variable .name ] = variable
771818
def add_variable_from_spec(
    self,
    name,
    type,
    label=None,
    used=True,
    object_type=None,
    structure_type=None,
    rule=None,
    meta_data=None,
):
    """Adds a variable to this dictionary using a complete specification

    All the checks are done before the variable is added: if any of them
    fails the dictionary is left unchanged.

    Parameters
    ----------
    name : str
        Variable name.
    type : str
        Variable type. See `Variable`.
    label : str, optional
        Label of the variable.
    used : bool, default ``True``
        Usage status of the variable. It is stored as given.
    object_type : str, optional
        Object type. Mandatory when ``type`` is "Entity" or "Table";
        rejected when ``type`` is a native type.
    structure_type : str, optional
        Structure type. Rejected when ``type`` is a native type.
    rule : str, optional
        Variable rule (in verbatim). See `Rule`.
    meta_data : dict, optional
        A Python dictionary which holds the metadata specification. When
        provided, it must contain both of the following keys:

        - keys : list
            List of meta-data keys.
        - values : list
            List of meta-data values, with the same size as ``keys``.
            The values can be str, bool, float or int.

    Raises
    ------
    `ValueError`
        - If the variable name is empty or does not comply
          with the formatting constraints.
        - If there is already a variable with the same name.
        - If the given variable type is unknown.
        - If a native type is given 'object_type' or 'structure_type'.
        - If 'object_type' is missing for an "Entity" or "Table" type.
        - If 'meta_data' lacks the 'keys' or 'values' entries, or if these
          entries do not have the same size.
    `TypeError`
        - If the variable name is not string-like.
        - If 'meta_data' is not dict-like, or if its 'keys' or 'values'
          entries are not list-like.
        - If 'label', 'object_type', 'structure_type' or 'rule' is provided
          but is not string-like.
    """
    # Check the name: non-empty, well-formed and not already in use
    if not name:
        raise ValueError(
            f"Cannot add to dictionary unnamed variable (name = '{name}')"
        )
    if not _check_name(name):
        raise ValueError(
            f"New variable name '{name}' cannot be accepted "
            "(invalid length or characters)"
        )
    if name in self._variables_by_name:
        raise ValueError(f"Dictionary already has a variable named '{name}'")

    # Check the type and the coherence of its complements
    if not _is_valid_type(type):
        raise ValueError(f"Invalid type '{type}'")
    if _is_native_type(type) and (object_type or structure_type):
        raise ValueError(
            f"Native type '{type}' "
            "cannot have 'object_type' or 'structure_type'"
        )
    if _is_object_type(type) and object_type is None:
        raise ValueError(f"'object_type' must be provided for type '{type}'")

    # Build the variable; it is attached to the dictionary only at the very end
    variable = Variable()
    variable.name = name
    variable.type = type
    variable.used = used

    # Check and transfer the meta-data
    if meta_data is not None:
        if not is_dict_like(meta_data):
            raise TypeError(type_error_message("meta_data", meta_data, "dict-like"))
        if "keys" not in meta_data or "values" not in meta_data:
            raise ValueError(
                "'meta_data' does not contain "
                "the mandatory keys 'keys' and 'values'"
            )
        meta_data_keys = meta_data["keys"]
        meta_data_values = meta_data["values"]
        if not is_list_like(meta_data_keys):
            raise TypeError(
                type_error_message("meta_data['keys']", meta_data_keys, "list-like")
            )
        if not is_list_like(meta_data_values):
            raise TypeError(
                type_error_message(
                    "meta_data['values']", meta_data_values, "list-like"
                )
            )
        if len(meta_data_keys) != len(meta_data_values):
            raise ValueError("'meta_data' keys and values do not have the same size")
        for key, value in zip(meta_data_keys, meta_data_values):
            variable.meta_data.add_value(key, value)

    # Check and set the optional string attributes
    if label is not None:
        if not is_string_like(label):
            raise TypeError(type_error_message("label", label, "string-like"))
        variable.label = label
    if object_type is not None:
        if not is_string_like(object_type):
            raise TypeError(
                type_error_message("object_type", object_type, "string-like")
            )
        variable.object_type = object_type
    if structure_type is not None:
        if not is_string_like(structure_type):
            raise TypeError(
                type_error_message("structure_type", structure_type, "string-like")
            )
        variable.structure_type = structure_type
    if rule is not None:
        if not is_string_like(rule):
            # Report the offending 'rule' value itself
            raise TypeError(type_error_message("rule", rule, "string-like"))
        variable.rule = Rule(verbatim=rule)

    self.add_variable(variable)
948+
772949 def remove_variable (self , variable_name ):
773950 """Removes the specified variable from this dictionary
774951
@@ -1058,7 +1235,7 @@ def __init__(self, json_data=None):
10581235 self .type = json_data .get ("type" )
10591236
10601237 # Initialize complement of the type
1061- if self .type in ( "Entity" , "Table" ):
1238+ if _is_object_type ( self .type ):
10621239 self .object_type = json_data .get ("objectType" )
10631240 elif self .type == "Structure" :
10641241 self .structure_type = json_data .get ("structureType" )
@@ -1072,7 +1249,7 @@ def __init__(self, json_data=None):
10721249 self .meta_data = MetaData (json_meta_data )
10731250
10741251 def __repr__ (self ):
1075- """Returns a human readable string representation"""
1252+ """Returns a human- readable string representation"""
10761253 return f"Variable ({ self .name } )"
10771254
10781255 def __str__ (self ):
@@ -1179,7 +1356,7 @@ def full_type(self):
11791356 basic.
11801357 """
11811358 full_type = self .type
1182- if self .type in ( "Entity" , "Table" ):
1359+ if _is_object_type ( self .type ):
11831360 full_type += f"({ self .object_type } )"
11841361 elif self .type == "Structure" :
11851362 full_type += f"({ self .structure_type } )"
0 commit comments