2121import khiops .core as kh
2222import khiops .core .internals .filesystems as fs
2323from khiops .core .dictionary import VariableBlock
24- from khiops .core .exceptions import KhiopsRuntimeError
25- from khiops .core .internals .common import (
26- deprecation_message ,
27- is_dict_like ,
28- is_list_like ,
29- type_error_message ,
30- )
24+ from khiops .core .internals .common import is_dict_like , is_list_like , type_error_message
3125
3226# Disable PEP8 variable names because of scikit-learn X,y conventions
3327# To capture invalid-names other than X,y run:
@@ -466,7 +460,7 @@ def __init__(self, X, y=None, categorical_target=True, key=None):
466460 self .main_table = PandasTable ("main_table" , X )
467461 self .secondary_tables = []
468462 # A single numpy array (or compatible object)
469- elif hasattr (X , "__array__" ):
463+ elif hasattr (X , "__array__" ) or is_list_like ( X ) :
470464 self .main_table = NumpyTable ("main_table" , X )
471465 self .secondary_tables = []
472466 # A scipy.sparse.spmatrix
@@ -489,57 +483,12 @@ def __init__(self, X, y=None, categorical_target=True, key=None):
489483 ),
490484 ):
491485 check_array (X , accept_sparse = False )
492- # A tuple spec
493- elif isinstance (X , tuple ):
494- warnings .warn (
495- deprecation_message (
496- "Tuple dataset input" ,
497- "11.0.0" ,
498- replacement = "dict dataset spec" ,
499- quote = False ,
500- ),
501- stacklevel = 3 ,
502- )
503- # Check the input tuple
504- self ._check_input_tuple (X )
505-
506- # Obtain path and separator
507- path , sep = X
508-
509- # Initialization
510- self .main_table = FileTable ("main_table" , path = path , sep = sep )
511- self .secondary_tables = []
512-
513- # A dataset sequence spec
514- # We try first for compatible python arrays then the deprecated sequences spec
515- elif is_list_like (X ):
516- # Try to transform to a numerical array with sklearn's check_array
517- # On failure we try the old deprecated sequence interface
518- # When the old list interface is eliminated this will considerably reduce
519- # this branch's code
520- try :
521- X_checked = check_array (X , ensure_2d = True , force_all_finite = False )
522- self .main_table = NumpyTable ("main_table" , X_checked )
523- self .secondary_tables = []
524- except ValueError :
525- warnings .warn (
526- deprecation_message (
527- "List dataset input" ,
528- "11.0.0" ,
529- replacement = "dict dataset spec" ,
530- quote = False ,
531- ),
532- stacklevel = 3 ,
533- )
534- self ._init_tables_from_sequence (X , key = key )
535486 # A dataset dict spec
536487 elif is_dict_like (X ):
537488 self ._init_tables_from_mapping (X )
538489 # Fail if X is not recognized
539490 else :
540- raise TypeError (
541- type_error_message ("X" , X , "array-like" , tuple , Sequence , Mapping )
542- )
491+ raise TypeError (type_error_message ("X" , X , "array-like" , Mapping , Sequence ))
543492
544493 # Initialization of the target column if any
545494 if y is not None :
@@ -581,35 +530,6 @@ def _check_input_tuple(self, X):
581530 if not isinstance (X [1 ], str ):
582531 raise TypeError (type_error_message ("X[1]" , X [1 ], str ))
583532
584- def _init_tables_from_sequence (self , X , key = None ):
585- """Initializes the spec from a list-like 'X'"""
586- assert is_list_like (X ), "'X' must be a list-like"
587-
588- # Check the input sequence
589- self ._check_input_sequence (X , key = key )
590-
591- # Initialize the tables
592- if isinstance (X [0 ], pd .DataFrame ):
593- self .main_table = PandasTable ("main_table" , X [0 ], key = key )
594- self .secondary_tables = []
595- for index , dataframe in enumerate (X [1 :], start = 1 ):
596- self .secondary_tables .append (
597- PandasTable (f"secondary_table_{ index :02d} " , dataframe , key = key )
598- )
599- else :
600- self .main_table = FileTable ("main_table" , X [0 ], key = key )
601- self .secondary_tables = []
602- for index , table_path in enumerate (X [1 :], start = 1 ):
603- self .secondary_tables .append (
604- FileTable (f"secondary_table_{ index :02d} " , table_path , key = key )
605- )
606-
607- # Create a list of relations
608- main_table_name = self .main_table .name
609- self .relations = [
610- (main_table_name , table .name , False ) for table in self .secondary_tables
611- ]
612-
613533 def _check_input_sequence (self , X , key = None ):
614534 # Check the first table
615535 if len (X ) == 0 :
@@ -1206,7 +1126,7 @@ class NumpyTable(DatasetTable):
12061126 ----------
12071127 name : str
12081128 Name for the table.
1209- array : `numpy.ndarray` of shape (n_samples, n_features_in)
1129+ array : `numpy.ndarray` of shape (n_samples, n_features_in) or Sequence
12101130 The array to be encapsulated.
12111131 key : :external:term:`array-like` of int, optional
12121132 The names of the columns composing the key.
@@ -1217,8 +1137,8 @@ def __init__(self, name, array, key=None):
12171137 super ().__init__ (name , key = key )
12181138
12191139 # Check the array's types and shape
1220- if not hasattr (array , "__array__" ):
1221- raise TypeError (type_error_message ("array" , array , np .ndarray ))
1140+ if not hasattr (array , "__array__" ) and not is_list_like ( array ) :
1141+ raise TypeError (type_error_message ("array" , array , np .ndarray , Sequence ))
12221142
12231143 # Initialize the members
12241144 self .data_source = check_array (array , ensure_2d = True , force_all_finite = False )
0 commit comments