@@ -177,7 +177,7 @@ def _check_multitable_spec(ds_spec):
177177 )
178178
179179
180- def _table_name_of_path (table_path ):
180+ def table_name_of_path (table_path ):
181181 return table_path .split ("/" )[- 1 ]
182182
183183
@@ -378,7 +378,6 @@ def __init__(self, X, y=None, categorical_target=True):
378378 # Initialize members
379379 self .main_table = None
380380 self .additional_data_tables = None
381- self .relations = None
382381 self .categorical_target = categorical_target
383382 self .target_column = None
384383 self .target_column_id = None
@@ -428,7 +427,8 @@ def __init__(self, X, y=None, categorical_target=True):
428427 # Index the tables by name
429428 self ._tables_by_name = {
430429 table .name : table
431- for table in [self .main_table ] + self .additional_data_tables
430+ for table in [self .main_table ]
431+ + [table for _ , table , _ in self .additional_data_tables ]
432432 }
433433
434434 # Post-conditions
@@ -504,32 +504,21 @@ def _init_tables_from_mapping(self, X):
504504 key = main_table_key ,
505505 )
506506 self .additional_data_tables = []
507- self .relations = []
508507 if "additional_data_tables" in X :
509508 for table_path , table_spec in X ["additional_data_tables" ].items ():
510509 table_source , table_key = table_spec [:2 ]
511- table_name = _table_name_of_path (table_path )
510+ table_name = table_name_of_path (table_path )
512511 table = PandasTable (
513512 table_name ,
514513 table_source ,
515- data_path = table_path ,
516514 key = table_key ,
517515 )
518- self .additional_data_tables .append (table )
519516 is_one_to_one_relation = False
520517 if len (table_spec ) == 3 and table_spec [2 ] is True :
521518 is_one_to_one_relation = True
522519
523- # Set relation parent: if no "/" in path, main_table is the parent
524- if not "/" in table_path :
525- parent_table_name = self .main_table .name
526- else :
527- table_path_fragments = table_path .split ("/" )
528- parent_table_name = _table_name_of_path (
529- "/" .join (table_path_fragments [:- 1 ])
530- )
531- self .relations .append (
532- (parent_table_name , table_name , is_one_to_one_relation )
520+ self .additional_data_tables .append (
521+ (table_path , table , is_one_to_one_relation )
533522 )
534523 # Initialize a sparse dataset (monotable)
535524 elif isinstance (main_table_source , sp .spmatrix ):
@@ -539,7 +528,6 @@ def _init_tables_from_mapping(self, X):
539528 key = main_table_key ,
540529 )
541530 self .additional_data_tables = []
542- self .relations = []
543531 # Initialize a numpyarray dataset (monotable)
544532 elif hasattr (main_table_source , "__array__" ):
545533 self .main_table = NumpyTable (
@@ -552,7 +540,6 @@ def _init_tables_from_mapping(self, X):
552540 "with pandas dataframe source tables"
553541 )
554542 self .additional_data_tables = []
555- self .relations = []
556543 else :
557544 raise TypeError (
558545 type_error_message (
@@ -671,11 +658,12 @@ def to_spec(self):
671658 ds_spec = {}
672659 ds_spec ["main_table" ] = (self .main_table .data_source , self .main_table .key )
673660 ds_spec ["additional_data_tables" ] = {}
674- for table in self .additional_data_tables :
675- assert table . data_path is not None
676- ds_spec ["additional_data_tables" ][table . data_path ] = (
661+ for table_path , table , is_one_to_one_relation in self .additional_data_tables :
662+ assert table_path is not None
663+ ds_spec ["additional_data_tables" ][table_path ] = (
677664 table .data_source ,
678665 table .key ,
666+ is_one_to_one_relation ,
679667 )
680668
681669 return ds_spec
@@ -739,31 +727,32 @@ def create_khiops_dictionary_domain(self):
739727 # Note: In general 'name' and 'object_type' fields of Variable can be different
740728 if self .additional_data_tables :
741729 main_dictionary .root = True
742- table_names = [table .name for table in self .additional_data_tables ]
743- tables_to_visit = [self .main_table .name ]
744- while tables_to_visit :
745- current_table = tables_to_visit .pop (0 )
746- for relation in self .relations :
747- parent_table , child_table , is_one_to_one_relation = relation
748- if parent_table == current_table :
749- tables_to_visit .append (child_table )
750- parent_table_name = parent_table
751- index_table = table_names .index (child_table )
752- table = self .additional_data_tables [index_table ]
753- parent_table_dictionary = dictionary_domain .get_dictionary (
754- parent_table_name
755- )
756- dictionary = table .create_khiops_dictionary ()
757- dictionary_domain .add_dictionary (dictionary )
758- table_variable = kh .Variable ()
759- if is_one_to_one_relation :
760- table_variable .type = "Entity"
761- else :
762- table_variable .type = "Table"
763- table_variable .name = table .name
764- table_variable .object_type = table .name
765- parent_table_dictionary .add_variable (table_variable )
730+ for (
731+ table_path ,
732+ table ,
733+ is_one_to_one_relation ,
734+ ) in self .additional_data_tables :
735+ if not "/" in table_path :
736+ parent_table_name = self .main_table .name
737+ else :
738+ table_path_fragments = table_path .split ("/" )
739+ parent_table_name = table_name_of_path (
740+ "/" .join (table_path_fragments [:- 1 ])
741+ )
742+ parent_table_dictionary = dictionary_domain .get_dictionary (
743+ parent_table_name
744+ )
766745
746+ dictionary = table .create_khiops_dictionary ()
747+ dictionary_domain .add_dictionary (dictionary )
748+ table_variable = kh .Variable ()
749+ if is_one_to_one_relation :
750+ table_variable .type = "Entity"
751+ else :
752+ table_variable .type = "Table"
753+ table_variable .name = table .name
754+ table_variable .object_type = table .name
755+ parent_table_dictionary .add_variable (table_variable )
767756 return dictionary_domain
768757
769758 def create_table_files_for_khiops (self , output_dir , sort = True ):
@@ -802,9 +791,9 @@ def create_table_files_for_khiops(self, output_dir, sort=True):
802791
803792 # Create a copy of each secondary table
804793 secondary_table_paths = {}
805- for table in self .additional_data_tables :
806- assert table . data_path is not None
807- secondary_table_paths [table . data_path ] = table .create_table_file_for_khiops (
794+ for table_path , table , _ in self .additional_data_tables :
795+ assert table_path is not None
796+ secondary_table_paths [table_path ] = table .create_table_file_for_khiops (
808797 output_dir , sort = sort
809798 )
810799
@@ -909,13 +898,11 @@ class PandasTable(DatasetTable):
909898 Name for the table.
910899 dataframe : `pandas.DataFrame`
911900 The data frame to be encapsulated. It must be non-empty.
912- data_path : str, optional
913- Data path of the table. Unset for main tables.
914901 key : list of str, optional
915902 The names of the columns composing the key.
916903 """
917904
918- def __init__ (self , name , dataframe , data_path = None , key = None ):
905+ def __init__ (self , name , dataframe , key = None ):
919906 # Call the parent method
920907 super ().__init__ (name = name , key = key )
921908
@@ -928,7 +915,6 @@ def __init__(self, name, dataframe, data_path=None, key=None):
928915 # Initialize the attributes
929916 self .data_source = dataframe
930917 self .n_samples = len (self .data_source )
931- self .data_path = data_path
932918
933919 # Initialize feature columns and verify their types
934920 self .column_ids = self .data_source .columns .values
0 commit comments