@@ -176,7 +176,7 @@ def _check_multitable_spec(ds_spec):
176176 )
177177
178178
179- def _table_name_of_path (table_path ):
179+ def table_name_of_path (table_path ):
180180 return table_path .split ("/" )[- 1 ]
181181
182182
@@ -379,7 +379,6 @@ def __init__(self, X, y=None, categorical_target=True):
379379 # Initialize members
380380 self .main_table = None
381381 self .additional_data_tables = None
382- self .relations = None
383382 self .categorical_target = categorical_target
384383 self .target_column = None
385384 self .target_column_id = None
@@ -429,7 +428,8 @@ def __init__(self, X, y=None, categorical_target=True):
429428 # Index the tables by name
430429 self ._tables_by_name = {
431430 table .name : table
432- for table in [self .main_table ] + self .additional_data_tables
431+ for table in [self .main_table ]
432+ + [table for _ , table , _ in self .additional_data_tables ]
433433 }
434434
435435 # Post-conditions
@@ -505,32 +505,21 @@ def _init_tables_from_mapping(self, X):
505505 key = main_table_key ,
506506 )
507507 self .additional_data_tables = []
508- self .relations = []
509508 if "additional_data_tables" in X :
510509 for table_path , table_spec in X ["additional_data_tables" ].items ():
511510 table_source , table_key = table_spec [:2 ]
512- table_name = _table_name_of_path (table_path )
511+ table_name = table_name_of_path (table_path )
513512 table = PandasTable (
514513 table_name ,
515514 table_source ,
516- data_path = table_path ,
517515 key = table_key ,
518516 )
519- self .additional_data_tables .append (table )
520517 is_one_to_one_relation = False
521518 if len (table_spec ) == 3 and table_spec [2 ] is True :
522519 is_one_to_one_relation = True
523520
524- # Set relation parent: if no "/" in path, main_table is the parent
525- if not "/" in table_path :
526- parent_table_name = self .main_table .name
527- else :
528- table_path_fragments = table_path .split ("/" )
529- parent_table_name = _table_name_of_path (
530- "/" .join (table_path_fragments [:- 1 ])
531- )
532- self .relations .append (
533- (parent_table_name , table_name , is_one_to_one_relation )
521+ self .additional_data_tables .append (
522+ (table_path , table , is_one_to_one_relation )
534523 )
535524 # Initialize a sparse dataset (monotable)
536525 elif isinstance (main_table_source , sp .spmatrix ):
@@ -540,7 +529,6 @@ def _init_tables_from_mapping(self, X):
540529 key = main_table_key ,
541530 )
542531 self .additional_data_tables = []
543- self .relations = []
544532 # Initialize a numpyarray dataset (monotable)
545533 elif hasattr (main_table_source , "__array__" ):
546534 self .main_table = NumpyTable (
@@ -553,7 +541,6 @@ def _init_tables_from_mapping(self, X):
553541 "with pandas dataframe source tables"
554542 )
555543 self .additional_data_tables = []
556- self .relations = []
557544 else :
558545 raise TypeError (
559546 type_error_message (
@@ -672,11 +659,12 @@ def to_spec(self):
672659 ds_spec = {}
673660 ds_spec ["main_table" ] = (self .main_table .data_source , self .main_table .key )
674661 ds_spec ["additional_data_tables" ] = {}
675- for table in self .additional_data_tables :
676- assert table . data_path is not None
677- ds_spec ["additional_data_tables" ][table . data_path ] = (
662+ for table_path , table , is_one_to_one_relation in self .additional_data_tables :
663+ assert table_path is not None
664+ ds_spec ["additional_data_tables" ][table_path ] = (
678665 table .data_source ,
679666 table .key ,
667+ is_one_to_one_relation ,
680668 )
681669
682670 return ds_spec
@@ -740,31 +728,32 @@ def create_khiops_dictionary_domain(self):
740728 # Note: In general 'name' and 'object_type' fields of Variable can be different
741729 if self .additional_data_tables :
742730 main_dictionary .root = True
743- table_names = [table .name for table in self .additional_data_tables ]
744- tables_to_visit = [self .main_table .name ]
745- while tables_to_visit :
746- current_table = tables_to_visit .pop (0 )
747- for relation in self .relations :
748- parent_table , child_table , is_one_to_one_relation = relation
749- if parent_table == current_table :
750- tables_to_visit .append (child_table )
751- parent_table_name = parent_table
752- index_table = table_names .index (child_table )
753- table = self .additional_data_tables [index_table ]
754- parent_table_dictionary = dictionary_domain .get_dictionary (
755- parent_table_name
756- )
757- dictionary = table .create_khiops_dictionary ()
758- dictionary_domain .add_dictionary (dictionary )
759- table_variable = kh .Variable ()
760- if is_one_to_one_relation :
761- table_variable .type = "Entity"
762- else :
763- table_variable .type = "Table"
764- table_variable .name = table .name
765- table_variable .object_type = table .name
766- parent_table_dictionary .add_variable (table_variable )
731+ for (
732+ table_path ,
733+ table ,
734+ is_one_to_one_relation ,
735+ ) in self .additional_data_tables :
736+ if not "/" in table_path :
737+ parent_table_name = self .main_table .name
738+ else :
739+ table_path_fragments = table_path .split ("/" )
740+ parent_table_name = table_name_of_path (
741+ "/" .join (table_path_fragments [:- 1 ])
742+ )
743+ parent_table_dictionary = dictionary_domain .get_dictionary (
744+ parent_table_name
745+ )
767746
747+ dictionary = table .create_khiops_dictionary ()
748+ dictionary_domain .add_dictionary (dictionary )
749+ table_variable = kh .Variable ()
750+ if is_one_to_one_relation :
751+ table_variable .type = "Entity"
752+ else :
753+ table_variable .type = "Table"
754+ table_variable .name = table .name
755+ table_variable .object_type = table .name
756+ parent_table_dictionary .add_variable (table_variable )
768757 return dictionary_domain
769758
770759 def create_table_files_for_khiops (self , output_dir , sort = True ):
@@ -803,9 +792,9 @@ def create_table_files_for_khiops(self, output_dir, sort=True):
803792
804793 # Create a copy of each secondary table
805794 secondary_table_paths = {}
806- for table in self .additional_data_tables :
807- assert table . data_path is not None
808- secondary_table_paths [table . data_path ] = table .create_table_file_for_khiops (
795+ for table_path , table , _ in self .additional_data_tables :
796+ assert table_path is not None
797+ secondary_table_paths [table_path ] = table .create_table_file_for_khiops (
809798 output_dir , sort = sort
810799 )
811800
@@ -910,13 +899,11 @@ class PandasTable(DatasetTable):
910899 Name for the table.
911900 dataframe : `pandas.DataFrame`
912901 The data frame to be encapsulated. It must be non-empty.
913- data_path : str, optional
914- Data path of the table. Unset for main tables.
915902 key : list of str, optional
916903 The names of the columns composing the key.
917904 """
918905
919- def __init__ (self , name , dataframe , data_path = None , key = None ):
906+ def __init__ (self , name , dataframe , key = None ):
920907 # Call the parent method
921908 super ().__init__ (name = name , key = key )
922909
@@ -929,7 +916,6 @@ def __init__(self, name, dataframe, data_path=None, key=None):
929916 # Initialize the attributes
930917 self .data_source = dataframe
931918 self .n_samples = len (self .data_source )
932- self .data_path = data_path
933919
934920 # Initialize feature columns and verify their types
935921 self .column_ids = self .data_source .columns .values
0 commit comments