2121import khiops .core as kh
2222import khiops .core .internals .filesystems as fs
2323from khiops .core .dictionary import VariableBlock
24- from khiops .core .internals .common import is_dict_like , is_list_like , type_error_message
24+ from khiops .core .internals .common import (
25+ deprecation_message ,
26+ is_dict_like ,
27+ is_list_like ,
28+ type_error_message ,
29+ )
2530
2631# Disable PEP8 variable names because of scikit-learn X,y conventions
2732# To capture invalid-names other than X,y run:
@@ -171,6 +176,54 @@ def _check_multitable_spec(ds_spec):
171176 )
172177
173178
179+ def _table_name_of_path (table_path ):
180+ return table_path .split ("/" )[- 1 ]
181+
182+
def _resolve_data_path(table_name, main_table_name, relations):
    """Computes the data-path fragments and the entity flag of a secondary table

    Parameters
    ----------
    table_name : str
        Name of the secondary table whose data path is built.
    main_table_name : str
        Name of the main table; it never appears in the data path.
    relations : list of tuple
        Relations of the deprecated specification, each one being
        ``(parent, child)`` or ``(parent, child, is_entity)``.

    Returns
    -------
    tuple
        A 2-tuple ``(path_fragments, is_entity)`` where ``path_fragments`` is
        the list of table names from the first table below the main table down
        to ``table_name``, and ``is_entity`` is the third field of the last
        3-field relation whose child is ``table_name`` (``False`` if none).
    """
    path_fragments = [table_name]
    is_entity = False

    # Replay the relations until a full pass no longer extends the path. For:
    # - current table name N
    # - main table name N1
    # - and relations: (N1, N2), (N2, N3), (N3, N)
    # the data-path must be N2/N3/N
    # The relations may be listed in any order, so a single pass may only add
    # one ancestor; iterating to a fixed point supports hierarchies of any
    # depth. The loop terminates because every extension adds a table name that
    # is not yet in the path and the number of relations is finite.
    path_grew = True
    while path_grew:
        path_grew = False
        for relation in relations:
            left, right = relation[:2]
            if len(relation) == 3 and right == table_name:
                is_entity = relation[2]
            if (
                left != main_table_name
                and left not in path_fragments
                and right in path_fragments
            ):
                path_fragments.insert(0, left)
                path_grew = True
    return path_fragments, is_entity


def _upgrade_mapping_spec(ds_spec):
    """Converts a deprecated multi-table mapping spec to the data-path format

    The deprecated specification is a dict-like with the keys:

    - ``"main_table"``: the name of the main table,
    - ``"tables"``: a mapping from table name to a ``(source, key)`` pair,
    - ``"relations"`` (optional): a list of ``(parent, child)`` or
      ``(parent, child, is_entity)`` tuples.

    Parameters
    ----------
    ds_spec : dict-like
        The dataset specification in the deprecated format.

    Returns
    -------
    dict
        A new specification with the keys ``"main_table"``, holding the
        ``(source, key)`` pair of the main table, and
        ``"additional_data_tables"``, mapping each secondary table data path
        to ``(source, key)`` or, for entity tables, ``(source, key, is_entity)``.
        Keys are always returned as lists.
    """
    assert is_dict_like(ds_spec)
    main_table_name = ds_spec["main_table"]
    new_ds_spec = {}
    new_ds_spec["additional_data_tables"] = {}
    for table_name, table_data in ds_spec["tables"].items():
        table_df, table_key = table_data

        # Normalize scalar keys to single-element lists
        if not is_list_like(table_key):
            table_key = [table_key]

        if table_name == main_table_name:
            new_ds_spec["main_table"] = (table_df, table_key)
            continue

        # If no "relations" key exists, then one has a star schema and
        # the data-paths are the names of the secondary tables themselves
        # (with respect to the main table)
        if "relations" in ds_spec:
            path_fragments, is_entity = _resolve_data_path(
                table_name, main_table_name, list(ds_spec["relations"])
            )
        else:
            path_fragments, is_entity = [table_name], False

        # The entity flag is only stored in the table spec when it is truthy
        if is_entity:
            table_data = (table_df, table_key, is_entity)
        else:
            table_data = (table_df, table_key)
        new_ds_spec["additional_data_tables"]["/".join(path_fragments)] = table_data
    return new_ds_spec
225+
226+
174227def get_khiops_type (numpy_type ):
175228 """Translates a numpy dtype to a Khiops dictionary type
176229
@@ -418,14 +471,26 @@ def _check_input_sequence(self, X, key=None):
418471 # Check the key for the main_table (it is the same for the others)
419472 _check_table_key ("main_table" , key )
420473
421- def _table_name_of_path (self , table_path ):
422- # TODO: Add >= 128-character truncation and indexing scheme
423- return table_path .split ("/" )[- 1 ]
424-
425474 def _init_tables_from_mapping (self , X ):
426475 """Initializes the table spec from a dict-like 'X'"""
427476 assert is_dict_like (X ), "'X' must be dict-like"
428477
478+ # Detect if deprecated mapping specification syntax is used;
479+ # if so, issue deprecation warning and transform it to the new syntax
480+ if "tables" in X .keys () and isinstance (X .get ("main_table" ), str ):
481+ warnings .warn (
482+ deprecation_message (
483+ "This multi-table dataset specification format" ,
484+ "11.0.1" ,
485+ replacement = (
486+ "the new data-path-based format, as documented in "
487+ ":doc:`multi_table_primer`."
488+ ),
489+ quote = False ,
490+ )
491+ )
492+ X = _upgrade_mapping_spec (X )
493+
429494 # Check the input mapping
430495 check_dataset_spec (X )
431496
@@ -444,7 +509,7 @@ def _init_tables_from_mapping(self, X):
444509 if "additional_data_tables" in X :
445510 for table_path , table_spec in X ["additional_data_tables" ].items ():
446511 table_source , table_key = table_spec [:2 ]
447- table_name = self . _table_name_of_path (table_path )
512+ table_name = _table_name_of_path (table_path )
448513 table = PandasTable (
449514 table_name ,
450515 table_source ,
@@ -461,7 +526,7 @@ def _init_tables_from_mapping(self, X):
461526 parent_table_name = self .main_table .name
462527 else :
463528 table_path_fragments = table_path .split ("/" )
464- parent_table_name = self . _table_name_of_path (
529+ parent_table_name = _table_name_of_path (
465530 "/" .join (table_path_fragments [:- 1 ])
466531 )
467532 self .relations .append (
0 commit comments