1010import warnings
1111from abc import ABC , abstractmethod
1212from collections .abc import Iterable , Mapping , Sequence
13+ from itertools import cycle , islice
1314
1415import numpy as np
1516import pandas as pd
2122import khiops .core as kh
2223import khiops .core .internals .filesystems as fs
2324from khiops .core .dictionary import VariableBlock
24- from khiops .core .internals .common import is_dict_like , is_list_like , type_error_message
25+ from khiops .core .internals .common import (
26+ deprecation_message ,
27+ is_dict_like ,
28+ is_list_like ,
29+ type_error_message ,
30+ )
2531
2632# Disable PEP8 variable names because of scikit-learn X,y conventions
2733# To capture invalid-names other than X,y run:
@@ -171,6 +177,52 @@ def _check_multitable_spec(ds_spec):
171177 )
172178
173179
180+ def _table_name_of_path (table_path ):
181+ return table_path .split ("/" )[- 1 ]
182+
183+
def _upgrade_mapping_spec(X):
    """Converts a deprecated "tables"-based dataset spec to the data-path format

    Parameters
    ----------
    X : dict-like
        Dataset spec in the deprecated format, with the keys:

        - "main_table": name of the main table.
        - "tables": mapping from table names to ``(table data, key)`` pairs,
          where ``key`` is either a single value or a list-like.
        - "relations" (optional): sequence of ``(parent, child)`` or
          ``(parent, child, is_entity)`` tuples. When absent, the schema is
          taken to be a star schema.

    Returns
    -------
    dict
        A new spec, ``X`` is not modified. It contains:

        - "additional_data_tables": mapping from data paths (the "/"-joined
          chain of table names leading from below the main table to the
          table) to ``(table data, key list)`` or, when the relation flags the
          table as an entity, ``(table data, key list, is_entity)``.
        - "main_table": ``(table data, key list)``, set only if the table
          named by ``X["main_table"]`` is present in ``X["tables"]``.
    """
    assert is_dict_like(X)
    main_table_name = X["main_table"]

    # If no "relations" key exists, then one has a star schema and the
    # data paths are the names of the secondary tables themselves
    relations = list(X["relations"]) if "relations" in X else []

    new_X = {}
    new_X["additional_data_tables"] = {}
    for table_name, table_data in X["tables"].items():
        table_df, table_key = table_data
        if not is_list_like(table_key):
            table_key = [table_key]
        if table_name == main_table_name:
            new_X["main_table"] = (table_df, table_key)
            continue

        # The entity flag comes from the (last) 3-tuple relation that has the
        # current table as its child
        is_entity = False
        for relation in relations:
            _, right = relation[:2]
            if len(relation) == 3 and right == table_name:
                is_entity = relation[2]

        # Build the data path by prepending ancestors until nothing changes.
        # Example:
        # - current table name N
        # - main table name N1
        # - and relations: (N1, N2), (N2, N3), (N3, N)
        # the data path must be N2/N3/N
        # A fixed number of scans is not enough: when the relations are listed
        # from the root down, each scan discovers only one more ancestor.
        # The loop terminates because each growth step adds a table name not
        # yet in the path, and the relations hold finitely many names.
        table_path = [table_name]
        path_grew = True
        while path_grew:
            path_grew = False
            for relation in relations:
                left, right = relation[:2]
                if (
                    left != main_table_name
                    and left not in table_path
                    and right in table_path
                ):
                    table_path.insert(0, left)
                    path_grew = True

        data_path = "/".join(table_path)
        if is_entity:
            new_X["additional_data_tables"][data_path] = (
                table_df,
                table_key,
                is_entity,
            )
        else:
            new_X["additional_data_tables"][data_path] = (table_df, table_key)
    return new_X
224+
225+
174226def get_khiops_type (numpy_type ):
175227 """Translates a numpy dtype to a Khiops dictionary type
176228
@@ -418,14 +470,26 @@ def _check_input_sequence(self, X, key=None):
418470 # Check the key for the main_table (it is the same for the others)
419471 _check_table_key ("main_table" , key )
420472
421- def _table_name_of_path (self , table_path ):
422- # TODO: Add >= 128-character truncation and indexing scheme
423- return table_path .split ("/" )[- 1 ]
424-
425473 def _init_tables_from_mapping (self , X ):
426474 """Initializes the table spec from a dict-like 'X'"""
427475 assert is_dict_like (X ), "'X' must be dict-like"
428476
477+ # Detect if deprecated mapping specification syntax is used;
478+ # if so, issue deprecation warning and transform it to the new syntax
479+ if "tables" in X .keys ():
480+ warnings .warn (
481+ deprecation_message (
482+ "This multi-table dataset specification format" ,
483+ "11.0.1" ,
484+ replacement = (
485+ "the new data-path-based format, as documented in "
486+ ":doc:`multi_table_primer`."
487+ ),
488+ quote = False ,
489+ )
490+ )
491+ X = _upgrade_mapping_spec (X )
492+
429493 # Check the input mapping
430494 check_dataset_spec (X )
431495
@@ -444,7 +508,7 @@ def _init_tables_from_mapping(self, X):
444508 if "additional_data_tables" in X :
445509 for table_path , table_spec in X ["additional_data_tables" ].items ():
446510 table_source , table_key = table_spec [:2 ]
447- table_name = self . _table_name_of_path (table_path )
511+ table_name = _table_name_of_path (table_path )
448512 table = PandasTable (
449513 table_name ,
450514 table_source ,
@@ -461,7 +525,7 @@ def _init_tables_from_mapping(self, X):
461525 parent_table_name = self .main_table .name
462526 else :
463527 table_path_fragments = table_path .split ("/" )
464- parent_table_name = self . _table_name_of_path (
528+ parent_table_name = _table_name_of_path (
465529 "/" .join (table_path_fragments [:- 1 ])
466530 )
467531 self .relations .append (
0 commit comments