22import pandas as pd
33from numbers import Number
44
5+ from .encoder_adaptor import IEncoderAdaptor
6+ from .numpy_encoder_adaptor import NumpyEncoderAdaptor
7+ from .pandas_encoder_adaptor import PandasEncoderAdaptor
8+
59
610class VarShaper :
711
@@ -20,10 +24,11 @@ class VarShaper:
2024
2125 _dummy_constant_counter = 0
2226
23- def __init__ (self , var_name , encoder , data_sample = None ):
27+ def __init__ (self , var_name , encoder , data_sample = None , encoder_adaptor = None ):
2428 self ._var_name = var_name
2529 # _name will be included in metadata for using in ML models, e.g. for naming input layers in Keras
2630 self ._name = var_name
31+ self ._encoder_adaptor = self ._build_encoder_adaptor (encoder_adaptor )
2732 self ._encoder = encoder
2833 self ._class = self ._self_classify (var_name , encoder )
2934 self ._decoded_dtype , self ._dtype = self ._get_dtypes (data_sample )
@@ -34,6 +39,24 @@ def __init__(self, var_name, encoder, data_sample=None):
3439 self ._shape = self ._get_shape (var_name , encoder , data_sample )
3540 self ._n_classes = self ._get_n_classes (encoder )
3641
def _build_encoder_adaptor(self, encoder_adaptor):
    """
    Resolve the ``encoder_adaptor`` argument into an encoder adaptor instance.

    The resulting object is stored on the shaper and its ``transform`` /
    ``inverse_transform`` methods are called around the encoder's own methods,
    so that encoders expecting different input data types can be used.

    :param encoder_adaptor: None or the str 'numpy' (both select NumpyEncoderAdaptor), the str 'pandas'
                            (selects PandasEncoderAdaptor), or an instance of a custom class derived
                            from IEncoderAdaptor (returned as is)
    :return: instance of IEncoderAdaptor
    :raises TypeError: if ``encoder_adaptor`` is none of the accepted values
    """
    # The default (None) and the explicit 'numpy' keyword both map to the numpy adaptor.
    if (encoder_adaptor == 'numpy') or (encoder_adaptor is None):
        return NumpyEncoderAdaptor()
    if encoder_adaptor == 'pandas':
        return PandasEncoderAdaptor()
    # A ready-made adaptor object is passed through unchanged.
    if isinstance(encoder_adaptor, IEncoderAdaptor):
        return encoder_adaptor
    raise TypeError(f"Error: The encoder adaptor must be a string ('numpy' or 'pandas') or an instance of a "
                    f"custom class derived from IEncoderAdaptor")
59+
3760 @staticmethod
3861 def _self_classify (var_name , encoder ):
3962 """
@@ -58,6 +81,8 @@ def _self_classify(var_name, encoder):
5881 return "direct"
5982 if not hasattr (encoder , "transform" ):
6083 raise ValueError (f"Error: encoder provided for column '{ var_name } ' has no 'transform' method" )
84+ if not hasattr (encoder , "inverse_transform" ):
85+ raise ValueError (f"Error: encoder provided for column '{ var_name } ' has no 'inverse_transform' method" )
6186 return "encoder"
6287 else :
6388 raise ValueError (f"Error: variable name must be a str or None. Got { type (var_name )} " )
@@ -155,20 +180,21 @@ def transform(self, data):
155180 # if not hasattr(self._encoder, 'transform'):
156181 # raise ValueError(f"Error: encoders of class {type(self._encoder).__name__} provided in structure "
157182 # f"definition has no 'transform' method")
183+ encoder_input = self ._encoder_adaptor .transform (data [self ._var_name ])
158184 try :
159- x = getattr ( self ._encoder , ' transform' )( data [ self . _var_name ]. values )
185+ x = self ._encoder . transform ( encoder_input )
160186 except ValueError as e :
161187 raise ValueError (f'Error: ValueError exception occured while calling '
162188 f'{ type (self ._encoder ).__name__ } .transform method. Most likely you used'
163189 f' 2D encoders. At the moment, only 1D transformers are supported. Please use 1D '
164190 f'variant or use wrapper. The error was: { e } ' )
165- except Exception as e :
166- raise RuntimeError (f'Error: unknown error while calling transform method of '
167- f'{ type (self ._encoder ).__name__ } class provided in structure. The error was: { e } ' )
191+ # except Exception as e:
192+ # raise RuntimeError(f'Error: unknown error while calling transform method of '
193+ # f'{type(self._encoder).__name__} class provided in structure. The error was: {e}')
168194 elif self ._class == "constant" :
169195 x = np .repeat (self ._encoder , data .shape [0 ])
170196 elif self ._class == "direct" :
171- x = data [self ._var_name ].values
197+ x = data [self ._var_name ].to_numpy ()
172198 else :
173199 raise RuntimeError ('Error: this should not have happened. Maybe it needs to be reported' )
174200 # if self._dtype is None:
@@ -192,13 +218,13 @@ def inverse_transform(self, df, encoded_data):
192218 # changes. These cases have structure entry like this ('col_name', None)
193219 df [self ._var_name ] = pd .Series (np .squeeze (encoded_data ), dtype = self ._decoded_dtype )
194220 elif self ._class == "encoder" :
195- if not hasattr ( self . _encoder , "inverse_transform" ):
196- raise ValueError ( f"Error: encoder provided for column ' { self ._var_name } ' has no ' inverse_transform' method" )
197- if not hasattr ( self ._encoder , 'inverse_transform' ):
198- raise ValueError ( 'Error: the encoders {} used for column {} has no inverse_transform method'
199- . format ( type ( self ._encoder ). __name__ , self . _var_name ))
200- it = self ._encoder . inverse_transform ( encoded_data )
201- df [self ._var_name ] = pd . Series ( it , dtype = self . _decoded_dtype )
221+ # it has already been checked at init stage. It is redundant here
222+ # if not hasattr( self._encoder, " inverse_transform"):
223+ # raise ValueError(f"Error: encoder provided for column '{ self._var_name}' has no "
224+ # "' inverse_transform' method")
225+ it = self . _encoder_adaptor . inverse_transform ( self ._encoder . inverse_transform ( encoded_data ),
226+ dtype = self ._decoded_dtype )
227+ df [self ._var_name ] = it
202228
203229 def _reshape (self , x : np .ndarray ):
204230 if x .ndim == 1 :
0 commit comments