@@ -211,6 +211,99 @@ def test_max_abs_scaler_series_normalizes(penguins_df_default_index, new_penguin
211211 pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
212212
213213
def test_min_max_scaler_normalizeds_fit_transform(new_penguins_df):
    """Check MinMaxScaler.fit_transform on three penguin measurement columns.

    The scaler is fitted and applied in one step to the culmen length,
    culmen depth and flipper length columns of ``new_penguins_df``; the
    result is compared against hard-coded expected values (rtol=1e-3).
    """
    feature_columns = ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"]

    scaler = bigframes.ml.preprocessing.MinMaxScaler()
    scaled = scaler.fit_transform(new_penguins_df[feature_columns])
    result = scaled.to_pandas()

    # TODO: possible bug -- the output feature columns appear to come back in
    # a nondeterministic order. Sorting them by name is a workaround. It could
    # not be reproduced in pantheon, so it may be a bigframes issue.
    result = result.reindex(sorted(result.columns), axis=1)

    expected_index = pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64")
    expected = pd.DataFrame(
        {
            "min_max_scaled_culmen_depth_mm": [1.0, 0.0, 0.5625],
            "min_max_scaled_culmen_length_mm": [1.0, 0.375, 0.0],
            "min_max_scaled_flipper_length_mm": [1.0, 0.0, 0.466667],
        },
        dtype="Float64",
        index=expected_index,
    )

    pd.testing.assert_frame_equal(result, expected, rtol=1e-3)
236+
237+
def test_min_max_scaler_series_normalizes(penguins_df_default_index, new_penguins_df):
    """Check MinMaxScaler when it is fitted on a single Series.

    First the scaler is fitted on, and applied to, the ``culmen_length_mm``
    column of ``penguins_df_default_index``; every output column must then
    have max 1 and min 0 (abs_tol=1e-3). Next the fitted scaler transforms
    ``new_penguins_df`` and the output is compared against hard-coded
    expected values (rtol=1e-3).
    """
    training_series = penguins_df_default_index["culmen_length_mm"]

    scaler = bigframes.ml.preprocessing.MinMaxScaler()
    scaler.fit(training_series)

    training_result = scaler.transform(
        penguins_df_default_index["culmen_length_mm"]
    ).to_pandas()

    # If minmax-scaled correctly, min should be 0 and max should be 1.
    for column in training_result.columns:
        scaled_column = training_result[column]
        assert math.isclose(scaled_column.max(), 1.0, abs_tol=1e-3)
        assert math.isclose(scaled_column.min(), 0.0, abs_tol=1e-3)

    result = scaler.transform(new_penguins_df).to_pandas()

    # TODO: possible bug -- the output feature columns appear to come back in
    # a nondeterministic order. Sorting them by name is a workaround. It could
    # not be reproduced in pantheon, so it may be a bigframes issue.
    result = result.reindex(sorted(result.columns), axis=1)

    expected_index = pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64")
    expected = pd.DataFrame(
        {
            "min_max_scaled_culmen_length_mm": [0.269091, 0.232727, 0.210909],
        },
        dtype="Float64",
        index=expected_index,
    )

    pd.testing.assert_frame_equal(result, expected, rtol=1e-3)
265+
266+
def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df):
    """Check MinMaxScaler fitted on three penguin measurement columns.

    The scaler is fitted on, and applied to, the culmen length, culmen depth
    and flipper length columns of ``penguins_df_default_index``; every output
    column must then have max 1 and min 0 (abs_tol=1e-3). The fitted scaler
    then transforms ``new_penguins_df`` and the output is compared against
    hard-coded expected values (rtol=1e-3).
    """
    # TODO(http://b/292431644): add a second test that compares output to
    # sklearn.preprocessing.StandardScaler, when BQML's change is in prod.
    # NOTE(review): this test exercises MinMaxScaler, so the sklearn class
    # named above is probably meant to be MinMaxScaler -- confirm.
    feature_columns = ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"]

    scaler = bigframes.ml.preprocessing.MinMaxScaler()
    scaler.fit(penguins_df_default_index[feature_columns])

    training_result = scaler.transform(
        penguins_df_default_index[feature_columns]
    ).to_pandas()

    # If minmax-scaled correctly, min should be 0 and max should be 1.
    for column in training_result.columns:
        scaled_column = training_result[column]
        assert math.isclose(scaled_column.max(), 1.0, abs_tol=1e-3)
        assert math.isclose(scaled_column.min(), 0.0, abs_tol=1e-3)

    result = scaler.transform(new_penguins_df).to_pandas()

    # TODO: possible bug -- the output feature columns appear to come back in
    # a nondeterministic order. Sorting them by name is a workaround. It could
    # not be reproduced in pantheon, so it may be a bigframes issue.
    result = result.reindex(sorted(result.columns), axis=1)

    expected_index = pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64")
    expected = pd.DataFrame(
        {
            "min_max_scaled_culmen_depth_mm": [0.678571, 0.4880952, 0.595238],
            "min_max_scaled_culmen_length_mm": [0.269091, 0.232727, 0.210909],
            "min_max_scaled_flipper_length_mm": [0.40678, 0.152542, 0.271186],
        },
        dtype="Float64",
        index=expected_index,
    )

    pd.testing.assert_frame_equal(result, expected, rtol=1e-3)
305+
306+
214307def test_one_hot_encoder_default_params (new_penguins_df ):
215308 encoder = bigframes .ml .preprocessing .OneHotEncoder ()
216309 encoder .fit (new_penguins_df [["species" , "sex" ]])
0 commit comments