diff --git a/.ci/scripts/select_sklearn_tests.py b/.ci/scripts/select_sklearn_tests.py index 868e74cee3..c77cee21b6 100644 --- a/.ci/scripts/select_sklearn_tests.py +++ b/.ci/scripts/select_sklearn_tests.py @@ -51,6 +51,7 @@ def parse_tests_tree(entry, prefix=""): "metrics/tests": ["test_pairwise.py", "test_ranking.py"], "model_selection/tests": ["test_split.py", "test_validation.py"], "neighbors/tests": ["test_lof.py", "test_neighbors.py", "test_neighbors_pipeline.py"], + "preprocessing/tests": ["test_common.py", "test_data.py"], "svm/tests": ["test_sparse.py", "test_svm.py"], "tests": "test_dummy.py", } diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 7d0c6c103f..7af0abca22 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -469,6 +469,9 @@ deselected_tests: # CI jobs in sklearnex compile scikit-learn from source, not necessarily with the same toolkits as sklearn's CIs - preprocessing/tests/test_polynomial.py::test_sizeof_LARGEST_INT_t + # sklearn does not support n_jobs in preprocessing estimators + - preprocessing/tests/test_common.py::test_missing_value_handling[est0-maxabs_scale-True-False-omit_kwargs0] + # Fails due to numeric tolerances on some AMD systems - linear_model/tests/test_base.py::test_linear_regression_vs_lstsq[float32] diff --git a/requirements-test.txt b/requirements-test.txt index 538bb01200..c55ff561ac 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,7 +3,7 @@ pytest==9.0.3 ; python_version >= '3.11' pytest-json-report==1.5.0 pytest-cov==7.1.0 pytest-mock==3.15.1 -numpy>=1.21.6 ; python_version <= '3.10' +numpy>=1.21.6,<1.24.0 ; python_version <= '3.10' numpy>=1.23.5 ; python_version == '3.11' numpy>=2.0.0 ; python_version >= '3.12' scikit-learn==1.7.2 ; python_version <= '3.10' diff --git a/setup.py b/setup.py index b22b1d90be..7592388363 100644 --- a/setup.py +++ b/setup.py @@ -555,6 +555,7 @@ class build(onedal_build, orig_build.build): "sklearnex.preview.covariance", 
"sklearnex.preview.decomposition", "sklearnex.preview.linear_model", + "sklearnex.preview.preprocessing", "sklearnex.svm", "sklearnex.utils", ] @@ -586,7 +587,11 @@ class build(onedal_build, orig_build.build): "sklearnex.spmd.neighbors", ] if ONEDAL_VERSION >= 20230200: - packages_with_tests += ["onedal.spmd.cluster", "sklearnex.spmd.cluster"] + packages_with_tests += [ + "onedal.spmd.cluster", + "sklearnex.spmd.cluster", + "sklearnex.spmd.preprocessing", + ] setup( name="scikit-learn-intelex", diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index af2d2a86b9..bdbfc2857b 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -54,14 +54,17 @@ def get_patch_map_core(preview: bool = False) -> PatchMap: import sklearn.covariance as covariance_module import sklearn.decomposition as decomposition_module + import sklearn.preprocessing as preprocessing_module from sklearn.covariance import EmpiricalCovariance as EmpiricalCovariance_sklearn from sklearn.decomposition import IncrementalPCA as IncrementalPCA_sklearn + from sklearn.preprocessing import MaxAbsScaler as MaxAbsScaler_sklearn # Preview classes for patching from .preview.covariance import ( EmpiricalCovariance as EmpiricalCovariance_sklearnex, ) from .preview.decomposition import IncrementalPCA as IncrementalPCA_sklearnex + from .preview.preprocessing import MaxAbsScaler as MaxAbsScaler_sklearnex # Since the state of the lru_cache without preview cannot be # guaranteed to not have already enabled sklearnex algorithms @@ -82,6 +85,12 @@ def get_patch_map_core(preview: bool = False) -> PatchMap: IncrementalPCA_sklearnex, IncrementalPCA_sklearn, ), + "sklearn.preprocessing.MaxAbsScaler": ( + preprocessing_module, + "MaxAbsScaler", + MaxAbsScaler_sklearnex, + MaxAbsScaler_sklearn, + ), } if daal_check_version((2024, "P", 1)): import sklearn.linear_model as linear_model_module diff --git a/sklearnex/preview/__init__.py b/sklearnex/preview/__init__.py index 769bfbcb4b..7ade9e9a38 100644 --- 
a/sklearnex/preview/__init__.py +++ b/sklearnex/preview/__init__.py @@ -14,4 +14,4 @@ # limitations under the License. # ============================================================================== -__all__ = ["covariance", "decomposition"] +__all__ = ["covariance", "decomposition", "preprocessing"] diff --git a/sklearnex/preview/preprocessing/__init__.py b/sklearnex/preview/preprocessing/__init__.py new file mode 100644 index 0000000000..d0a8296b38 --- /dev/null +++ b/sklearnex/preview/preprocessing/__init__.py @@ -0,0 +1,19 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from ._data import MaxAbsScaler + +__all__ = ["MaxAbsScaler"] diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py new file mode 100644 index 0000000000..bdc7f56a4e --- /dev/null +++ b/sklearnex/preview/preprocessing/_data.py @@ -0,0 +1,235 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
from functools import partial

from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler
from sklearn.preprocessing._data import _handle_zeros_in_scale
from sklearn.utils.validation import check_array, check_is_fitted

from daal4py.sklearn._n_jobs_support import control_n_jobs
from daal4py.sklearn._utils import is_sparse, sklearn_check_version
from onedal._device_offload import support_sycl_format
from onedal.basic_statistics import (
    IncrementalBasicStatistics as onedal_IncrementalBasicStatistics,
)

from ..._device_offload import dispatch, wrap_output_data
from ..._utils import PatchingConditionsChain
from ...base import oneDALEstimator
from ...utils._array_api import enable_array_api, get_namespace
from ...utils.validation import (
    _finite_keyword,
    assert_all_finite,
    validate_data,
)

# Deliberately permissive ``check_array`` settings: the call is only used to
# obtain an array that ``assert_all_finite`` can inspect while deciding whether
# oneDAL may be used. Real input validation happens later in ``validate_data``.
__check_kwargs = {
    "dtype": None,
    "ensure_2d": False,
    "ensure_min_samples": 0,
    "ensure_min_features": 0,
    "accept_sparse": True,
    _finite_keyword: False,
}

_check_array = partial(check_array, **__check_kwargs)


@enable_array_api
@control_n_jobs(decorated_methods=["fit", "partial_fit", "_onedal_finalize_fit"])
class MaxAbsScaler(oneDALEstimator, _sklearn_MaxAbsScaler):
    # oneDAL-accelerated MaxAbsScaler.
    #
    # Per-feature min/max are accumulated by oneDAL's incremental basic
    # statistics; ``max_abs_`` and ``scale_`` are derived lazily from them the
    # first time they are read (see the properties at the bottom of the class).
    # Input that oneDAL cannot handle (sparse or non-finite) is dispatched to
    # the stock scikit-learn implementation.
    #
    # Private state:
    #   _onedal_estimator       oneDAL accumulator, created on first use.
    #   _need_to_finalize       True while the accumulator holds batches that
    #                           are not yet reflected in ``max_abs_``/``scale_``.
    #   _sklearn_fallback_used  True once a ``partial_fit`` batch was processed
    #                           by scikit-learn; see ``_onedal_supported``.
    __doc__ = _sklearn_MaxAbsScaler.__doc__

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {
            **_sklearn_MaxAbsScaler._parameter_constraints,
        }

    def __init__(self, *, copy=True, clip=False):
        self.copy = copy
        self.clip = clip
        self._need_to_finalize = False
        self._sklearn_fallback_used = False

    # Kept as a class attribute so subclasses (e.g. the SPMD variant) can swap
    # in a different oneDAL backend without overriding any method.
    _onedal_incremental_basic_statistics = staticmethod(onedal_IncrementalBasicStatistics)

    def _onedal_supported(self, method_name, *data):
        """Return the patching conditions deciding oneDAL vs. scikit-learn.

        For ``fit`` and ``partial_fit`` oneDAL is used only for dense, finite
        input. ``partial_fit`` additionally stays on scikit-learn once any
        earlier batch of the current fitting sequence was handled there.
        """
        patching_status = PatchingConditionsChain(
            f"sklearn.preprocessing.{self.__class__.__name__}.{method_name}"
        )
        if method_name in ["fit", "partial_fit"]:
            (X,) = data
            try:
                X_test = _check_array(X)
                assert_all_finite(X_test)  # minimally verify the data
                input_is_finite = True
            except ValueError:
                input_is_finite = False

            conditions = [
                (not is_sparse(X), "Sparse input is not supported"),
                (input_is_finite, "Non-finite input is not supported."),
            ]
            if method_name == "partial_fit":
                # A batch handled by scikit-learn is folded into ``max_abs_``
                # only, never into the oneDAL accumulator. Returning to oneDAL
                # afterwards would make the next finalization overwrite
                # ``max_abs_`` with statistics that miss that batch, so the
                # rest of the sequence stays on scikit-learn.
                conditions.append(
                    (
                        not getattr(self, "_sklearn_fallback_used", False),
                        "An earlier batch was processed by scikit-learn.",
                    )
                )
            patching_status.and_conditions(conditions)

        return patching_status

    _onedal_cpu_supported = _onedal_supported
    _onedal_gpu_supported = _onedal_supported

    def _reset(self):
        """Drop all fitted state, including the oneDAL accumulator.

        NOTE(review): scikit-learn's ``MaxAbsScaler.fit`` is expected to call
        ``self._reset()`` before delegating to ``self.partial_fit`` - confirm
        against the supported scikit-learn versions. Overriding it here keeps
        the oneDAL accumulator from carrying statistics of a previous fit into
        a refit that is executed by scikit-learn.
        """
        if hasattr(self, "_onedal_estimator"):
            self._onedal_estimator._reset()
        self._need_to_finalize = False
        self._sklearn_fallback_used = False
        # Remove the backing attributes directly: going through the public
        # properties could trigger a pointless finalization.
        for name in ("_max_abs_", "_scale_", "n_samples_seen_"):
            if name in vars(self):
                delattr(self, name)

    def _onedal_finalize_fit(self, queue=None):
        """Commit the accumulated statistics and derive ``max_abs_``/``scale_``."""
        assert hasattr(self, "_onedal_estimator")
        self._onedal_estimator.finalize_fit()

        xp, _ = get_namespace(self._onedal_estimator.min_)

        # max |x| per feature is the larger of |min| and |max|.
        min_abs = xp.abs(self._onedal_estimator.min_)
        max_abs = xp.abs(self._onedal_estimator.max_)
        self.max_abs_ = xp.maximum(min_abs, max_abs)
        # Read the backing field, not the property, so the getter does not
        # re-enter this method while ``_need_to_finalize`` is still set.
        self.scale_ = _handle_zeros_in_scale(self._max_abs_, copy=True)

        self._need_to_finalize = False

    def _onedal_partial_fit(self, X, queue=None, check_input=True):
        """Feed one batch into the oneDAL accumulator.

        ``check_input=False`` skips validation; it is used by ``_onedal_fit``,
        which has already validated ``X``.
        """
        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0

        if check_input:
            xp, _ = get_namespace(X)
            X = validate_data(
                self,
                X,
                dtype=[xp.float64, xp.float32],
                reset=first_pass,
                ensure_all_finite=False,
            )

        # Mirror scikit-learn's bookkeeping of the number of processed rows.
        if first_pass:
            self.n_samples_seen_ = X.shape[0]
        else:
            self.n_samples_seen_ += X.shape[0]

        if not hasattr(self, "_onedal_estimator"):
            # Only min and max are needed to compute the max absolute value.
            self._onedal_estimator = self._onedal_incremental_basic_statistics(
                result_options=["min", "max"]
            )

        self._onedal_estimator.partial_fit(X, queue=queue)
        self._need_to_finalize = True

    def _sklearn_partial_fit(self, X):
        """scikit-learn branch of ``partial_fit``.

        Records that a batch bypassed oneDAL only after scikit-learn accepted
        it, so a batch rejected during validation does not change the routing
        of later batches.
        """
        _sklearn_MaxAbsScaler.partial_fit(self, X)
        self._sklearn_fallback_used = True
        return self

    def _onedal_fit(self, X, queue=None):
        """Fit from scratch on ``X`` using oneDAL."""
        xp, _ = get_namespace(X)
        if sklearn_check_version("1.2"):
            self._validate_params()
        X = validate_data(
            self,
            X,
            dtype=[xp.float64, xp.float32],
            ensure_all_finite=False,
        )

        # Start from a clean slate (sample count, accumulator, fallback flag).
        self._reset()

        # A full fit is a single incremental step over the whole dataset ...
        self._onedal_partial_fit(X, queue=queue, check_input=False)

        # ... followed by an eager finalization.
        self._onedal_finalize_fit()

        return self

    def partial_fit(self, X, y=None):
        if sklearn_check_version("1.2"):
            self._validate_params()

        dispatch(
            self,
            "partial_fit",
            {
                "onedal": self.__class__._onedal_partial_fit,
                "sklearn": self.__class__._sklearn_partial_fit,
            },
            X,
        )
        return self

    def fit(self, X, y=None):
        if sklearn_check_version("1.2"):
            self._validate_params()

        dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": _sklearn_MaxAbsScaler.fit,
            },
            X,
        )
        return self

    # ``transform`` is scikit-learn's implementation; it only reads ``scale_``,
    # which the property below keeps up to date.
    transform = support_sycl_format(_sklearn_MaxAbsScaler.transform)

    # ``max_abs_`` and ``scale_`` are exposed as properties so that pending
    # oneDAL batches are finalized transparently on first access.
    @property
    def max_abs_(self):
        if hasattr(self, "_onedal_estimator") and self._need_to_finalize:
            self._onedal_finalize_fit()
        return self._max_abs_

    @max_abs_.setter
    def max_abs_(self, value):
        self._max_abs_ = value

    @max_abs_.deleter
    def max_abs_(self):
        del self._max_abs_

    @property
    def scale_(self):
        if hasattr(self, "_onedal_estimator") and self._need_to_finalize:
            self._onedal_finalize_fit()
        return self._scale_

    @scale_.setter
    def scale_(self, value):
        self._scale_ = value

    @scale_.deleter
    def scale_(self):
        del self._scale_
import numpy as np
import pytest
from numpy.testing import assert_allclose
from scipy import sparse as sp
from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler

from daal4py.sklearn._utils import sklearn_check_version
from onedal.tests.utils._dataframes_support import (
    _as_numpy,
    _convert_to_dataframe,
    get_dataframes_and_queues,
)
from sklearnex import config_context
from sklearnex.preview.preprocessing import MaxAbsScaler


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
def test_sklearnex_import_MaxAbsScaler(dataframe, queue):
    # The fitted object must come from the sklearnex package.
    generator = np.random.default_rng(seed=42)
    samples = _convert_to_dataframe(
        generator.random((10, 4)), sycl_queue=queue, target_df=dataframe
    )

    fitted = MaxAbsScaler().fit(samples)

    assert "sklearnex" in fitted.__module__


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
def test_max_abs_scaler_dense_fit_transform(dataframe, queue):
    # fit_transform must agree with stock scikit-learn on dense input.
    generator = np.random.default_rng(seed=42)
    host_data = generator.standard_normal((50, 5))

    # Give the first three columns clearly different magnitudes.
    host_data[:, 2] += 5
    host_data[:, 1] *= 0.1
    host_data[:, 0] *= 10

    # Reference result from scikit-learn.
    stock = _sklearn_MaxAbsScaler()
    expected = stock.fit_transform(host_data)

    # Result from the accelerated estimator on the requested container.
    accelerated = MaxAbsScaler()
    observed = _as_numpy(
        accelerated.fit_transform(
            _convert_to_dataframe(host_data, sycl_queue=queue, target_df=dataframe)
        )
    )

    assert_allclose(accelerated.scale_, stock.scale_)
    assert_allclose(accelerated.max_abs_, stock.max_abs_)
    assert_allclose(observed, expected)


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
def test_max_abs_scaler_dense_partial_fit(dataframe, queue):
    # Batched fitting must agree with stock scikit-learn.
    generator = np.random.default_rng(seed=42)
    host_data = generator.standard_normal((100, 3))
    chunks = (host_data[:30], host_data[30:70], host_data[70:])

    # Reference result from scikit-learn.
    stock = _sklearn_MaxAbsScaler()
    for chunk in chunks:
        stock.partial_fit(chunk)
    expected = stock.transform(host_data)

    # Same batches through the accelerated estimator.
    accelerated = MaxAbsScaler()
    for chunk in chunks:
        accelerated.partial_fit(
            _convert_to_dataframe(chunk, sycl_queue=queue, target_df=dataframe)
        )
    observed = _as_numpy(
        accelerated.transform(
            _convert_to_dataframe(host_data, sycl_queue=queue, target_df=dataframe)
        )
    )

    assert accelerated.n_samples_seen_ == stock.n_samples_seen_
    assert_allclose(accelerated.scale_, stock.scale_)
    assert_allclose(accelerated.max_abs_, stock.max_abs_)
    assert_allclose(observed, expected)


@pytest.mark.skipif(
    not sklearn_check_version("1.3"), reason="lacks sklearn array API support"
)
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("dpctl,dpnp"))
def test_max_abs_scaler_array_api_dispatch(dataframe, queue):
    # Fitted attributes must exist and be overridable under array API dispatch.
    generator = np.random.default_rng(seed=42)
    device_data = _convert_to_dataframe(
        generator.standard_normal((10, 4)), sycl_queue=queue, target_df=dataframe
    )

    with config_context(array_api_dispatch=True):
        scaler = MaxAbsScaler().fit(device_data)
        # Result unused: the call only has to complete without raising.
        scaler.transform(device_data)

        for fitted_attribute in ("scale_", "max_abs_"):
            assert hasattr(scaler, fitted_attribute)

        # With a unit scale the transform has to hand the input back unchanged.
        scaler.scale_ = np.ones(scaler.scale_.shape)
        passthrough = scaler.transform(device_data)

        assert_allclose(_as_numpy(device_data), _as_numpy(passthrough))
from onedal.spmd.basic_statistics import (
    IncrementalBasicStatistics as onedalSPMD_IncrementalBasicStatistics,
)

from ...preview.preprocessing import MaxAbsScaler as base_MaxAbsScaler


# SPMD flavour of the preview MaxAbsScaler: identical logic, but the statistics
# are accumulated by the SPMD incremental basic-statistics backend.
class MaxAbsScaler(base_MaxAbsScaler):
    # Wrapped in ``staticmethod`` so the backend class is not bound as a
    # method when it is looked up on an instance.
    _onedal_incremental_basic_statistics = staticmethod(
        onedalSPMD_IncrementalBasicStatistics
    )
import numpy as np
import pytest
from numpy.testing import assert_allclose

from onedal.tests.utils._dataframes_support import (
    _as_numpy,
    _convert_to_dataframe,
    get_dataframes_and_queues,
)
from sklearnex.tests.utils.spmd import _get_local_tensor, _mpi_libs_and_gpu_available


@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_max_abs_scaler_fit_spmd_gold(dataframe, queue, dtype):
    # Imported lazily so collection works on machines without the SPMD build.
    from sklearnex.preview.preprocessing import MaxAbsScaler
    from sklearnex.spmd.preprocessing import MaxAbsScaler as MaxAbsScaler_SPMD

    gold = np.array(
        [
            [-10.0, 0.0, 3.0],
            [2.0, -1.0, 2.0],
            [5.0, 2.0, -4.0],
            [1.0, 3.0, 8.0],
            [8.0, -4.0, 1.0],
            [-1.0, 5.0, 2.0],
            [-5.0, -6.0, 64.0],
            [2.0, 1.0, -128.0],
        ],
        dtype=dtype,
    )

    def to_device(array):
        return _convert_to_dataframe(array, sycl_queue=queue, target_df=dataframe)

    whole = to_device(gold)
    local_part = to_device(_get_local_tensor(gold))

    # The distributed fit on the local part must match the batch fit on all rows.
    distributed = MaxAbsScaler_SPMD().fit(local_part)
    batch = MaxAbsScaler().fit(whole)

    assert_allclose(distributed.scale_, batch.scale_)
    assert_allclose(distributed.max_abs_, batch.max_abs_)
@pytest.mark.skipif(
    not _mpi_libs_and_gpu_available,
    reason="GPU device and MPI libs required for test",
)
@pytest.mark.parametrize(
    "dataframe,queue",
    get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
)
@pytest.mark.parametrize("num_blocks", [1, 2])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.mpi
def test_max_abs_scaler_partial_fit_spmd_gold(dataframe, queue, num_blocks, dtype):
    # Imported lazily so collection works on machines without the SPMD build.
    from sklearnex.preview.preprocessing import MaxAbsScaler
    from sklearnex.spmd.preprocessing import MaxAbsScaler as MaxAbsScaler_SPMD

    gold = np.array(
        [
            [-1.0, 3.0, 0.0],
            [0.5, 1.0, -2.0],
            [4.0, 2.0, 4.0],
            [-3.0, -3.0, 8.0],
            [5.0, 4.0, -16.0],
            [2.0, -5.0, 32.0],
            [1.0, -6.0, -64.0],
            [-7.0, 8.0, 128.0],
        ],
        dtype=dtype,
    )

    def to_device(array):
        return _convert_to_dataframe(array, sycl_queue=queue, target_df=dataframe)

    whole = to_device(gold)
    local_blocks = np.array_split(_get_local_tensor(gold), num_blocks)

    distributed = MaxAbsScaler_SPMD()
    batch = MaxAbsScaler()

    # Feed the local rows block by block into the distributed estimator.
    for block in local_blocks:
        distributed.partial_fit(to_device(block))

    batch.fit(whole)

    assert_allclose(distributed.scale_, batch.scale_)
    assert_allclose(distributed.max_abs_, batch.max_abs_)