From 45613ff6b8089d1a12d5048925dade1c732d3010 Mon Sep 17 00:00:00 2001 From: icfaust Date: Thu, 12 Mar 2026 21:30:26 +0100 Subject: [PATCH 01/25] test with antigravity --- setup.py | 1 + sklearnex/dispatcher.py | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/setup.py b/setup.py index b22b1d90be..962a061a62 100644 --- a/setup.py +++ b/setup.py @@ -555,6 +555,7 @@ class build(onedal_build, orig_build.build): "sklearnex.preview.covariance", "sklearnex.preview.decomposition", "sklearnex.preview.linear_model", + "sklearnex.preview.preprocessing", "sklearnex.svm", "sklearnex.utils", ] diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index af2d2a86b9..bdbfc2857b 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -54,14 +54,17 @@ def get_patch_map_core(preview: bool = False) -> PatchMap: import sklearn.covariance as covariance_module import sklearn.decomposition as decomposition_module + import sklearn.preprocessing as preprocessing_module from sklearn.covariance import EmpiricalCovariance as EmpiricalCovariance_sklearn from sklearn.decomposition import IncrementalPCA as IncrementalPCA_sklearn + from sklearn.preprocessing import MaxAbsScaler as MaxAbsScaler_sklearn # Preview classes for patching from .preview.covariance import ( EmpiricalCovariance as EmpiricalCovariance_sklearnex, ) from .preview.decomposition import IncrementalPCA as IncrementalPCA_sklearnex + from .preview.preprocessing import MaxAbsScaler as MaxAbsScaler_sklearnex # Since the state of the lru_cache without preview cannot be # guaranteed to not have already enabled sklearnex algorithms @@ -82,6 +85,12 @@ def get_patch_map_core(preview: bool = False) -> PatchMap: IncrementalPCA_sklearnex, IncrementalPCA_sklearn, ), + "sklearn.preprocessing.MaxAbsScaler": ( + preprocessing_module, + "MaxAbsScaler", + MaxAbsScaler_sklearnex, + MaxAbsScaler_sklearn, + ), } if daal_check_version((2024, "P", 1)): import sklearn.linear_model as linear_model_module From 3520f0b93af8ab5141222bc8a3d9a798cbde14a7 Mon Sep 17 00:00:00 2001 From: icfaust Date: Thu, 12 Mar 2026 21:36:22 +0100 Subject: [PATCH 02/25] add missing spmd interface --- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 962a061a62..7592388363 100644 --- a/setup.py +++ b/setup.py @@ -587,7 +587,11 @@ class build(onedal_build, orig_build.build): "sklearnex.spmd.neighbors", ] if ONEDAL_VERSION >= 20230200: - packages_with_tests += ["onedal.spmd.cluster", "sklearnex.spmd.cluster"] + packages_with_tests += [ + "onedal.spmd.cluster", + "sklearnex.spmd.cluster", + "sklearnex.spmd.preprocessing", + ] setup( name="scikit-learn-intelex", From 3af4717cd928c89e28aa35e721fcde38d0baa46c Mon Sep 17 00:00:00 2001 From: icfaust Date: Thu, 12 Mar 2026 21:37:47 +0100 Subject: [PATCH 03/25] add missing files --- sklearnex/preview/preprocessing/__init__.py | 19 ++ sklearnex/preview/preprocessing/_data.py | 216 ++++++++++++++++++ .../preview/preprocessing/tests/test_data.py | 134 +++++++++++ sklearnex/spmd/preprocessing/__init__.py | 19 ++ sklearnex/spmd/preprocessing/_data.py | 27 +++ .../preprocessing/tests/test_data_spmd.py | 125 ++++++++++ 6 files changed, 540 insertions(+) create mode 100644 sklearnex/preview/preprocessing/__init__.py create mode 100644 sklearnex/preview/preprocessing/_data.py create mode 100644 sklearnex/preview/preprocessing/tests/test_data.py create mode 100644 sklearnex/spmd/preprocessing/__init__.py create mode 100644 sklearnex/spmd/preprocessing/_data.py create mode 100644 sklearnex/spmd/preprocessing/tests/test_data_spmd.py diff --git a/sklearnex/preview/preprocessing/__init__.py b/sklearnex/preview/preprocessing/__init__.py new file mode 100644 index 0000000000..d0a8296b38 --- /dev/null +++ b/sklearnex/preview/preprocessing/__init__.py @@ -0,0 +1,19 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from ._data import MaxAbsScaler + +__all__ = ["MaxAbsScaler"] diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py new file mode 100644 index 0000000000..1f1c7cd6f4 --- /dev/null +++ b/sklearnex/preview/preprocessing/_data.py @@ -0,0 +1,216 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import numpy as np +from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler +from sklearn.utils.validation import check_is_fitted + +from daal4py.sklearn._n_jobs_support import control_n_jobs +from daal4py.sklearn._utils import is_sparse, sklearn_check_version +from onedal.basic_statistics import ( + IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, +) + +from ..._config import get_config +from ..._device_offload import dispatch, wrap_output_data +from ..._utils import PatchingConditionsChain +from ...base import oneDALEstimator +from ...utils._array_api import enable_array_api, get_namespace +from ...utils.validation import validate_data + + +@enable_array_api +@control_n_jobs(decorated_methods=["fit", "partial_fit", "_onedal_finalize_fit"]) +class MaxAbsScaler(oneDALEstimator, _sklearn_MaxAbsScaler): + __doc__ = _sklearn_MaxAbsScaler.__doc__ + + if sklearn_check_version("1.2"): + _parameter_constraints: dict = { + **_sklearn_MaxAbsScaler._parameter_constraints, + } + + def __init__(self, *, copy=True): + self.copy = copy + self._need_to_finalize = False + + _onedal_incremental_basic_statistics = staticmethod( + onedal_IncrementalBasicStatistics + ) + + def _onedal_supported(self, method_name, *data): + # The patching condition here checks whether the data is fit for oneDAL. + # oneDAL's IncrementalBasicStatistics expects dense input in float32/float64 format. + # MaxAbsScaler in sklearn naturally supports sparse matrices, which creates a scenario + # for a required fallback to standard sklearn if the input is sparse. + + patching_status = PatchingConditionsChain( + f"sklearn.preprocessing.{self.__class__.__name__}.{method_name}" + ) + if method_name in ["fit", "partial_fit"]: + (X,) = data + patching_status.and_conditions( + [ + (not is_sparse(X), "Sparse input is not supported"), + ] + ) + + return patching_status + + _onedal_cpu_supported = _onedal_supported + _onedal_gpu_supported = _onedal_supported + + def _onedal_finalize_fit(self, queue=None): + # This function commits the basic statistics and extracts the values we need to compute scale_. + # We need the min_ and max_ to compute the maximum absolute value per feature. + assert hasattr(self, "_onedal_estimator") + self._onedal_estimator.finalize_fit() + + xp, _ = get_namespace(self._onedal_estimator.min_) + + self.n_samples_seen_ = self._onedal_estimator.n_samples_seen_ + + # Calculate the max absolute scaler + min_abs = xp.abs(self._onedal_estimator.min_) + max_abs = xp.abs(self._onedal_estimator.max_) + self.max_abs_ = xp.maximum(min_abs, max_abs) + self.scale_ = xp.where(self.max_abs_ == 0, 1.0, self.max_abs_) + + self._need_to_finalize = False + + def _onedal_partial_fit(self, X, queue=None, check_input=True): + # partial_fit updates the internal _onedal_estimator with the present batch of X. + first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 + + # In sklearn, check_input is used to enforce validation. In combination with use_raw_input config + # it controls validation of inputs. + if check_input and not get_config()["use_raw_input"]: + xp, _ = get_namespace(X) + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + reset=first_pass, + ) + + # We keep track of the samples internally as well to mirror scikit-learn. + if first_pass: + self.n_features_in_ = X.shape[1] + self.n_samples_seen_ = X.shape[0] + else: + self.n_samples_seen_ += X.shape[0] + + if not hasattr(self, "_onedal_estimator"): + # We specifically only ask for min and max to save overhead since those are the only two + # statistics required to calculate the max_abs values. + self._onedal_estimator = self._onedal_incremental_basic_statistics( + result_options=["min", "max"] + ) + + self._onedal_estimator.partial_fit(X, queue=queue) + self._need_to_finalize = True + + def _onedal_fit(self, X, queue=None): + # For a full fit, we must reset the estimator and internal sample count to 0, + # mimicking a fresh calculation. + if not get_config()["use_raw_input"]: + xp, _ = get_namespace(X) + if sklearn_check_version("1.2"): + self._validate_params() + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + ) + else: + self.n_features_in_ = X.shape[1] + + self.n_samples_seen_ = 0 + if hasattr(self, "_onedal_estimator"): + self._onedal_estimator._reset() + + # Execute partial fit just once on the entire dataset. + self._onedal_partial_fit(X, queue=queue, check_input=False) + + # Must compute the actual class attributes from the oneDAL values. + self._onedal_finalize_fit() + + return self + + def partial_fit(self, X, y=None): + # We use dispatch so that validation occurs appropriately. The check_input feature + # acts identically to sklearn's checking strategy, hence passed through. + if sklearn_check_version("1.2"): + self._validate_params() + + # Scikit-Learn implements a check within partial fit natively, so we pass check_input=True implicitly. + dispatch( + self, + "partial_fit", + { + "onedal": self.__class__._onedal_partial_fit, + "sklearn": _sklearn_MaxAbsScaler.partial_fit, + }, + X, + ) + return self + + def fit(self, X, y=None): + if sklearn_check_version("1.2"): + self._validate_params() + + dispatch( + self, + "fit", + { + "onedal": self.__class__._onedal_fit, + "sklearn": _sklearn_MaxAbsScaler.fit, + }, + X, + ) + return self + + # Transform relies completely on standard scikit-learn functionality and does not need to + # be overridden using oneDAL capabilities as the scale vectors are appropriately populated. + + # Ensure access to the derived properties without manually calling _onedal_finalize_fit + # explicitly from the user. We wrap properties that require a finalized state. + @property + def max_abs_(self): + if hasattr(self, "_onedal_estimator") and self._need_to_finalize: + self._onedal_finalize_fit() + return self._max_abs_ + + @max_abs_.setter + def max_abs_(self, value): + self._max_abs_ = value + + @max_abs_.deleter + def max_abs_(self): + del self._max_abs_ + + @property + def scale_(self): + if hasattr(self, "_onedal_estimator") and self._need_to_finalize: + self._onedal_finalize_fit() + return self._scale_ + + @scale_.setter + def scale_(self, value): + self._scale_ = value + + @scale_.deleter + def scale_(self): + del self._scale_ diff --git a/sklearnex/preview/preprocessing/tests/test_data.py b/sklearnex/preview/preprocessing/tests/test_data.py new file mode 100644 index 0000000000..c36ee2c5d7 --- /dev/null +++ b/sklearnex/preview/preprocessing/tests/test_data.py @@ -0,0 +1,134 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import numpy as np +import pytest +from numpy.testing import assert_allclose +from scipy import sparse as sp +from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler + +from daal4py.sklearn._utils import sklearn_check_version +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) +from sklearnex import config_context +from sklearnex.preview.preprocessing import MaxAbsScaler + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import_MaxAbsScaler(dataframe, queue): + # Verify that the estimator gets properly imported from sklearnex + rng = np.random.default_rng(seed=42) + X = rng.random((10, 4)) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + est = MaxAbsScaler().fit(X) + assert "sklearnex" in est.__module__ + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_max_abs_scaler_dense_fit_transform(dataframe, queue): + # Test parity with scikit-learn for basic fit_transform behavior + rng = np.random.default_rng(seed=42) + X = rng.standard_normal((50, 5)) + + # Randomly scale some columns to have varying absolute max values + X[:, 0] *= 10 + X[:, 1] *= 0.1 + X[:, 2] += 5 + + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + # Scikit-learn Baseline + scaler_sk = _sklearn_MaxAbsScaler() + X_trans_sk = scaler_sk.fit_transform(X) + + # Sklearnex + scaler_ex = MaxAbsScaler() + X_trans_ex = scaler_ex.fit_transform(X_df) + X_trans_ex_np = _as_numpy(X_trans_ex) + + assert_allclose(scaler_ex.scale_, scaler_sk.scale_) + assert_allclose(scaler_ex.max_abs_, scaler_sk.max_abs_) + assert_allclose(X_trans_ex_np, X_trans_sk) + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_max_abs_scaler_dense_partial_fit(dataframe, queue): + # Test batch processing parity with native scikit-learn + rng = np.random.default_rng(seed=42) + X = rng.standard_normal((100, 3)) + + # create batches + X1, X2, X3 = X[:30], X[30:70], X[70:] + + # Scikit-learn baseline + scaler_sk = _sklearn_MaxAbsScaler() + for batch in [X1, X2, X3]: + scaler_sk.partial_fit(batch) + X_trans_sk = scaler_sk.transform(X) + + # Sklearnex execution + scaler_ex = MaxAbsScaler() + for batch in [X1, X2, X3]: + batch_df = _convert_to_dataframe(batch, sycl_queue=queue, target_df=dataframe) + scaler_ex.partial_fit(batch_df) + + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + X_trans_ex = scaler_ex.transform(X_df) + X_trans_ex_np = _as_numpy(X_trans_ex) + + assert scaler_ex.n_samples_seen_ == scaler_sk.n_samples_seen_ + assert_allclose(scaler_ex.scale_, scaler_sk.scale_) + assert_allclose(scaler_ex.max_abs_, scaler_sk.max_abs_) + assert_allclose(X_trans_ex_np, X_trans_sk) + + + + + +@pytest.mark.skipif( + not sklearn_check_version("1.3"), reason="lacks sklearn array API support" +) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues("dpctl,dpnp")) +def test_max_abs_scaler_array_api_dispatch(dataframe, queue): + # Ensure properties are properly constructed as the dispatched arrays using Array API + rng = np.random.default_rng(seed=42) + X = rng.standard_normal((10, 4)) + + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + with config_context(array_api_dispatch=True): + est = MaxAbsScaler().fit(X_df) + X_trans = est.transform(X_df) + + # Verify the property types respect array api execution outputs. + # The scale_ out typically relies on standard numpy if DPCTL/DPNP isn't requested natively + # via the context namespace, but let's just make sure it behaves normally. + assert hasattr(est, "scale_") + assert hasattr(est, "max_abs_") + + est.scale_ = np.ones(est.scale_.shape) + X_trans_modified = est.transform(X_df) + + X_np = _as_numpy(X_df) + X_trans_modified_np = _as_numpy(X_trans_modified) + + # Testing that after artificially modifying the scaler properties, the transform + # executes normally (just returns the raw variables over 1.0 logic). + assert_allclose(X_np, X_trans_modified_np) diff --git a/sklearnex/spmd/preprocessing/__init__.py b/sklearnex/spmd/preprocessing/__init__.py new file mode 100644 index 0000000000..d0a8296b38 --- /dev/null +++ b/sklearnex/spmd/preprocessing/__init__.py @@ -0,0 +1,19 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from ._data import MaxAbsScaler + +__all__ = ["MaxAbsScaler"] diff --git a/sklearnex/spmd/preprocessing/_data.py b/sklearnex/spmd/preprocessing/_data.py new file mode 100644 index 0000000000..1b4c5b5c2c --- /dev/null +++ b/sklearnex/spmd/preprocessing/_data.py @@ -0,0 +1,27 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from onedal.spmd.basic_statistics import ( + IncrementalBasicStatistics as onedalSPMD_IncrementalBasicStatistics, +) + +from ...preview.preprocessing import MaxAbsScaler as base_MaxAbsScaler + + +class MaxAbsScaler(base_MaxAbsScaler): + _onedal_incremental_basic_statistics = staticmethod( + onedalSPMD_IncrementalBasicStatistics + ) diff --git a/sklearnex/spmd/preprocessing/tests/test_data_spmd.py b/sklearnex/spmd/preprocessing/tests/test_data_spmd.py new file mode 100644 index 0000000000..b0cc972a5e --- /dev/null +++ b/sklearnex/spmd/preprocessing/tests/test_data_spmd.py @@ -0,0 +1,125 @@ +# ============================================================================== +# Copyright Contributors to the oneDAL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) +from sklearnex.tests.utils.spmd import ( + _get_local_tensor, + _mpi_libs_and_gpu_available, +) + + +@pytest.mark.skipif( + not _mpi_libs_and_gpu_available, + reason="GPU device and MPI libs required for test", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), +) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.mpi +def test_max_abs_scaler_fit_spmd_gold(dataframe, queue, dtype): + from sklearnex.preview.preprocessing import MaxAbsScaler + from sklearnex.spmd.preprocessing import ( + MaxAbsScaler as MaxAbsScaler_SPMD, + ) + + data = np.array( + [ + [-10.0, 0.0, 3.0], + [2.0, -1.0, 2.0], + [5.0, 2.0, -4.0], + [1.0, 3.0, 8.0], + [8.0, -4.0, 1.0], + [-1.0, 5.0, 2.0], + [-5.0, -6.0, 64.0], + [2.0, 1.0, -128.0], + ], + dtype=dtype, + ) + dpt_data = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe) + + local_dpt_data = _convert_to_dataframe( + _get_local_tensor(data), sycl_queue=queue, target_df=dataframe + ) + + # ensure results of batch algo match spmd + scaler_spmd = MaxAbsScaler_SPMD().fit(local_dpt_data) + scaler = MaxAbsScaler().fit(dpt_data) + + assert_allclose(scaler_spmd.scale_, scaler.scale_) + assert_allclose(scaler_spmd.max_abs_, scaler.max_abs_) + assert scaler_spmd.n_samples_seen_ == scaler.n_samples_seen_ + + +@pytest.mark.skipif( + not _mpi_libs_and_gpu_available, + reason="GPU device and MPI libs required for test", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), +) +@pytest.mark.parametrize("num_blocks", [1, 2]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.mpi +def test_max_abs_scaler_partial_fit_spmd_gold( + dataframe, queue, num_blocks, dtype +): + from sklearnex.preview.preprocessing import MaxAbsScaler + from sklearnex.spmd.preprocessing import ( + MaxAbsScaler as MaxAbsScaler_SPMD, + ) + + data = np.array( + [ + [-1.0, 3.0, 0.0], + [0.5, 1.0, -2.0], + [4.0, 2.0, 4.0], + [-3.0, -3.0, 8.0], + [5.0, 4.0, -16.0], + [2.0, -5.0, 32.0], + [1.0, -6.0, -64.0], + [-7.0, 8.0, 128.0], + ], + dtype=dtype, + ) + dpt_data = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe) + local_data = _get_local_tensor(data) + split_local_data = np.array_split(local_data, num_blocks) + + scaler_spmd = MaxAbsScaler_SPMD() + scaler = MaxAbsScaler() + + for i in range(num_blocks): + local_dpt_data = _convert_to_dataframe( + split_local_data[i], sycl_queue=queue, target_df=dataframe + ) + scaler_spmd.partial_fit(local_dpt_data) + + scaler.fit(dpt_data) + + assert_allclose(scaler_spmd.scale_, scaler.scale_) + assert_allclose(scaler_spmd.max_abs_, scaler.max_abs_) + assert scaler_spmd.n_samples_seen_ == scaler.n_samples_seen_ From 470fcc4ddec0898fa5110aa27d281f9bae6c9b07 Mon Sep 17 00:00:00 2001 From: icfaust Date: Thu, 12 Mar 2026 21:41:08 +0100 Subject: [PATCH 04/25] add missing tests from public sklearn conformance --- .ci/scripts/select_sklearn_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/scripts/select_sklearn_tests.py b/.ci/scripts/select_sklearn_tests.py index 868e74cee3..c77cee21b6 100644 --- a/.ci/scripts/select_sklearn_tests.py +++ b/.ci/scripts/select_sklearn_tests.py @@ -51,6 +51,7 @@ def parse_tests_tree(entry, prefix=""): "metrics/tests": ["test_pairwise.py", "test_ranking.py"], "model_selection/tests": ["test_split.py", "test_validation.py"], "neighbors/tests": ["test_lof.py", "test_neighbors.py", "test_neighbors_pipeline.py"], + "preprocessing/tests": ["test_common.py", "test_data.py"], "svm/tests": ["test_sparse.py", "test_svm.py"], "tests": "test_dummy.py", } From bfce75d2a969e26f0753f3d98c951e73682f355b Mon Sep 17 00:00:00 2001 From: icfaust Date: Thu, 12 Mar 2026 21:54:47 +0100 Subject: [PATCH 05/25] fixes for linting? --- sklearnex/preview/preprocessing/_data.py | 10 ++++------ .../preview/preprocessing/tests/test_data.py | 17 +++++++---------- .../spmd/preprocessing/tests/test_data_spmd.py | 17 ++++------------- 3 files changed, 15 insertions(+), 29 deletions(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 1f1c7cd6f4..f4c5a6a320 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -46,9 +46,7 @@ def __init__(self, *, copy=True): self.copy = copy self._need_to_finalize = False - _onedal_incremental_basic_statistics = staticmethod( - onedal_IncrementalBasicStatistics - ) + _onedal_incremental_basic_statistics = staticmethod(onedal_IncrementalBasicStatistics) def _onedal_supported(self, method_name, *data): # The patching condition here checks whether the data is fit for oneDAL. @@ -94,7 +92,7 @@ def _onedal_partial_fit(self, X, queue=None, check_input=True): # partial_fit updates the internal _onedal_estimator with the present batch of X. first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 - # In sklearn, check_input is used to enforce validation. In combination with use_raw_input config + # In sklearn, check_input is used to enforce validation. In combination with use_raw_input config # it controls validation of inputs. if check_input and not get_config()["use_raw_input"]: xp, _ = get_namespace(X) @@ -154,7 +152,7 @@ def partial_fit(self, X, y=None): # acts identically to sklearn's checking strategy, hence passed through. if sklearn_check_version("1.2"): self._validate_params() - + # Scikit-Learn implements a check within partial fit natively, so we pass check_input=True implicitly. dispatch( self, @@ -182,7 +180,7 @@ def fit(self, X, y=None): ) return self - # Transform relies completely on standard scikit-learn functionality and does not need to + # Transform relies completely on standard scikit-learn functionality and does not need to # be overridden using oneDAL capabilities as the scale vectors are appropriately populated. # Ensure access to the derived properties without manually calling _onedal_finalize_fit diff --git a/sklearnex/preview/preprocessing/tests/test_data.py b/sklearnex/preview/preprocessing/tests/test_data.py index c36ee2c5d7..ffa8d9a376 100644 --- a/sklearnex/preview/preprocessing/tests/test_data.py +++ b/sklearnex/preview/preprocessing/tests/test_data.py @@ -46,7 +46,7 @@ def test_max_abs_scaler_dense_fit_transform(dataframe, queue): # Test parity with scikit-learn for basic fit_transform behavior rng = np.random.default_rng(seed=42) X = rng.standard_normal((50, 5)) - + # Randomly scale some columns to have varying absolute max values X[:, 0] *= 10 X[:, 1] *= 0.1 @@ -73,7 +73,7 @@ def test_max_abs_scaler_dense_partial_fit(dataframe, queue): # Test batch processing parity with native scikit-learn rng = np.random.default_rng(seed=42) X = rng.standard_normal((100, 3)) - + # create batches X1, X2, X3 = X[:30], X[30:70], X[70:] @@ -99,9 +99,6 @@ def test_max_abs_scaler_dense_partial_fit(dataframe, queue): assert_allclose(X_trans_ex_np, X_trans_sk) - - - @pytest.mark.skipif( not sklearn_check_version("1.3"), reason="lacks sklearn array API support" ) @@ -110,7 +107,7 @@ def test_max_abs_scaler_array_api_dispatch(dataframe, queue): # Ensure properties are properly constructed as the dispatched arrays using Array API rng = np.random.default_rng(seed=42) X = rng.standard_normal((10, 4)) - + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) with config_context(array_api_dispatch=True): @@ -122,13 +119,13 @@ def test_max_abs_scaler_array_api_dispatch(dataframe, queue): # via the context namespace, but let's just make sure it behaves normally. assert hasattr(est, "scale_") assert hasattr(est, "max_abs_") - + est.scale_ = np.ones(est.scale_.shape) X_trans_modified = est.transform(X_df) - + X_np = _as_numpy(X_df) X_trans_modified_np = _as_numpy(X_trans_modified) - - # Testing that after artificially modifying the scaler properties, the transform + + # Testing that after artificially modifying the scaler properties, the transform # executes normally (just returns the raw variables over 1.0 logic). assert_allclose(X_np, X_trans_modified_np) diff --git a/sklearnex/spmd/preprocessing/tests/test_data_spmd.py b/sklearnex/spmd/preprocessing/tests/test_data_spmd.py index b0cc972a5e..1c8b5b5379 100644 --- a/sklearnex/spmd/preprocessing/tests/test_data_spmd.py +++ b/sklearnex/spmd/preprocessing/tests/test_data_spmd.py @@ -23,10 +23,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from sklearnex.tests.utils.spmd import ( - _get_local_tensor, - _mpi_libs_and_gpu_available, -) +from sklearnex.tests.utils.spmd import _get_local_tensor, _mpi_libs_and_gpu_available @pytest.mark.skipif( @@ -41,9 +38,7 @@ @pytest.mark.mpi def test_max_abs_scaler_fit_spmd_gold(dataframe, queue, dtype): from sklearnex.preview.preprocessing import MaxAbsScaler - from sklearnex.spmd.preprocessing import ( - MaxAbsScaler as MaxAbsScaler_SPMD, - ) + from sklearnex.spmd.preprocessing import MaxAbsScaler as MaxAbsScaler_SPMD data = np.array( [ @@ -84,13 +79,9 @@ def test_max_abs_scaler_fit_spmd_gold(dataframe, queue, dtype): @pytest.mark.parametrize("num_blocks", [1, 2]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.mpi -def test_max_abs_scaler_partial_fit_spmd_gold( - dataframe, queue, num_blocks, dtype -): +def test_max_abs_scaler_partial_fit_spmd_gold(dataframe, queue, num_blocks, dtype): from sklearnex.preview.preprocessing import MaxAbsScaler - from sklearnex.spmd.preprocessing import ( - MaxAbsScaler as MaxAbsScaler_SPMD, - ) + from sklearnex.spmd.preprocessing import MaxAbsScaler as MaxAbsScaler_SPMD data = np.array( [ From 983aa4809e34ec91fac3223e78d002ecbbdc6374 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 12 Mar 2026 22:16:52 +0100 Subject: [PATCH 06/25] forgot to add to __init__.py --- sklearnex/spmd/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearnex/spmd/__init__.py b/sklearnex/spmd/__init__.py index 2c60cc2353..21bbce02eb 100644 --- a/sklearnex/spmd/__init__.py +++ b/sklearnex/spmd/__init__.py @@ -22,4 +22,5 @@ "ensemble", "linear_model", "neighbors", + "preprocessing", ] From 7049aff8296ba9bdfc0e27865488b0e82ce498a9 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 12 Mar 2026 22:17:25 +0100 Subject: [PATCH 07/25] Update __init__.py --- sklearnex/preview/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/preview/__init__.py b/sklearnex/preview/__init__.py index 769bfbcb4b..7ade9e9a38 100644 --- a/sklearnex/preview/__init__.py +++ b/sklearnex/preview/__init__.py @@ -14,4 +14,4 @@ # limitations under the License. # ============================================================================== -__all__ = ["covariance", "decomposition"] +__all__ = ["covariance", "decomposition", "preprocessing"] From 76029c72687f910472b348e99fb507b82282e282 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 09:42:44 +0100 Subject: [PATCH 08/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index f4c5a6a320..8a11d8fe31 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -76,9 +76,7 @@ def _onedal_finalize_fit(self, queue=None): assert hasattr(self, "_onedal_estimator") self._onedal_estimator.finalize_fit() - xp, _ = get_namespace(self._onedal_estimator.min_) - - self.n_samples_seen_ = self._onedal_estimator.n_samples_seen_ + xp, _ = get_namespace(self._onedal_estimator.min) # Calculate the max absolute scaler min_abs = xp.abs(self._onedal_estimator.min_) @@ -105,7 +103,6 @@ def _onedal_partial_fit(self, X, queue=None, check_input=True): # We keep track of the samples internally as well to mirror scikit-learn. if first_pass: - self.n_features_in_ = X.shape[1] self.n_samples_seen_ = X.shape[0] else: self.n_samples_seen_ += X.shape[0] @@ -135,7 +132,7 @@ def _onedal_fit(self, X, queue=None): else: self.n_features_in_ = X.shape[1] - self.n_samples_seen_ = 0 + self.n_samples_seen_ = X.shape[0] if hasattr(self, "_onedal_estimator"): self._onedal_estimator._reset() From 1d328b09edc73bd728a47f559fee85e2b259f21e Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 10:27:33 +0100 Subject: [PATCH 09/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 8a11d8fe31..7915b23151 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -76,7 +76,7 @@ def _onedal_finalize_fit(self, queue=None): assert hasattr(self, "_onedal_estimator") self._onedal_estimator.finalize_fit() - xp, _ = get_namespace(self._onedal_estimator.min) + xp, _ = get_namespace(self._onedal_estimator.min_) # Calculate the max absolute scaler min_abs = xp.abs(self._onedal_estimator.min_) From af819ecabbdb7bbc7d351b9e5492b036c51f591a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 12:10:41 +0100 Subject: [PATCH 10/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 7915b23151..3ec2675e0b 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -82,7 +82,7 @@ def _onedal_finalize_fit(self, queue=None): min_abs = xp.abs(self._onedal_estimator.min_) max_abs = xp.abs(self._onedal_estimator.max_) self.max_abs_ = xp.maximum(min_abs, max_abs) - self.scale_ = xp.where(self.max_abs_ == 0, 1.0, self.max_abs_) + self.scale_ = xp.where(self._max_abs_ == 0, 1.0, self._max_abs_) self._need_to_finalize = False From 637a6a88c95222e23dceab2bf9ef4ba6ba6ef3ac Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 13:31:01 +0100 Subject: [PATCH 11/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 3ec2675e0b..b4a88600d7 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -132,12 +132,11 @@ def _onedal_fit(self, X, queue=None): else: self.n_features_in_ = X.shape[1] - self.n_samples_seen_ = X.shape[0] if hasattr(self, "_onedal_estimator"): self._onedal_estimator._reset() # Execute partial fit just once on the entire dataset. - self._onedal_partial_fit(X, queue=queue, check_input=False) + self._onedal_partial_fit(X, queue=queue, check_input=False, first_pass=True) # Must compute the actual class attributes from the oneDAL values. self._onedal_finalize_fit() From c89d629d3df7ae519c7e612ae9e52a7fc08a641e Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 14:00:50 +0100 Subject: [PATCH 12/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index b4a88600d7..1b13c6d871 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -132,11 +132,12 @@ def _onedal_fit(self, X, queue=None): else: self.n_features_in_ = X.shape[1] + self.n_samples_seen_ = 0 if hasattr(self, "_onedal_estimator"): self._onedal_estimator._reset() # Execute partial fit just once on the entire dataset. - self._onedal_partial_fit(X, queue=queue, check_input=False, first_pass=True) + self._onedal_partial_fit(X, queue=queue, check_input=False) # Must compute the actual class attributes from the oneDAL values. self._onedal_finalize_fit() From cb7db1682a38f6e485df9e6f85a7ef765ebe072b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 20:59:37 +0100 Subject: [PATCH 13/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 31 +++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 1b13c6d871..f8701071b7 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -14,12 +14,12 @@ # limitations under the License. # ============================================================================== -import numpy as np from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler from sklearn.utils.validation import check_is_fitted from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import is_sparse, sklearn_check_version +from onedal._device_offload import support_sycl_format from onedal.basic_statistics import ( IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, ) @@ -29,7 +29,22 @@ from ..._utils import PatchingConditionsChain from ...base import oneDALEstimator from ...utils._array_api import enable_array_api, get_namespace -from ...utils.validation import validate_data +from ..utils.validation import ( + _finite_keyword, + assert_all_finite, + validate_data, +) + +__check_kwargs = { + "dtype": None, + "ensure_2d": False, + "ensure_min_samples": 0, + "ensure_min_features": 0, + "accept_sparse": True, + _finite_keyword: False, +} + +_check_array = partial(check_array, **__check_kwargs) @enable_array_api @@ -59,9 +74,16 @@ def _onedal_supported(self, method_name, *data): ) if method_name in ["fit", "partial_fit"]: (X,) = data + try: + X_test = _check_array(X) + assert_all_finite(X_test) # minimally verify the data + input_is_finite = True + except ValueError: + input_is_finite = False patching_status.and_conditions( [ (not is_sparse(X), "Sparse input is not supported"), + (input_is_finite, "Non-finite input is not supported."), ] ) @@ -99,6 +121,7 @@ def _onedal_partial_fit(self, X, queue=None, check_input=True): X, dtype=[xp.float64, xp.float32], reset=first_pass, + ensure_all_finite=False, ) # We keep track of the samples internally as well to mirror scikit-learn. @@ -128,6 +151,7 @@ def _onedal_fit(self, X, queue=None): self, X, dtype=[xp.float64, xp.float32], + ensure_all_finite=False, ) else: self.n_features_in_ = X.shape[1] @@ -179,7 +203,8 @@ def fit(self, X, y=None): # Transform relies completely on standard scikit-learn functionality and does not need to # be overridden using oneDAL capabilities as the scale vectors are appropriately populated. - + transform = support_sycl_format(_sklearn_MaxAbsScaler.transform) + # Ensure access to the derived properties without manually calling _onedal_finalize_fit # explicitly from the user. We wrap properties that require a finalized state. @property From 5013a7d25d219083257b3768eacbcb8dddea8592 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 21:11:48 +0100 Subject: [PATCH 14/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index f8701071b7..fa300d9f66 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -29,7 +29,7 @@ from ..._utils import PatchingConditionsChain from ...base import oneDALEstimator from ...utils._array_api import enable_array_api, get_namespace -from ..utils.validation import ( +from ...utils.validation import ( _finite_keyword, assert_all_finite, validate_data, From d04223f5b7e39921a246f3e83e1e97e34d3b9737 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 13 Mar 2026 21:41:09 +0100 Subject: [PATCH 15/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index fa300d9f66..f5f20b03f3 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -14,6 +14,8 @@ # limitations under the License. # ============================================================================== +from functools import partial + from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler from sklearn.utils.validation import check_is_fitted From 202743755a566ae46827344c0fb9a5bd499ae96a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Mar 2026 01:51:31 +0100 Subject: [PATCH 16/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index f5f20b03f3..00750f1cc4 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -17,7 +17,7 @@ from functools import partial from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_array, check_is_fitted from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import is_sparse, sklearn_check_version From 69f4de1cc2d568ced930ba157d35af9038d456bc Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Mar 2026 02:28:54 +0100 Subject: [PATCH 17/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 00750f1cc4..c6cb846a2b 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -206,7 +206,7 @@ def fit(self, X, y=None): # Transform relies completely on standard scikit-learn functionality and does not need to # be overridden using oneDAL capabilities as the scale vectors are appropriately populated. transform = support_sycl_format(_sklearn_MaxAbsScaler.transform) - + # Ensure access to the derived properties without manually calling _onedal_finalize_fit # explicitly from the user. We wrap properties that require a finalized state. @property From 97685f865725df0e6f87e94fc796c861735d201e Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Mar 2026 02:36:29 +0100 Subject: [PATCH 18/25] Update deselected_tests.yaml --- deselected_tests.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 83af6b7ca2..6e151bc4f7 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -406,6 +406,9 @@ deselected_tests: # CI jobs in sklearnex compile scikit-learn from source, not necessarily with the same toolkits as sklearn's CIs - preprocessing/tests/test_polynomial.py::test_sizeof_LARGEST_INT_t + # sklearn does not support n_jobs in preprocessing estimators + - preprocessing/tests/test_common.py::test_missing_value_handling[est0-maxabs_scale-True-False-omit_kwargs0] + # -------------------------------------------------------- # No need to test daal4py patching reduced_tests: From 1476b9cb7484bf803a98b4dd7620245634ee3dd6 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Mar 2026 06:12:35 +0100 Subject: [PATCH 19/25] Update incremental_basic_statistics.py --- onedal/basic_statistics/incremental_basic_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index 20aa918ddb..5c8e87f5cf 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -131,7 +131,7 @@ def partial_fit(self, X, sample_weight=None, queue=None): X_table, sample_weight_table = to_table(X, sample_weight, queue=queue) if not hasattr(self, "_onedal_params"): - self._onedal_params = self._get_onedal_params(False, dtype=X.dtype) + self._onedal_params = self._get_onedal_params(False, dtype=X_table.dtype) self._partial_result = self.partial_compute( self._onedal_params, self._partial_result, X_table, sample_weight_table From 2238d15e148bb1da1bdaa33ec6c72a90ae83cdaa Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Mar 2026 06:48:39 +0100 Subject: [PATCH 20/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index c6cb846a2b..3f649b7ed6 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -59,8 +59,9 @@ class MaxAbsScaler(oneDALEstimator, _sklearn_MaxAbsScaler): **_sklearn_MaxAbsScaler._parameter_constraints, } - def __init__(self, *, copy=True): + def __init__(self, *, copy=True, clip=False): self.copy = copy + self.clip = clip self._need_to_finalize = False _onedal_incremental_basic_statistics = staticmethod(onedal_IncrementalBasicStatistics) From 2a7096432a46a47138be191c321484aeccfc96e9 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 14 Mar 2026 08:41:46 +0100 Subject: [PATCH 21/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 3f649b7ed6..2a0950c630 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -17,6 +17,7 @@ from functools import partial from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler +from sklearn.preprocessing._data import _handle_zeros_in_scale from sklearn.utils.validation import check_array, check_is_fitted from daal4py.sklearn._n_jobs_support import control_n_jobs @@ -107,7 +108,7 @@ def _onedal_finalize_fit(self, queue=None): min_abs = xp.abs(self._onedal_estimator.min_) max_abs = xp.abs(self._onedal_estimator.max_) self.max_abs_ = xp.maximum(min_abs, max_abs) - self.scale_ = xp.where(self._max_abs_ == 0, 1.0, self._max_abs_) + self.scale_ = _handle_zeros_in_scale(self._max_abs_, copy=True) self._need_to_finalize = False From 0d0ca1d5860aa8601b1f1999d2f559bf8d486f8e Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 19 Mar 2026 10:33:25 +0100 Subject: [PATCH 22/25] Update test_data_spmd.py --- sklearnex/spmd/preprocessing/tests/test_data_spmd.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearnex/spmd/preprocessing/tests/test_data_spmd.py b/sklearnex/spmd/preprocessing/tests/test_data_spmd.py index 1c8b5b5379..02a36c1120 100644 --- a/sklearnex/spmd/preprocessing/tests/test_data_spmd.py +++ b/sklearnex/spmd/preprocessing/tests/test_data_spmd.py @@ -65,7 +65,6 @@ def test_max_abs_scaler_fit_spmd_gold(dataframe, queue, dtype): assert_allclose(scaler_spmd.scale_, scaler.scale_) assert_allclose(scaler_spmd.max_abs_, scaler.max_abs_) - assert scaler_spmd.n_samples_seen_ == scaler.n_samples_seen_ @pytest.mark.skipif( @@ -113,4 +112,3 @@ def test_max_abs_scaler_partial_fit_spmd_gold(dataframe, queue, num_blocks, dtyp assert_allclose(scaler_spmd.scale_, scaler.scale_) assert_allclose(scaler_spmd.max_abs_, scaler.max_abs_) - assert scaler_spmd.n_samples_seen_ == scaler.n_samples_seen_ From bda82bd573078a3bad83d847db3fef06187d303d Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 6 May 2026 00:34:48 +0200 Subject: [PATCH 23/25] Update _data.py --- sklearnex/preview/preprocessing/_data.py | 27 ++++++++++-------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/sklearnex/preview/preprocessing/_data.py b/sklearnex/preview/preprocessing/_data.py index 2a0950c630..bdc7f56a4e 100644 --- a/sklearnex/preview/preprocessing/_data.py +++ b/sklearnex/preview/preprocessing/_data.py @@ -27,7 +27,6 @@ IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, ) -from ..._config import get_config from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain from ...base import oneDALEstimator @@ -116,9 +115,8 @@ def _onedal_partial_fit(self, X, queue=None, check_input=True): # partial_fit updates the internal _onedal_estimator with the present batch of X. first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 - # In sklearn, check_input is used to enforce validation. In combination with use_raw_input config - # it controls validation of inputs. - if check_input and not get_config()["use_raw_input"]: + # In sklearn, check_input is used to enforce validation. + if check_input: xp, _ = get_namespace(X) X = validate_data( self, @@ -147,18 +145,15 @@ def _onedal_partial_fit(self, X, queue=None, check_input=True): def _onedal_fit(self, X, queue=None): # For a full fit, we must reset the estimator and internal sample count to 0, # mimicking a fresh calculation. - if not get_config()["use_raw_input"]: - xp, _ = get_namespace(X) - if sklearn_check_version("1.2"): - self._validate_params() - X = validate_data( - self, - X, - dtype=[xp.float64, xp.float32], - ensure_all_finite=False, - ) - else: - self.n_features_in_ = X.shape[1] + xp, _ = get_namespace(X) + if sklearn_check_version("1.2"): + self._validate_params() + X = validate_data( + self, + X, + dtype=[xp.float64, xp.float32], + ensure_all_finite=False, + ) self.n_samples_seen_ = 0 if hasattr(self, "_onedal_estimator"): From 53981fa3909c538d50fa7248f88c328b79def174 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Tue, 2 Jun 2026 23:36:31 +0200 Subject: [PATCH 24/25] Update requirements-test.txt --- requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-test.txt b/requirements-test.txt index 538bb01200..19fdff2ba2 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,7 +3,7 @@ pytest==9.0.3 ; python_version >= '3.11' pytest-json-report==1.5.0 pytest-cov==7.1.0 pytest-mock==3.15.1 -numpy>=1.21.6 ; python_version <= '3.10' +numpy>=1.21.6, < 1.24.0 ; python_version <= '3.10' numpy>=1.23.5 ; python_version == '3.11' numpy>=2.0.0 ; python_version >= '3.12' scikit-learn==1.7.2 ; python_version <= '3.10' From f2683751f7370847b4cfd2c0e9c9bb4962d45007 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Tue, 2 Jun 2026 23:37:22 +0200 Subject: [PATCH 25/25] Update requirements-test.txt --- requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-test.txt b/requirements-test.txt index 19fdff2ba2..c55ff561ac 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,7 +3,7 @@ pytest==9.0.3 ; python_version >= '3.11' pytest-json-report==1.5.0 pytest-cov==7.1.0 pytest-mock==3.15.1 -numpy>=1.21.6, < 1.24.0 ; python_version <= '3.10' +numpy>=1.21.6,<1.24.0 ; python_version <= '3.10' numpy>=1.23.5 ; python_version == '3.11' numpy>=2.0.0 ; python_version >= '3.12' scikit-learn==1.7.2 ; python_version <= '3.10'