Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
45613ff
test with antigravity
icfaust Mar 12, 2026
3520f0b
add missing spmd interface
icfaust Mar 12, 2026
3af4717
add missing files
icfaust Mar 12, 2026
470fcc4
add missing tests from public sklearn conformance
icfaust Mar 12, 2026
bfce75d
fixes for linting?
icfaust Mar 12, 2026
983aa48
forgot to add to __init__.py
icfaust Mar 12, 2026
7049aff
Update __init__.py
icfaust Mar 12, 2026
76029c7
Update _data.py
icfaust Mar 13, 2026
1d328b0
Update _data.py
icfaust Mar 13, 2026
af819ec
Update _data.py
icfaust Mar 13, 2026
637a6a8
Update _data.py
icfaust Mar 13, 2026
c89d629
Update _data.py
icfaust Mar 13, 2026
cb7db16
Update _data.py
icfaust Mar 13, 2026
5013a7d
Update _data.py
icfaust Mar 13, 2026
d04223f
Update _data.py
icfaust Mar 13, 2026
2027437
Update _data.py
icfaust Mar 14, 2026
69f4de1
Update _data.py
icfaust Mar 14, 2026
97685f8
Update deselected_tests.yaml
icfaust Mar 14, 2026
1476b9c
Update incremental_basic_statistics.py
icfaust Mar 14, 2026
2238d15
Update _data.py
icfaust Mar 14, 2026
2a70964
Update _data.py
icfaust Mar 14, 2026
aeb0d89
Merge branch 'uxlfoundation:main' into maxabs_test
icfaust Mar 17, 2026
0d0ca1d
Update test_data_spmd.py
icfaust Mar 19, 2026
e32ff6f
Merge branch 'uxlfoundation:main' into maxabs_test
icfaust Mar 20, 2026
7ed49c8
Merge branch 'uxlfoundation:main' into maxabs_test
icfaust Apr 18, 2026
8b2db10
Merge branch 'main' into maxabs_test
icfaust May 5, 2026
bda82bd
Update _data.py
icfaust May 5, 2026
7dea094
Merge branch 'uxlfoundation:main' into maxabs_test
icfaust May 7, 2026
ab48c71
Merge branch 'main' into maxabs_test
icfaust May 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ci/scripts/select_sklearn_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def parse_tests_tree(entry, prefix=""):
"metrics/tests": ["test_pairwise.py", "test_ranking.py"],
"model_selection/tests": ["test_split.py", "test_validation.py"],
"neighbors/tests": ["test_lof.py", "test_neighbors.py", "test_neighbors_pipeline.py"],
"preprocessing/tests": ["test_common.py", "test_data.py"],
"svm/tests": ["test_sparse.py", "test_svm.py"],
"tests": "test_dummy.py",
}
Expand Down
3 changes: 3 additions & 0 deletions deselected_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,9 @@ deselected_tests:
# CI jobs in sklearnex compile scikit-learn from source, not necessarily with the same toolkits as sklearn's CIs
- preprocessing/tests/test_polynomial.py::test_sizeof_LARGEST_INT_t

# sklearn does not support n_jobs in preprocessing estimators
- preprocessing/tests/test_common.py::test_missing_value_handling[est0-maxabs_scale-True-False-omit_kwargs0]

# Fails due to numeric tolerances on some AMD systems
- linear_model/tests/test_base.py::test_linear_regression_vs_lstsq[float32]

Expand Down
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,7 @@ class build(onedal_build, orig_build.build):
"sklearnex.preview.covariance",
"sklearnex.preview.decomposition",
"sklearnex.preview.linear_model",
"sklearnex.preview.preprocessing",
"sklearnex.svm",
"sklearnex.utils",
]
Expand Down Expand Up @@ -586,7 +587,11 @@ class build(onedal_build, orig_build.build):
"sklearnex.spmd.neighbors",
]
if ONEDAL_VERSION >= 20230200:
packages_with_tests += ["onedal.spmd.cluster", "sklearnex.spmd.cluster"]
packages_with_tests += [
"onedal.spmd.cluster",
"sklearnex.spmd.cluster",
"sklearnex.spmd.preprocessing",
]

setup(
name="scikit-learn-intelex",
Expand Down
9 changes: 9 additions & 0 deletions sklearnex/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,17 @@ def get_patch_map_core(preview: bool = False) -> PatchMap:

import sklearn.covariance as covariance_module
import sklearn.decomposition as decomposition_module
import sklearn.preprocessing as preprocessing_module
from sklearn.covariance import EmpiricalCovariance as EmpiricalCovariance_sklearn
from sklearn.decomposition import IncrementalPCA as IncrementalPCA_sklearn
from sklearn.preprocessing import MaxAbsScaler as MaxAbsScaler_sklearn

# Preview classes for patching
from .preview.covariance import (
EmpiricalCovariance as EmpiricalCovariance_sklearnex,
)
from .preview.decomposition import IncrementalPCA as IncrementalPCA_sklearnex
from .preview.preprocessing import MaxAbsScaler as MaxAbsScaler_sklearnex

# Since the state of the lru_cache without preview cannot be
# guaranteed to not have already enabled sklearnex algorithms
Expand All @@ -82,6 +85,12 @@ def get_patch_map_core(preview: bool = False) -> PatchMap:
IncrementalPCA_sklearnex,
IncrementalPCA_sklearn,
),
"sklearn.preprocessing.MaxAbsScaler": (
preprocessing_module,
"MaxAbsScaler",
MaxAbsScaler_sklearnex,
MaxAbsScaler_sklearn,
),
}
if daal_check_version((2024, "P", 1)):
import sklearn.linear_model as linear_model_module
Expand Down
2 changes: 1 addition & 1 deletion sklearnex/preview/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# limitations under the License.
# ==============================================================================

__all__ = ["covariance", "decomposition"]
__all__ = ["covariance", "decomposition", "preprocessing"]
19 changes: 19 additions & 0 deletions sklearnex/preview/preprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ==============================================================================
# Copyright Contributors to the oneDAL Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from ._data import MaxAbsScaler

__all__ = ["MaxAbsScaler"]
235 changes: 235 additions & 0 deletions sklearnex/preview/preprocessing/_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
# ==============================================================================
# Copyright Contributors to the oneDAL Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from functools import partial

from sklearn.preprocessing import MaxAbsScaler as _sklearn_MaxAbsScaler
from sklearn.preprocessing._data import _handle_zeros_in_scale
from sklearn.utils.validation import check_array, check_is_fitted

from daal4py.sklearn._n_jobs_support import control_n_jobs
from daal4py.sklearn._utils import is_sparse, sklearn_check_version
from onedal._device_offload import support_sycl_format
from onedal.basic_statistics import (
IncrementalBasicStatistics as onedal_IncrementalBasicStatistics,
)

from ..._device_offload import dispatch, wrap_output_data
from ..._utils import PatchingConditionsChain
from ...base import oneDALEstimator
from ...utils._array_api import enable_array_api, get_namespace
from ...utils.validation import (
_finite_keyword,
assert_all_finite,
validate_data,
)

# Keyword arguments for a deliberately permissive ``check_array`` call: keep
# the incoming dtype, do not require 2D input or a minimum number of
# samples/features, and accept sparse containers.
__check_kwargs = dict(
    dtype=None,
    ensure_2d=False,
    ensure_min_samples=0,
    ensure_min_features=0,
    accept_sparse=True,
)
# The name of the finiteness keyword is supplied by ``_finite_keyword`` (it is
# resolved elsewhere in the project), so it cannot be spelled as a literal
# keyword above. It is set to False here.
__check_kwargs[_finite_keyword] = False

# ``check_array`` pre-bound with the permissive settings above.
_check_array = partial(check_array, **__check_kwargs)


@enable_array_api
@control_n_jobs(decorated_methods=["fit", "partial_fit", "_onedal_finalize_fit"])
class MaxAbsScaler(oneDALEstimator, _sklearn_MaxAbsScaler):
    __doc__ = _sklearn_MaxAbsScaler.__doc__

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {
            **_sklearn_MaxAbsScaler._parameter_constraints,
        }

    def __init__(self, *, copy=True, clip=False):
        self.copy = copy
        self.clip = clip
        # Private bookkeeping flag (not a hyper-parameter): True while batches
        # have been handed to the oneDAL estimator but ``max_abs_``/``scale_``
        # have not yet been recomputed from them.
        self._need_to_finalize = False

    # Backend class used to accumulate per-feature statistics. It is stored as
    # a staticmethod so that attribute access does not bind ``self`` to it.
    _onedal_incremental_basic_statistics = staticmethod(onedal_IncrementalBasicStatistics)

    def _onedal_supported(self, method_name, *data):
        """Build the patching conditions for running ``method_name`` on oneDAL.

        Parameters
        ----------
        method_name : str
            Name of the dispatched method. Conditions are only added for
            ``"fit"`` and ``"partial_fit"``.
        *data : tuple
            Positional data of the dispatched call. For ``fit`` and
            ``partial_fit`` this must contain exactly one element, ``X``.

        Returns
        -------
        PatchingConditionsChain
            The chain holding every condition that was evaluated.
        """
        # The patching condition here checks whether the data is fit for oneDAL.
        # oneDAL's IncrementalBasicStatistics expects dense input in float32/float64 format.
        # MaxAbsScaler in sklearn naturally supports sparse matrices, which creates a scenario
        # for a required fallback to standard sklearn if the input is sparse.
        patching_status = PatchingConditionsChain(
            f"sklearn.preprocessing.{self.__class__.__name__}.{method_name}"
        )
        if method_name in ["fit", "partial_fit"]:
            (X,) = data
            try:
                X_test = _check_array(X)
                assert_all_finite(X_test)  # minimally verify the data
                input_is_finite = True
            except ValueError:
                # Any ValueError raised by the permissive conversion or by the
                # finiteness check routes the call to the sklearn fallback.
                input_is_finite = False
            patching_status.and_conditions(
                [
                    (not is_sparse(X), "Sparse input is not supported"),
                    # NOTE(review): benchmarking is still required to find where
                    # the stock sklearn implementation is faster at computing
                    # the min and max; a condition should then be added here so
                    # that oneDAL is only used when it actually accelerates.
                    (input_is_finite, "Non-finite input is not supported."),
                ]
            )

        return patching_status

    # The same conditions are used for both device types.
    _onedal_cpu_supported = _onedal_supported
    _onedal_gpu_supported = _onedal_supported

    def _onedal_finalize_fit(self, queue=None):
        """Finalize the oneDAL statistics and derive ``max_abs_`` and ``scale_``.

        Parameters
        ----------
        queue : object, default=None
            Not referenced in this method body.
        """
        # This function commits the basic statistics and extracts the values we need to compute scale_.
        # We need the min_ and max_ to compute the maximum absolute value per feature.
        assert hasattr(self, "_onedal_estimator")
        self._onedal_estimator.finalize_fit()

        xp, _ = get_namespace(self._onedal_estimator.min_)

        # Calculate the max absolute scaler
        min_abs = xp.abs(self._onedal_estimator.min_)
        max_abs = xp.abs(self._onedal_estimator.max_)
        self.max_abs_ = xp.maximum(min_abs, max_abs)
        # Read the private backing field: the public ``max_abs_`` getter would
        # re-enter this method while ``_need_to_finalize`` is still True.
        self.scale_ = _handle_zeros_in_scale(self._max_abs_, copy=True)

        self._need_to_finalize = False

    def _onedal_partial_fit(self, X, queue=None, check_input=True):
        """Feed one batch ``X`` to the oneDAL incremental estimator.

        Parameters
        ----------
        X : array-like
            Batch of data. It is validated here when ``check_input`` is True.
        queue : object, default=None
            Forwarded to the oneDAL estimator's ``partial_fit``.
        check_input : bool, default=True
            Whether to run ``validate_data`` on ``X`` before use.
        """
        # partial_fit updates the internal _onedal_estimator with the present batch of X.
        first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0

        # In sklearn, check_input is used to enforce validation.
        if check_input:
            xp, _ = get_namespace(X)
            X = validate_data(
                self,
                X,
                dtype=[xp.float64, xp.float32],
                reset=first_pass,
                ensure_all_finite=False,
            )

        # We keep track of the samples internally as well to mirror scikit-learn.
        if first_pass:
            self.n_samples_seen_ = X.shape[0]
        else:
            self.n_samples_seen_ += X.shape[0]

        if not hasattr(self, "_onedal_estimator"):
            # We specifically only ask for min and max to save overhead since those are the only two
            # statistics required to calculate the max_abs values.
            self._onedal_estimator = self._onedal_incremental_basic_statistics(
                result_options=["min", "max"]
            )

        self._onedal_estimator.partial_fit(X, queue=queue)
        # Defer the recomputation of max_abs_/scale_ until they are needed.
        self._need_to_finalize = True

    def _onedal_fit(self, X, queue=None):
        """Fit from scratch on ``X`` using oneDAL and return ``self``.

        Parameters
        ----------
        X : array-like
            Training data, validated here with ``validate_data``.
        queue : object, default=None
            Forwarded to ``_onedal_partial_fit``.

        Returns
        -------
        self : MaxAbsScaler
            The fitted estimator.
        """
        # For a full fit, we must reset the estimator and internal sample count to 0,
        # mimicking a fresh calculation.
        xp, _ = get_namespace(X)
        if sklearn_check_version("1.2"):
            self._validate_params()
        X = validate_data(
            self,
            X,
            dtype=[xp.float64, xp.float32],
            ensure_all_finite=False,
        )

        self.n_samples_seen_ = 0
        if hasattr(self, "_onedal_estimator"):
            self._onedal_estimator._reset()

        # Execute partial fit just once on the entire dataset.
        # X was validated above, so input checking is skipped.
        self._onedal_partial_fit(X, queue=queue, check_input=False)

        # Must compute the actual class attributes from the oneDAL values.
        self._onedal_finalize_fit()

        return self

    def partial_fit(self, X, y=None):
        # We use dispatch so that validation occurs appropriately. The check_input feature
        # acts identically to sklearn's checking strategy, hence passed through.
        if sklearn_check_version("1.2"):
            self._validate_params()

        # Scikit-Learn implements a check within partial fit natively, so we pass check_input=True implicitly.
        dispatch(
            self,
            "partial_fit",
            {
                "onedal": self.__class__._onedal_partial_fit,
                "sklearn": _sklearn_MaxAbsScaler.partial_fit,
            },
            X,
        )
        return self

    def fit(self, X, y=None):
        if sklearn_check_version("1.2"):
            self._validate_params()

        dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": _sklearn_MaxAbsScaler.fit,
            },
            X,
        )
        return self

    # Transform relies completely on standard scikit-learn functionality and does not need to
    # be overridden using oneDAL capabilities as the scale vectors are appropriately populated.
    transform = support_sycl_format(_sklearn_MaxAbsScaler.transform)

    # The class docstring is copied from scikit-learn above, but an overriding
    # method does not inherit its parent's ``__doc__``. Copy the public method
    # docstrings as well so the user-facing API documentation is not lost.
    fit.__doc__ = _sklearn_MaxAbsScaler.fit.__doc__
    partial_fit.__doc__ = _sklearn_MaxAbsScaler.partial_fit.__doc__

    # Ensure access to the derived properties without manually calling _onedal_finalize_fit
    # explicitly from the user. We wrap properties that require a finalized state.
    @property
    def max_abs_(self):
        # Finalize lazily if batches are still pending, then read the stored value.
        if hasattr(self, "_onedal_estimator") and self._need_to_finalize:
            self._onedal_finalize_fit()
        return self._max_abs_

    @max_abs_.setter
    def max_abs_(self, value):
        self._max_abs_ = value

    @max_abs_.deleter
    def max_abs_(self):
        del self._max_abs_

    @property
    def scale_(self):
        # Finalize lazily if batches are still pending, then read the stored value.
        if hasattr(self, "_onedal_estimator") and self._need_to_finalize:
            self._onedal_finalize_fit()
        return self._scale_

    @scale_.setter
    def scale_(self, value):
        self._scale_ = value

    @scale_.deleter
    def scale_(self):
        del self._scale_
Loading
Loading