Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion sklearnex/neighbors/_lof.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from sklearnex.neighbors.knn_unsupervised import NearestNeighbors

from ..utils._array_api import enable_array_api, get_namespace
from ..utils.validation import validate_data


@enable_array_api
Expand Down Expand Up @@ -141,6 +140,10 @@ def fit(self, X, y=None):
self._fit_X = xp.asarray(self._fit_X, device=device)
return self

# Note: this is overriding an internal method from scikit-learn with
# the same signature. In this case, 'validate_data' is called during
# 'decision_function', which calls '.kneighbors()'. Hence, it doesn't
# need to validate the namespace of 'X' with '_fit_X' here.
def _predict(self, X=None):
check_is_fitted(self)

Expand Down
56 changes: 39 additions & 17 deletions sklearnex/neighbors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

if sklearn_check_version("1.9"):
from sklearn.utils._sparse import _align_api_if_sparse
from sklearn.utils._array_api import get_namespace_and_device, move_to

from onedal._device_offload import _transfer_to_host
from onedal.utils._array_api import _is_numpy_namespace
Expand All @@ -37,7 +38,6 @@
from .._utils import PatchingConditionsChain
from ..base import oneDALEstimator
from ..utils._array_api import get_namespace
from ..utils.validation import validate_data


class KNeighborsDispatchingBase(oneDALEstimator):
Expand All @@ -51,11 +51,20 @@ def _get_weights(self, dist, weights):
# if user attempts to classify a point that was zero distance from one
# or more training points, those training points are weighted as 1.0
# and the other points as 0.0
with xp.errstate(divide="ignore"):
dist = 1.0 / dist
if _is_numpy_namespace(xp):
with xp.errstate(divide="ignore"):
dist = 1.0 / dist
else:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist = 1.0 / dist
inf_mask = xp.isinf(dist)
inf_row = xp.any(inf_mask, axis=1)
dist[inf_row] = inf_mask[inf_row]
if _is_numpy_namespace(xp):
# Note: older numpy do not have 'np.astype'
dist[inf_row] = inf_mask[inf_row]
else:
dist[inf_row] = xp.astype(inf_mask[inf_row], dist.dtype)
return dist
elif callable(weights):
return weights(dist)
Expand Down Expand Up @@ -84,11 +93,19 @@ def _compute_weighted_prediction(self, neigh_dist, neigh_ind, weights_param, y_t
array-like
Predicted values.
"""
xp, _ = get_namespace(y_train)
if not _is_numpy_namespace(xp):
# Note: in theory, the logic should be that 'y_train' should be converted
# to the namespace of 'neigh_dist', but by this point, 'y_train' should
# already have been moved to X's namespace, so it's fine to move 'neigh_dist'.
if sklearn_check_version("1.9"):
xp, _, device = get_namespace_and_device(y_train)
neigh_dist = move_to(neigh_dist, xp=xp, device=device)
neigh_ind = move_to(neigh_ind, xp=xp, device=device)
else:
xp, _ = get_namespace(y_train)
device = getattr(y_train, "device", None)
neigh_dist = xp.asarray(neigh_dist, device=device)
neigh_ind = xp.asarray(neigh_ind, device=device)
if not _is_numpy_namespace(xp):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we also need some explanation about why do we need this numpy check

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It has a different codepath for numpy with operations that are not supported by array API.

neigh_dist = xp.asarray(neigh_dist, device=device)
neigh_ind = xp.asarray(neigh_ind, device=device)

weights = self._get_weights(neigh_dist, weights_param)

Expand All @@ -113,9 +130,7 @@ def _compute_weighted_prediction(self, neigh_dist, neigh_ind, weights_param, y_t
y_pred_shape = (neigh_ind.shape[0], _y.shape[1])
if not _is_numpy_namespace(xp):
# Array API: pass device to ensure same device as input
y_pred = xp.empty(
y_pred_shape, dtype=neigh_dist.dtype, device=neigh_ind.device
)
y_pred = xp.empty(y_pred_shape, dtype=neigh_dist.dtype, device=device)
else:
# Numpy: no device parameter
y_pred = xp.empty(y_pred_shape, dtype=neigh_dist.dtype)
Expand Down Expand Up @@ -164,11 +179,16 @@ def _compute_class_probabilities(
array-like
Class probabilities.
"""
xp, _ = get_namespace(y_train)
if not _is_numpy_namespace(xp):
if sklearn_check_version("1.9"):
xp, _, device = get_namespace_and_device(y_train)
neigh_dist = move_to(neigh_dist, xp=xp, device=device)
neigh_ind = move_to(neigh_ind, xp=xp, device=device)
else:
xp, _ = get_namespace(y_train)
device = getattr(y_train, "device", None)
neigh_dist = xp.asarray(neigh_dist, device=device)
neigh_ind = xp.asarray(neigh_ind, device=device)
if not _is_numpy_namespace(xp):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the logic of this if-statement? As I understand it previously it was that neigh_dist and neigh_ind originally have numpy type and we only need to convert them if y is not a numpy. Is it correct? If yes do we need the same logic after the array-api update?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is how it was before if you look at the changes. I guess the purpose is to have them work with the other arrays.

neigh_dist = xp.asarray(neigh_dist, device=device)
neigh_ind = xp.asarray(neigh_ind, device=device)

_y = y_train
classes_ = classes
Expand Down Expand Up @@ -207,9 +227,9 @@ def _compute_class_probabilities(
proba_k = xp.zeros(
(n_classes, n_queries),
dtype=neigh_dist.dtype,
device=neigh_dist.device,
device=device,
)
zero = xp.zeros(1, dtype=neigh_dist.dtype, device=neigh_dist.device)
zero = xp.zeros(1, dtype=neigh_dist.dtype, device=device)
for c in range(n_classes):
mask = pred_labels == c
proba_k[c, :] = xp.sum(xp.where(mask, weights_k, zero), axis=1)
Expand Down Expand Up @@ -654,6 +674,8 @@ def _onedal_gpu_supported(self, method_name, *data):
def _onedal_cpu_supported(self, method_name, *data):
return self._onedal_supported("cpu", method_name, *data)

# Note: since this transfers the data to host, it doesn't validate
# that the array namespaces and devices of 'X' and '_fit_X' match.
def kneighbors_graph(self, X=None, n_neighbors=None, mode="connectivity"):
check_is_fitted(self)
if n_neighbors is None:
Expand Down
35 changes: 30 additions & 5 deletions sklearnex/neighbors/knn_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@
from ..utils.validation import validate_data
from .common import KNeighborsDispatchingBase

if sklearn_check_version("1.9"):
from sklearn.utils._array_api import (
check_same_namespace,
get_namespace_and_device,
move_to,
)


@enable_array_api
@control_n_jobs(
Expand Down Expand Up @@ -72,7 +79,12 @@ def __init__(
)

def fit(self, X, y):
xp, is_array_api = get_namespace(X)
if sklearn_check_version("1.9"):
xp, is_array_api, device = get_namespace_and_device(X)
else:
xp, is_array_api = get_namespace(X)
device = getattr(X, "device", None)

dispatch(
self,
"fit",
Expand All @@ -86,7 +98,6 @@ def fit(self, X, y):
# Ensure _fit_X matches the input namespace so that
# kneighbors(X=None) can use get_namespace(self._fit_X).
if is_array_api and not _is_numpy_namespace(xp):
Comment thread
avolkov-intel marked this conversation as resolved.
device = getattr(X, "device", None)
self._fit_X = xp.asarray(self._fit_X, device=device)
return self

Expand Down Expand Up @@ -169,7 +180,7 @@ def _onedal_fit(self, X, y, queue=None):
)

# Process classification targets before passing to onedal
self._process_classification_targets(y, skip_validation=False)
self._process_classification_targets(X, y, skip_validation=False)

# Call onedal backend
onedal_params = {
Expand Down Expand Up @@ -200,7 +211,7 @@ def _onedal_fit(self, X, y, queue=None):
# Post-processing
self._save_attributes()

def _process_classification_targets(self, y, skip_validation=False):
def _process_classification_targets(self, X, y, skip_validation=False):
"""Process classification targets and set class-related attributes.

Parameters
Expand Down Expand Up @@ -246,6 +257,10 @@ def _process_classification_targets(self, y, skip_validation=False):
self.classes_ = self.classes_[0]
self._y = xp.reshape(self._y, (-1,))

if sklearn_check_version("1.9"):
xp_X, _, device = get_namespace_and_device(X)
self._y = move_to(self._y, xp=xp_X, device=device)

def _onedal_predict(self, X, queue=None):
if X is not None:
xp, _ = get_namespace(X)
Expand All @@ -256,14 +271,20 @@ def _onedal_predict(self, X, queue=None):
accept_sparse="csr",
reset=False,
)
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="predict")

params = self._onedal_estimator._get_onedal_params(X)
params["result_option"] = "responses"
result = self._onedal_estimator._onedal_predict(
self._onedal_estimator._onedal_model, X, params
)
xp, _ = get_namespace(X)
responses = from_table(result.responses, like=X)
if sklearn_check_version("1.9"):
xp, _, device = get_namespace_and_device(self.classes_)
responses = move_to(responses, xp=xp, device=device)
else:
xp, _ = get_namespace(X)
return xp.take(
self.classes_, xp.asarray(xp.reshape(responses, (-1,)), dtype=xp.int64)
)
Expand All @@ -278,6 +299,8 @@ def _onedal_predict_proba(self, X, queue=None):
accept_sparse="csr",
reset=False,
)
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="predict_proba")

neigh_dist, neigh_ind = self._onedal_estimator.kneighbors(X)

Expand All @@ -299,6 +322,8 @@ def _onedal_kneighbors(
accept_sparse="csr",
reset=False,
)
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="kneighbors")
else:
query_is_train = True
X = self._fit_X
Expand Down
33 changes: 30 additions & 3 deletions sklearnex/neighbors/knn_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@
from ..utils.validation import validate_data
from .common import KNeighborsDispatchingBase

if sklearn_check_version("1.9"):
from sklearn.utils._array_api import (
check_same_namespace,
get_namespace_and_device,
move_to,
)


@enable_array_api("1.5") # validate_data y_numeric requires sklearn >=1.5
@control_n_jobs(decorated_methods=["fit", "predict", "kneighbors", "score"])
Expand Down Expand Up @@ -68,7 +75,11 @@ def __init__(
)

def fit(self, X, y):
xp, is_array_api = get_namespace(X)
if sklearn_check_version("1.9"):
xp, is_array_api, device = get_namespace_and_device(X)
else:
xp, is_array_api = get_namespace(X)
device = getattr(X, "device", None)
dispatch(
self,
"fit",
Expand All @@ -82,7 +93,6 @@ def fit(self, X, y):
# Ensure _fit_X matches the input namespace so that
# kneighbors(X=None) can use get_namespace(self._fit_X).
if is_array_api and not _is_numpy_namespace(xp):
device = getattr(X, "device", None)
self._fit_X = xp.asarray(self._fit_X, device=device)
return self

Expand Down Expand Up @@ -138,7 +148,10 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
)

def _onedal_fit(self, X, y, queue=None):
xp, _ = get_namespace(X, y)
if sklearn_check_version("1.9"):
xp, _, device = get_namespace_and_device(X)
else:
xp, _ = get_namespace(X, y)
self._set_effective_metric()

X, y = validate_data(
Expand All @@ -151,6 +164,9 @@ def _onedal_fit(self, X, y, queue=None):
y_numeric=True,
)

if sklearn_check_version("1.9"):
y = move_to(y, xp=xp, device=device)

self._process_regression_targets(y)
onedal_params = {
"n_neighbors": self.n_neighbors,
Expand Down Expand Up @@ -215,6 +231,13 @@ def _predict_gpu(self, X, queue=None):
accept_sparse="csr",
reset=False,
)
# Note: if called before 'validate_data', this check would fail if 'X' is
# a 'DataFrame', since '_fit_X' would have already been converted to NumPy.
# Hence, it must come after the call to 'validate_data'. If the behavior
# of this validator changes in scikit-learn, these checks could be done
# earlier in the code for quicker errors.
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="predict")
result = self._onedal_estimator._predict_gpu(X)
return result

Expand Down Expand Up @@ -246,6 +269,8 @@ def _predict_skl(self, X, queue=None):
X = validate_data(
self, X, dtype=[xp.float64, xp.float32], accept_sparse="csr", reset=False
)
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="predict")
return self._predict_skl_regression(X)

def _onedal_kneighbors(
Expand All @@ -262,6 +287,8 @@ def _onedal_kneighbors(
accept_sparse="csr",
reset=False,
)
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="kenighbors")
else:
query_is_train = True
X = self._fit_X
Expand Down
15 changes: 13 additions & 2 deletions sklearnex/neighbors/knn_unsupervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from ..utils.validation import validate_data
from .common import KNeighborsDispatchingBase

if sklearn_check_version("1.9"):
from sklearn.utils._array_api import check_same_namespace, get_namespace_and_device


@enable_array_api
@control_n_jobs(decorated_methods=["fit", "kneighbors", "radius_neighbors"])
Expand Down Expand Up @@ -66,7 +69,12 @@ def __init__(
)

def fit(self, X, y=None):
xp, is_array_api = get_namespace(X)
if sklearn_check_version("1.9"):
xp, is_array_api, device = get_namespace_and_device(X)
else:
xp, is_array_api = get_namespace(X)
device = getattr(X, "device", None)

dispatch(
self,
"fit",
Expand All @@ -80,7 +88,6 @@ def fit(self, X, y=None):
# Ensure _fit_X matches the input namespace so that
# kneighbors(X=None) can use get_namespace(self._fit_X).
if is_array_api and not _is_numpy_namespace(xp):
device = getattr(X, "device", None)
self._fit_X = xp.asarray(self._fit_X, device=device)
return self

Expand Down Expand Up @@ -189,6 +196,8 @@ def _onedal_predict(self, X, queue=None):
reset=False,
force_all_finite=False,
)
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="predict")
return self._onedal_estimator.predict(X, queue=queue)

def _onedal_kneighbors(
Expand All @@ -205,6 +214,8 @@ def _onedal_kneighbors(
accept_sparse="csr",
reset=False,
)
if sklearn_check_version("1.9"):
check_same_namespace(X, self, attribute="_fit_X", method="kneighbors")
else:
query_is_train = True
X = self._fit_X
Expand Down
Loading
Loading