diff --git a/sklearnex/neighbors/_lof.py b/sklearnex/neighbors/_lof.py
index 20d9cf313f..a23f780ab3 100644
--- a/sklearnex/neighbors/_lof.py
+++ b/sklearnex/neighbors/_lof.py
@@ -31,7 +31,6 @@
 from sklearnex.neighbors.knn_unsupervised import NearestNeighbors
 
 from ..utils._array_api import enable_array_api, get_namespace
-from ..utils.validation import validate_data
 
 
 @enable_array_api
@@ -141,6 +140,10 @@ def fit(self, X, y=None):
             self._fit_X = xp.asarray(self._fit_X, device=device)
         return self
 
+    # Note: this is overriding an internal method from scikit-learn with
+    # the same signature. In this case, 'validate_data' is called during
+    # 'decision_function', which calls '.kneighbors()'. Hence, it doesn't
+    # need to validate the namespace of 'X' with '_fit_X' here.
     def _predict(self, X=None):
         check_is_fitted(self)
 
diff --git a/sklearnex/neighbors/common.py b/sklearnex/neighbors/common.py
index 02cc8f8968..221f49a674 100644
--- a/sklearnex/neighbors/common.py
+++ b/sklearnex/neighbors/common.py
@@ -29,6 +29,7 @@
 
 if sklearn_check_version("1.9"):
     from sklearn.utils._sparse import _align_api_if_sparse
+    from sklearn.utils._array_api import get_namespace_and_device, move_to
 
 from onedal._device_offload import _transfer_to_host
 from onedal.utils._array_api import _is_numpy_namespace
@@ -37,7 +38,6 @@
 from .._utils import PatchingConditionsChain
 from ..base import oneDALEstimator
 from ..utils._array_api import get_namespace
-from ..utils.validation import validate_data
 
 
 class KNeighborsDispatchingBase(oneDALEstimator):
@@ -51,11 +51,20 @@ def _get_weights(self, dist, weights):
             # if user attempts to classify a point that was zero distance from one
             # or more training points, those training points are weighted as 1.0
             # and the other points as 0.0
-            with xp.errstate(divide="ignore"):
-                dist = 1.0 / dist
+            if _is_numpy_namespace(xp):
+                with xp.errstate(divide="ignore"):
+                    dist = 1.0 / dist
+            else:
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    dist = 1.0 / dist
             inf_mask = xp.isinf(dist)
             inf_row = xp.any(inf_mask, axis=1)
-            dist[inf_row] = inf_mask[inf_row]
+            if _is_numpy_namespace(xp):
+                # Note: older NumPy versions do not have 'np.astype'
+                dist[inf_row] = inf_mask[inf_row]
+            else:
+                dist[inf_row] = xp.astype(inf_mask[inf_row], dist.dtype)
             return dist
         elif callable(weights):
             return weights(dist)
@@ -84,11 +93,19 @@ def _compute_weighted_prediction(self, neigh_dist, neigh_ind, weights_param, y_t
         array-like
             Predicted values.
         """
-        xp, _ = get_namespace(y_train)
-        if not _is_numpy_namespace(xp):
+        # Note: in theory, the logic should be that 'y_train' should be converted
+        # to the namespace of 'neigh_dist', but by this point, 'y_train' should
+        # already have been moved to X's namespace, so it's fine to move 'neigh_dist'.
+        if sklearn_check_version("1.9"):
+            xp, _, device = get_namespace_and_device(y_train)
+            neigh_dist = move_to(neigh_dist, xp=xp, device=device)
+            neigh_ind = move_to(neigh_ind, xp=xp, device=device)
+        else:
+            xp, _ = get_namespace(y_train)
             device = getattr(y_train, "device", None)
-            neigh_dist = xp.asarray(neigh_dist, device=device)
-            neigh_ind = xp.asarray(neigh_ind, device=device)
+            if not _is_numpy_namespace(xp):
+                neigh_dist = xp.asarray(neigh_dist, device=device)
+                neigh_ind = xp.asarray(neigh_ind, device=device)
 
         weights = self._get_weights(neigh_dist, weights_param)
 
@@ -113,9 +130,7 @@ def _compute_weighted_prediction(self, neigh_dist, neigh_ind, weights_param, y_t
             y_pred_shape = (neigh_ind.shape[0], _y.shape[1])
             if not _is_numpy_namespace(xp):
                 # Array API: pass device to ensure same device as input
-                y_pred = xp.empty(
-                    y_pred_shape, dtype=neigh_dist.dtype, device=neigh_ind.device
-                )
+                y_pred = xp.empty(y_pred_shape, dtype=neigh_dist.dtype, device=device)
             else:
                 # Numpy: no device parameter
                 y_pred = xp.empty(y_pred_shape, dtype=neigh_dist.dtype)
@@ -164,11 +179,16 @@ def _compute_class_probabilities(
         array-like
             Class probabilities.
         """
-        xp, _ = get_namespace(y_train)
-        if not _is_numpy_namespace(xp):
+        if sklearn_check_version("1.9"):
+            xp, _, device = get_namespace_and_device(y_train)
+            neigh_dist = move_to(neigh_dist, xp=xp, device=device)
+            neigh_ind = move_to(neigh_ind, xp=xp, device=device)
+        else:
+            xp, _ = get_namespace(y_train)
             device = getattr(y_train, "device", None)
-            neigh_dist = xp.asarray(neigh_dist, device=device)
-            neigh_ind = xp.asarray(neigh_ind, device=device)
+            if not _is_numpy_namespace(xp):
+                neigh_dist = xp.asarray(neigh_dist, device=device)
+                neigh_ind = xp.asarray(neigh_ind, device=device)
 
         _y = y_train
         classes_ = classes
@@ -207,9 +227,9 @@ def _compute_class_probabilities(
                 proba_k = xp.zeros(
                     (n_classes, n_queries),
                     dtype=neigh_dist.dtype,
-                    device=neigh_dist.device,
+                    device=device,
                 )
-                zero = xp.zeros(1, dtype=neigh_dist.dtype, device=neigh_dist.device)
+                zero = xp.zeros(1, dtype=neigh_dist.dtype, device=device)
                 for c in range(n_classes):
                     mask = pred_labels == c
                     proba_k[c, :] = xp.sum(xp.where(mask, weights_k, zero), axis=1)
@@ -654,6 +674,8 @@ def _onedal_gpu_supported(self, method_name, *data):
     def _onedal_cpu_supported(self, method_name, *data):
         return self._onedal_supported("cpu", method_name, *data)
 
+    # Note: since this transfers the data to host, it doesn't validate
+    # that the array namespaces and devices of 'X' and '_fit_X' match.
     def kneighbors_graph(self, X=None, n_neighbors=None, mode="connectivity"):
         check_is_fitted(self)
         if n_neighbors is None:
diff --git a/sklearnex/neighbors/knn_classification.py b/sklearnex/neighbors/knn_classification.py
index e4383da4c5..9788722a20 100755
--- a/sklearnex/neighbors/knn_classification.py
+++ b/sklearnex/neighbors/knn_classification.py
@@ -33,6 +33,13 @@
 from ..utils.validation import validate_data
 from .common import KNeighborsDispatchingBase
 
+if sklearn_check_version("1.9"):
+    from sklearn.utils._array_api import (
+        check_same_namespace,
+        get_namespace_and_device,
+        move_to,
+    )
+
 
 @enable_array_api
 @control_n_jobs(
@@ -72,7 +79,12 @@ def __init__(
         )
 
     def fit(self, X, y):
-        xp, is_array_api = get_namespace(X)
+        if sklearn_check_version("1.9"):
+            xp, is_array_api, device = get_namespace_and_device(X)
+        else:
+            xp, is_array_api = get_namespace(X)
+            device = getattr(X, "device", None)
+
         dispatch(
             self,
             "fit",
@@ -86,7 +98,6 @@ def fit(self, X, y):
         # Ensure _fit_X matches the input namespace so that
         # kneighbors(X=None) can use get_namespace(self._fit_X).
         if is_array_api and not _is_numpy_namespace(xp):
-            device = getattr(X, "device", None)
             self._fit_X = xp.asarray(self._fit_X, device=device)
         return self
 
@@ -169,7 +180,7 @@ def _onedal_fit(self, X, y, queue=None):
         )
 
         # Process classification targets before passing to onedal
-        self._process_classification_targets(y, skip_validation=False)
+        self._process_classification_targets(X, y, skip_validation=False)
 
         # Call onedal backend
         onedal_params = {
@@ -200,7 +211,7 @@ def _onedal_fit(self, X, y, queue=None):
         # Post-processing
         self._save_attributes()
 
-    def _process_classification_targets(self, y, skip_validation=False):
+    def _process_classification_targets(self, X, y, skip_validation=False):
         """Process classification targets and set class-related attributes.
 
         Parameters
@@ -246,6 +257,10 @@ def _process_classification_targets(self, y, skip_validation=False):
             self.classes_ = self.classes_[0]
             self._y = xp.reshape(self._y, (-1,))
 
+        if sklearn_check_version("1.9"):
+            xp_X, _, device = get_namespace_and_device(X)
+            self._y = move_to(self._y, xp=xp_X, device=device)
+
     def _onedal_predict(self, X, queue=None):
         if X is not None:
             xp, _ = get_namespace(X)
@@ -256,14 +271,20 @@ def _onedal_predict(self, X, queue=None):
                 accept_sparse="csr",
                 reset=False,
             )
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="predict")
 
         params = self._onedal_estimator._get_onedal_params(X)
         params["result_option"] = "responses"
         result = self._onedal_estimator._onedal_predict(
             self._onedal_estimator._onedal_model, X, params
         )
-        xp, _ = get_namespace(X)
         responses = from_table(result.responses, like=X)
+        if sklearn_check_version("1.9"):
+            xp, _, device = get_namespace_and_device(self.classes_)
+            responses = move_to(responses, xp=xp, device=device)
+        else:
+            xp, _ = get_namespace(X)
         return xp.take(
             self.classes_, xp.asarray(xp.reshape(responses, (-1,)), dtype=xp.int64)
         )
@@ -278,6 +299,8 @@ def _onedal_predict_proba(self, X, queue=None):
                 accept_sparse="csr",
                 reset=False,
             )
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="predict_proba")
 
         neigh_dist, neigh_ind = self._onedal_estimator.kneighbors(X)
 
@@ -299,6 +322,8 @@ def _onedal_kneighbors(
                 accept_sparse="csr",
                 reset=False,
             )
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="kneighbors")
         else:
             query_is_train = True
             X = self._fit_X
diff --git a/sklearnex/neighbors/knn_regression.py b/sklearnex/neighbors/knn_regression.py
index 9b2cb58e62..64089c0121 100755
--- a/sklearnex/neighbors/knn_regression.py
+++ b/sklearnex/neighbors/knn_regression.py
@@ -31,6 +31,13 @@
 from ..utils.validation import validate_data
 from .common import KNeighborsDispatchingBase
 
+if sklearn_check_version("1.9"):
+    from sklearn.utils._array_api import (
+        check_same_namespace,
+        get_namespace_and_device,
+        move_to,
+    )
+
 
 @enable_array_api("1.5")  # validate_data y_numeric requires sklearn >=1.5
 @control_n_jobs(decorated_methods=["fit", "predict", "kneighbors", "score"])
@@ -68,7 +75,11 @@ def __init__(
         )
 
     def fit(self, X, y):
-        xp, is_array_api = get_namespace(X)
+        if sklearn_check_version("1.9"):
+            xp, is_array_api, device = get_namespace_and_device(X)
+        else:
+            xp, is_array_api = get_namespace(X)
+            device = getattr(X, "device", None)
         dispatch(
             self,
             "fit",
@@ -82,7 +93,6 @@ def fit(self, X, y):
         # Ensure _fit_X matches the input namespace so that
         # kneighbors(X=None) can use get_namespace(self._fit_X).
         if is_array_api and not _is_numpy_namespace(xp):
-            device = getattr(X, "device", None)
             self._fit_X = xp.asarray(self._fit_X, device=device)
         return self
 
@@ -138,7 +148,10 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         )
 
     def _onedal_fit(self, X, y, queue=None):
-        xp, _ = get_namespace(X, y)
+        if sklearn_check_version("1.9"):
+            xp, _, device = get_namespace_and_device(X)
+        else:
+            xp, _ = get_namespace(X, y)
         self._set_effective_metric()
 
         X, y = validate_data(
@@ -151,6 +164,9 @@ def _onedal_fit(self, X, y, queue=None):
             y_numeric=True,
         )
 
+        if sklearn_check_version("1.9"):
+            y = move_to(y, xp=xp, device=device)
+
         self._process_regression_targets(y)
         onedal_params = {
             "n_neighbors": self.n_neighbors,
@@ -215,6 +231,13 @@ def _predict_gpu(self, X, queue=None):
                 accept_sparse="csr",
                 reset=False,
             )
+            # Note: if called before 'validate_data', this check would fail if 'X' is
+            # a 'DataFrame', since '_fit_X' would have already been converted to NumPy.
+            # Hence, it must come after the call to 'validate_data'. If the behavior
+            # of this validator changes in scikit-learn, these checks could be done
+            # earlier in the code for quicker errors.
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="predict")
         result = self._onedal_estimator._predict_gpu(X)
         return result
 
@@ -246,6 +269,8 @@ def _predict_skl(self, X, queue=None):
             X = validate_data(
                 self, X, dtype=[xp.float64, xp.float32], accept_sparse="csr", reset=False
             )
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="predict")
         return self._predict_skl_regression(X)
 
     def _onedal_kneighbors(
@@ -262,6 +287,8 @@ def _onedal_kneighbors(
                 accept_sparse="csr",
                 reset=False,
             )
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="kneighbors")
         else:
             query_is_train = True
             X = self._fit_X
diff --git a/sklearnex/neighbors/knn_unsupervised.py b/sklearnex/neighbors/knn_unsupervised.py
index 19b48a81ef..4c06c471d5 100755
--- a/sklearnex/neighbors/knn_unsupervised.py
+++ b/sklearnex/neighbors/knn_unsupervised.py
@@ -29,6 +29,9 @@
 from ..utils.validation import validate_data
 from .common import KNeighborsDispatchingBase
 
+if sklearn_check_version("1.9"):
+    from sklearn.utils._array_api import check_same_namespace, get_namespace_and_device
+
 
 @enable_array_api
 @control_n_jobs(decorated_methods=["fit", "kneighbors", "radius_neighbors"])
@@ -66,7 +69,12 @@ def __init__(
         )
 
     def fit(self, X, y=None):
-        xp, is_array_api = get_namespace(X)
+        if sklearn_check_version("1.9"):
+            xp, is_array_api, device = get_namespace_and_device(X)
+        else:
+            xp, is_array_api = get_namespace(X)
+            device = getattr(X, "device", None)
+
         dispatch(
             self,
             "fit",
@@ -80,7 +88,6 @@ def fit(self, X, y=None):
         # Ensure _fit_X matches the input namespace so that
         # kneighbors(X=None) can use get_namespace(self._fit_X).
         if is_array_api and not _is_numpy_namespace(xp):
-            device = getattr(X, "device", None)
             self._fit_X = xp.asarray(self._fit_X, device=device)
         return self
 
@@ -189,6 +196,8 @@ def _onedal_predict(self, X, queue=None):
                 reset=False,
                 force_all_finite=False,
             )
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="predict")
         return self._onedal_estimator.predict(X, queue=queue)
 
     def _onedal_kneighbors(
@@ -205,6 +214,8 @@ def _onedal_kneighbors(
                 accept_sparse="csr",
                 reset=False,
             )
+            if sklearn_check_version("1.9"):
+                check_same_namespace(X, self, attribute="_fit_X", method="kneighbors")
         else:
             query_is_train = True
             X = self._fit_X
diff --git a/sklearnex/neighbors/tests/test_neighbors.py b/sklearnex/neighbors/tests/test_neighbors.py
index f1653f2291..0ba0989b13 100755
--- a/sklearnex/neighbors/tests/test_neighbors.py
+++ b/sklearnex/neighbors/tests/test_neighbors.py
@@ -14,16 +14,24 @@
 # limitations under the License.
 # ===============================================================================
 
+import array_api_strict
 import numpy as np
+import pandas as pd
 import pytest
 from numpy.testing import assert_allclose, assert_array_equal
 from sklearn import datasets
+from sklearn.base import is_regressor
 
+from daal4py.sklearn._utils import sklearn_check_version
 from onedal.tests.utils._dataframes_support import (
     _as_numpy,
     _convert_to_dataframe,
+    dpnp_available,
     get_dataframes_and_queues,
+    torch_available,
+    torch_xpu_available,
 )
+from onedal.tests.utils._device_selection import is_sycl_device_available
 from sklearnex.neighbors import (
     KNeighborsClassifier,
     KNeighborsRegressor,
@@ -31,6 +39,11 @@
     NearestNeighbors,
 )
 
+if dpnp_available:
+    import dpnp
+if torch_available:
+    import torch
+
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_import_knn_classifier(dataframe, queue):
@@ -173,3 +186,146 @@ def test_p_present_if_metric_is_minkowski():
     assert knn.effective_metric_ == "minkowski"
     assert "p" in knn.effective_metric_params_
     assert knn.effective_metric_params_["p"] == 3
+
+
+# Note: this doesn't check 'kneighbors_graph', because that function
+# transfers the data to NumPy internally, so it will not necessarily
+# end up erroring out.
+@pytest.mark.skipif(
+    not sklearn_check_version("1.9"), reason="Functionality introduced in later versions."
+)
+@pytest.mark.parametrize("weights", ["uniform", "distance"])
+def test_error_on_incompatible_namespaces(weights, with_array_api):
+    """Querying a fitted model with data from another namespace must raise."""
+    rng = np.random.default_rng(seed=123)
+    X = rng.standard_normal(size=(25, 3))
+    y = rng.standard_normal(size=X.shape[0])
+    Xa = array_api_strict.from_dlpack(X)
+    ya = array_api_strict.from_dlpack(y)
+
+    knn = KNeighborsRegressor(weights=weights).fit(X, y)
+    with pytest.raises(ValueError, match="same namespace"):
+        knn.predict(Xa)
+    with pytest.raises(ValueError, match="same namespace"):
+        knn.kneighbors(Xa)
+
+    knn = KNeighborsRegressor(weights=weights).fit(Xa, ya)
+    with pytest.raises(ValueError, match="same namespace"):
+        knn.predict(X)
+    with pytest.raises(ValueError, match="same namespace"):
+        knn.kneighbors(X)
+
+
+@pytest.mark.skipif(
+    not sklearn_check_version("1.9"),
+    reason="Functionality introduced in later scikit-learn versions.",
+)
+@pytest.mark.parametrize("X_xp", [np, pd, array_api_strict])
+@pytest.mark.parametrize("y_xp", [np, pd, array_api_strict])
+@pytest.mark.parametrize("weights", ["uniform", "distance"])
+@pytest.mark.parametrize("n_classes", [0, 2, 3])  # 0 == regression
+def test_mixed_array_namespaces(X_xp, y_xp, weights, n_classes, with_array_api):
+    """Check KNN estimators when 'X' and 'y' come from different namespaces.
+
+    Outputs must follow 'X' (or 'y' for class labels); pandas inputs yield NumPy.
+    """
+    is_classifier = n_classes != 0
+    rng = np.random.default_rng(seed=123)
+    X = rng.standard_normal(size=(50, 4))
+    if is_classifier:
+        y = rng.integers(n_classes, size=X.shape[0])
+    else:
+        y = rng.standard_normal(size=X.shape[0])
+
+    X = pd.DataFrame(X) if X_xp is pd else X_xp.asarray(X)
+    if y_xp is not pd:
+        y = y_xp.asarray(y)
+    else:
+        y = pd.Series(np.array(["a", "b", "c"])[y] if is_classifier else y)
+
+    estimator_cls = KNeighborsClassifier if is_classifier else KNeighborsRegressor
+    model = estimator_cls(weights=weights).fit(X, y)
+    pred = model.predict(X)
+    _ = model.score(X, y)
+
+    _ = model.kneighbors(X)
+    _ = model.kneighbors_graph(X)
+
+    if is_classifier:
+        proba = model.predict_proba(X)
+        if X_xp is pd:
+            assert isinstance(proba, np.ndarray)
+        else:
+            assert proba.__class__ == X.__class__
+        assert pred.__class__ == (y.__class__ if y_xp is not pd else np.ndarray)
+    else:
+        assert pred.__class__ == (X.__class__ if X_xp is not pd else np.ndarray)
+
+    # Note: this is a quick check to ensure that the result has the same
+    # kind of values as the input. There's no particular justification
+    # behind requiring 25% classification accuracy.
+    if is_classifier:
+        matches = pred == y
+        if y_xp is array_api_strict:
+            # The array API standard only defines 'sum' for numeric dtypes.
+            n_correct = y_xp.sum(y_xp.astype(matches, y_xp.float32))
+        else:
+            # Note: older NumPy versions do not have 'np.astype'.
+            n_correct = np.sum(np.asarray(matches))
+        assert n_correct >= (0.25 * int(X.shape[0]))
+
+
+@pytest.mark.skipif(
+    not sklearn_check_version("1.9"),
+    reason="Functionality introduced in later scikit-learn versions.",
+)
+@pytest.mark.skipif(
+    not is_sycl_device_available("gpu"), reason="Test checks GPU-specific functionality."
+)
+@pytest.mark.parametrize(
+    "X_xp, X_device",
+    ([(torch, "xpu"), (torch, "cpu")] if torch_xpu_available else [])
+    + ([(dpnp, "gpu"), (dpnp, "cpu")] if dpnp_available else []),
+)
+@pytest.mark.parametrize(
+    "y_xp, y_device",
+    ([(torch, "xpu"), (torch, "cpu")] if torch_xpu_available else [])
+    + ([(dpnp, "gpu"), (dpnp, "cpu")] if dpnp_available else [])
+    + [(pd, None)],
+)
+@pytest.mark.parametrize(
+    "estimator",
+    [
+        KNeighborsRegressor(algorithm="brute"),
+        KNeighborsClassifier(algorithm="brute"),
+    ],
+)
+def test_knn_mixed_devices(X_xp, y_xp, X_device, y_device, estimator, with_array_api):
+    """Check KNN estimators when 'X' and 'y' may be placed on different devices."""
+    regression = is_regressor(estimator)
+    rng = np.random.default_rng(seed=123)
+    X = rng.standard_normal(size=(50, 4))
+    if regression:
+        y = rng.standard_normal(size=X.shape[0])
+    else:
+        y = rng.integers(2, size=X.shape[0])
+
+    X = X_xp.asarray(X, device=X_device)
+    if y_xp is not pd:
+        y = y_xp.asarray(y, device=y_device)
+    elif regression:
+        y = pd.Series(y)
+    else:
+        y = pd.Series(np.array(["a", "b"])[y])
+
+    pred = estimator.fit(X, y).predict(X)
+    if regression:
+        assert pred.__class__ == X.__class__
+    elif y_xp is pd:
+        assert isinstance(pred, np.ndarray)
+    else:
+        assert pred.__class__ == y.__class__
+    if not regression:
+        proba = estimator.predict_proba(X)
+        assert proba.__class__ == X.__class__
+    _ = estimator.score(X, y)