|
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.
| 4 | + |
try:
    from sklearn.utils.validation import validate_data
except ImportError:
    from sklearn.utils.validation import check_array, check_X_y

    # ``validate_data`` could not be imported from scikit-learn, so provide a
    # reduced stand-in adapted from the scikit-learn 1.7 implementation.
    def validate_data(
        _estimator,
        /,
        X="no_validation",
        y="no_validation",
        reset=True,
        validate_separately=False,
        skip_check_array=False,
        **check_params,
    ):
        """Validate ``X`` and/or ``y`` with scikit-learn's array checkers.

        Reduced stand-in for ``sklearn.utils.validation.validate_data``. This
        fallback only runs the input checks: the function itself never sets or
        compares ``n_features_in_`` / ``feature_names_in_`` on the estimator.

        Parameters
        ----------
        _estimator : estimator instance
            Positional-only. Forwarded to the checkers as ``estimator=`` so
            that their error messages can refer to it.
        X : array-like | 'no_validation'
            The input samples. The string ``'no_validation'`` (the default)
            means ``X`` is not validated; ``y`` must then be given.
        y : array-like | None | 'no_validation'
            The targets. Both ``None`` and the string ``'no_validation'``
            (the default) mean ``y`` is not validated; ``X`` must then be
            given.
        reset : bool
            Accepted so the signature matches the scikit-learn function; it is
            not used by this fallback.
        validate_separately : False | tuple of dict
            If falsy, ``X`` and ``y`` are validated together with
            ``check_X_y``. Otherwise it is unpacked into two dicts of keyword
            arguments passed to ``check_array`` for ``X`` and for ``y``
            respectively; ``estimator=_estimator`` is added to each dict that
            has no ``"estimator"`` key. Only consulted when both ``X`` and
            ``y`` are validated.
        skip_check_array : bool
            If True, no checker is called and the inputs that were selected
            for validation are returned unchanged.
        **check_params : dict
            Keyword arguments forwarded to ``check_array`` or ``check_X_y``.
            ``estimator=_estimator`` is used unless the caller supplies its
            own ``estimator``. Not used when ``validate_separately`` is set
            and both inputs are validated.

        Returns
        -------
        out : object | tuple
            The validated ``X``, the validated ``y``, or the tuple ``(X, y)``
            when both were selected for validation.

        Raises
        ------
        ValueError
            If neither ``X`` nor ``y`` is selected for validation.
        """
        skip_X = isinstance(X, str) and X == "no_validation"
        skip_y = y is None or (isinstance(y, str) and y == "no_validation")
        if skip_X and skip_y:
            raise ValueError("Validation should be done on X, y or both.")

        # Caller-supplied ``estimator`` wins over the default one.
        estimator_kwarg = {"estimator": _estimator}
        check_params = {**estimator_kwarg, **check_params}

        # Only X was requested (skip_X is necessarily False here).
        if skip_y:
            if skip_check_array:
                return X
            return check_array(X, input_name="X", **check_params)

        # Only y was requested.
        if skip_X:
            if skip_check_array:
                return y
            # NOTE(review): the upstream docstring mentions a dedicated
            # ``_check_y`` helper for this case; here ``check_array`` is used,
            # so its own defaults apply to ``y`` -- confirm this is intended.
            return check_array(y, input_name="y", **check_params)

        # From here on both X and y were requested.
        if skip_check_array:
            return X, y

        if not validate_separately:
            X, y = check_X_y(X, y, **check_params)
            return X, y

        # Some estimators validate X and y independently; two check_array()
        # calls are in general not equivalent to a single check_X_y() call.
        x_kwargs, y_kwargs = validate_separately
        if "estimator" not in x_kwargs:
            x_kwargs = {**estimator_kwarg, **x_kwargs}
        X = check_array(X, input_name="X", **x_kwargs)
        if "estimator" not in y_kwargs:
            y_kwargs = {**estimator_kwarg, **y_kwargs}
        y = check_array(y, input_name="y", **y_kwargs)
        return X, y
0 commit comments