diff --git a/CHANGELOG.md b/CHANGELOG.md index 97e4aaf145..1bd16e2675 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `pretty_print_df` function for printing shortened versions of dataframes - Basic Transfer Learning example - Repo now has reminders (https://github.com/marketplace/actions/issue-reminder) enabled +- `mypy` for recommenders ### Changed - `Recommender`s now share their core logic via their base class diff --git a/baybe/recommenders/deprecation.py b/baybe/recommenders/deprecation.py index 4f035ebe20..580897d437 100644 --- a/baybe/recommenders/deprecation.py +++ b/baybe/recommenders/deprecation.py @@ -36,6 +36,6 @@ def structure_recommender_protocol(val: dict, _) -> RecommenderProtocol: f"a future version.", DeprecationWarning, ) - fun = make_dict_structure_fn(cls, converter) + fun = make_dict_structure_fn(cls, converter) # type: ignore return fun(val, cls) diff --git a/baybe/recommenders/meta/sequential.py b/baybe/recommenders/meta/sequential.py index 4facd081c8..cf5a63942c 100644 --- a/baybe/recommenders/meta/sequential.py +++ b/baybe/recommenders/meta/sequential.py @@ -1,4 +1,7 @@ """Meta recommenders that switch recommenders based on the experimentation progress.""" +# TODO After bayesian recommenders are enabled with no training data, a refactoring of +# this file will resolve type errors +# mypy: disable-error-code="arg-type" from typing import Iterable, Iterator, List, Literal, Optional @@ -21,7 +24,7 @@ converter, ) -# TODO: Make predictive recommenders handle empty training data +# TODO: Make bayesian recommenders handle empty training data _unsupported_recommender_error = ValueError( f"For cases where no training is available, the selected recommender " f"must be a subclass of '{NonPredictiveRecommender.__name__}'." @@ -62,7 +65,7 @@ def select_recommender( # noqa: D102 ) -> PureRecommender: # See base class. - # FIXME: enable predictive recommenders for empty training data + # TODO: enable bayesian recommenders for empty training data if (train_x is None or len(train_x) == 0) and not isinstance( self.initial_recommender, NonPredictiveRecommender ): @@ -167,7 +170,7 @@ def select_recommender( # noqa: D102 # Remember the training dataset size for the next call self._n_last_measurements = len(train_x) - # FIXME: enable predictive recommenders for empty training data + # TODO: enable bayesian recommenders for empty training data if (train_x is None or len(train_x) == 0) and not isinstance( recommender, NonPredictiveRecommender ): @@ -249,13 +252,13 @@ def select_recommender( # noqa: D102 # Remember the training dataset size for the next call self._n_last_measurements = len(train_x) - # FIXME: enable predictive recommenders for empty training data + # TODO: enable bayesian recommenders for empty training data if (train_x is None or len(train_x) == 0) and not isinstance( self._last_recommender, NonPredictiveRecommender ): raise _unsupported_recommender_error - return self._last_recommender + return self._last_recommender # type: ignore[return-value] # The recommender iterable cannot be serialized diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py index bc2d4e36b4..3133fae070 100644 --- a/baybe/recommenders/naive.py +++ b/baybe/recommenders/naive.py @@ -1,10 +1,11 @@ """Naive recommender for hybrid spaces.""" import warnings -from typing import ClassVar, Optional +from typing import ClassVar, Optional, cast import pandas as pd from attrs import define, evolve, field, fields +from torch import Tensor from baybe.acquisition import PartialAcquisitionFunction from baybe.recommenders.pure.base import PureRecommender @@ -85,11 +86,9 @@ def recommend( # noqa: D102 ) -> pd.DataFrame: # See base class. - # First check whether the disc_recommender is either bayesian or non-predictive - is_bayesian_recommender = isinstance(self.disc_recommender, BayesianRecommender) - is_np_recommender = isinstance(self.disc_recommender, NonPredictiveRecommender) - - if (not is_bayesian_recommender) and (not is_np_recommender): + if (not isinstance(self.disc_recommender, BayesianRecommender)) and ( + not isinstance(self.disc_recommender, NonPredictiveRecommender) + ): raise NotImplementedError( """The discrete recommender should be either a Bayesian or a NonPredictiveRecommender.""" @@ -117,7 +116,7 @@ def recommend( # noqa: D102 # will then be attached to every discrete point when the acquisition function # is evaluated. cont_part = searchspace.continuous.samples_random(1) - cont_part = to_tensor(cont_part).unsqueeze(-2) + cont_part_tensor = cast(Tensor, to_tensor(cont_part)).unsqueeze(-2) # Get discrete candidates. The metadata flags are ignored since the search space # is hybrid @@ -128,7 +127,7 @@ def recommend( # noqa: D102 ) # We now check whether the discrete recommender is bayesian. - if is_bayesian_recommender: + if isinstance(self.disc_recommender, BayesianRecommender): # Get access to the recommenders acquisition function self.disc_recommender.setup_acquisition_function( searchspace, train_x, train_y @@ -138,7 +137,7 @@ def recommend( # noqa: D102 # whenever evaluating the acquisition function disc_acqf_part = PartialAcquisitionFunction( acqf=self.disc_recommender._acquisition_function, - pinned_part=cont_part, + pinned_part=cont_part_tensor, pin_discrete=False, ) @@ -154,7 +153,7 @@ def recommend( # noqa: D102 # Get one random discrete point that will be attached when evaluating the # acquisition function in the discrete space. disc_part = searchspace.discrete.comp_rep.loc[disc_rec_idx].sample(1) - disc_part = to_tensor(disc_part).unsqueeze(-2) + disc_part_tensor = cast(Tensor, to_tensor(disc_part)).unsqueeze(-2) # Setup a fresh acquisition function for the continuous recommender self.cont_recommender.setup_acquisition_function(searchspace, train_x, train_y) @@ -162,7 +161,7 @@ def recommend( # noqa: D102 # Construct the continuous space as a standalone space cont_acqf_part = PartialAcquisitionFunction( acqf=self.cont_recommender._acquisition_function, - pinned_part=disc_part, + pinned_part=disc_part_tensor, pin_discrete=True, ) self.cont_recommender._acquisition_function = cont_acqf_part diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py index 8816e853d9..fdbb8b02f7 100644 --- a/baybe/recommenders/pure/bayesian/base.py +++ b/baybe/recommenders/pure/bayesian/base.py @@ -68,7 +68,10 @@ def _get_acquisition_function_cls( return fun def setup_acquisition_function( - self, searchspace: SearchSpace, train_x: pd.DataFrame, train_y: pd.DataFrame + self, + searchspace: SearchSpace, + train_x: Optional[pd.DataFrame] = None, + train_y: Optional[pd.DataFrame] = None, ) -> None: """Create the current acquisition function from provided training data. @@ -79,7 +82,15 @@ def setup_acquisition_function( searchspace: The search space in which the experiments are to be conducted. train_x: The features of the conducted experiments. train_y: The corresponding response values. + + Raises: + NotImplementedError: If the setup is attempted from empty training data """ + if train_x is None or train_y is None: + raise NotImplementedError( + "Bayesian recommenders do not support empty training data yet." + ) + best_f = train_y.max() surrogate_model = self._fit(searchspace, train_x, train_y) acquisition_function_cls = self._get_acquisition_function_cls() diff --git a/baybe/recommenders/pure/bayesian/sequential_greedy.py b/baybe/recommenders/pure/bayesian/sequential_greedy.py index 444a5b1bd7..bc4c8768ef 100644 --- a/baybe/recommenders/pure/bayesian/sequential_greedy.py +++ b/baybe/recommenders/pure/bayesian/sequential_greedy.py @@ -178,7 +178,7 @@ def _recommend_hybrid( # TODO: Currently assumes that discrete parameters are first and continuous # second. Once parameter redesign [11611] is completed, we might adjust # this. - candidates_comp.columns = list(range(len(candidates_comp.columns))) + candidates_comp.columns = list(range(len(candidates_comp.columns))) # type: ignore[assignment] fixed_features_list = candidates_comp.to_dict("records") else: diff --git a/baybe/recommenders/pure/nonpredictive/clustering.py b/baybe/recommenders/pure/nonpredictive/clustering.py index 07cec021c8..dfeb51e406 100644 --- a/baybe/recommenders/pure/nonpredictive/clustering.py +++ b/baybe/recommenders/pure/nonpredictive/clustering.py @@ -1,12 +1,13 @@ """Recommendation strategies based on clustering.""" from abc import ABC -from typing import ClassVar, List, Type, TypeVar +from typing import ClassVar, List, Type, Union import numpy as np import pandas as pd from attrs import define, field from scipy.stats import multivariate_normal +from sklearn.base import ClusterMixin from sklearn.cluster import KMeans from sklearn.metrics import pairwise_distances from sklearn.mixture import GaussianMixture @@ -16,8 +17,6 @@ from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender from baybe.searchspace import SearchSpaceType, SubspaceDiscrete -_ScikitLearnModel = TypeVar("_ScikitLearnModel") - @define class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC): @@ -41,7 +40,7 @@ class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC): # that checks if a custom mechanism is implemented and uses default otherwise # (similar to what is done in the recommenders) - model_class: ClassVar[Type[_ScikitLearnModel]] + model_class: ClassVar[Type[ClusterMixin]] """Class variable describing the model class.""" model_cluster_num_parameter_name: ClassVar[str] @@ -57,8 +56,8 @@ class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC): def _make_selection_default( self, - model: _ScikitLearnModel, - candidates_scaled: pd.DataFrame, + model: ClusterMixin, + candidates_scaled: Union[pd.DataFrame, np.ndarray], ) -> List[int]: """Select one candidate from each cluster uniformly at random. @@ -80,8 +79,8 @@ def _make_selection_default( def _make_selection_custom( self, - model: _ScikitLearnModel, - candidates_scaled: pd.DataFrame, + model: ClusterMixin, + candidates_scaled: Union[pd.DataFrame, np.ndarray], ) -> List[int]: """Select candidates from the computed clustering. @@ -136,7 +135,7 @@ def _recommend_discrete( class PAMClusteringRecommender(SKLearnClusteringRecommender): """Partitioning Around Medoids (PAM) clustering recommender.""" - model_class: ClassVar[Type[_ScikitLearnModel]] = KMedoids + model_class: ClassVar[Type[ClusterMixin]] = KMedoids # See base class. model_cluster_num_parameter_name: ClassVar[str] = "n_clusters" @@ -156,8 +155,8 @@ def _default_model_params(self) -> dict: def _make_selection_custom( self, - model: _ScikitLearnModel, - candidates_scaled: pd.DataFrame, + model: ClusterMixin, + candidates_scaled: Union[pd.DataFrame, np.ndarray], ) -> List[int]: """Select candidates from the computed clustering. @@ -180,7 +179,7 @@ class KMeansClusteringRecommender(SKLearnClusteringRecommender): """K-means clustering recommender.""" # Class variables - model_class: ClassVar[Type[_ScikitLearnModel]] = KMeans + model_class: ClassVar[Type[ClusterMixin]] = KMeans # See base class. model_cluster_num_parameter_name: ClassVar[str] = "n_clusters" @@ -200,8 +199,8 @@ def _default_model_params(self) -> dict: def _make_selection_custom( self, - model: _ScikitLearnModel, - candidates_scaled: pd.DataFrame, + model: ClusterMixin, + candidates_scaled: Union[pd.DataFrame, np.ndarray], ) -> List[int]: """Select candidates from the computed clustering. @@ -232,7 +231,7 @@ class GaussianMixtureClusteringRecommender(SKLearnClusteringRecommender): """Gaussian mixture model (GMM) clustering recommender.""" # Class variables - model_class: ClassVar[Type[_ScikitLearnModel]] = GaussianMixture + model_class: ClassVar[Type[ClusterMixin]] = GaussianMixture # See base class. model_cluster_num_parameter_name: ClassVar[str] = "n_components" @@ -240,8 +239,8 @@ class GaussianMixtureClusteringRecommender(SKLearnClusteringRecommender): def _make_selection_custom( self, - model: _ScikitLearnModel, - candidates_scaled: pd.DataFrame, + model: ClusterMixin, + candidates_scaled: Union[pd.DataFrame, np.ndarray], ) -> List[int]: """Select candidates from the computed clustering. diff --git a/mypy.ini b/mypy.ini index ef9147e251..a4cfd815a5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -3,8 +3,7 @@ packages = baybe ; at some point, these excludes should all be gone ... exclude = (?x)( - baybe/recommenders - | baybe/searchspace + baybe/searchspace | baybe/serialization | baybe/strategies | baybe/surrogates @@ -18,6 +17,12 @@ exclude = (?x)( | baybe/surrogate.py ) +[mypy-botorch.acquisition] +ignore_missing_imports = True + +[mypy-botorch.optim] +ignore_missing_imports = True + [mypy-botorch.test_functions] ignore_missing_imports = True @@ -27,9 +32,27 @@ ignore_missing_imports = True [mypy-scipy.spatial.distance] ignore_missing_imports = True +[mypy-scipy.stats] +ignore_missing_imports = True + +[mypy-sklearn.base] +ignore_missing_imports = True + +[mypy-sklearn.cluster] +ignore_missing_imports = True + [mypy-sklearn.metrics] ignore_missing_imports = True +[mypy-sklearn.mixture] +ignore_missing_imports = True + +[mypy-sklearn.preprocessing] +ignore_missing_imports = True + +[mypy-sklearn_extra.cluster] +ignore_missing_imports = True + [mypy-joblib] ignore_missing_imports = True