diff --git a/CHANGELOG.md b/CHANGELOG.md
index 97e4aaf145..1bd16e2675 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `pretty_print_df` function for printing shortened versions of dataframes
 - Basic Transfer Learning example
 - Repo now has reminders (https://github.com/marketplace/actions/issue-reminder) enabled
+- `mypy` for recommenders
 
 ### Changed
 - `Recommender`s now share their core logic via their base class
diff --git a/baybe/recommenders/deprecation.py b/baybe/recommenders/deprecation.py
index 4f035ebe20..580897d437 100644
--- a/baybe/recommenders/deprecation.py
+++ b/baybe/recommenders/deprecation.py
@@ -36,6 +36,6 @@ def structure_recommender_protocol(val: dict, _) -> RecommenderProtocol:
             f"a future version.",
             DeprecationWarning,
         )
-    fun = make_dict_structure_fn(cls, converter)
+    fun = make_dict_structure_fn(cls, converter)  # type: ignore
 
     return fun(val, cls)
diff --git a/baybe/recommenders/meta/sequential.py b/baybe/recommenders/meta/sequential.py
index 4facd081c8..cf5a63942c 100644
--- a/baybe/recommenders/meta/sequential.py
+++ b/baybe/recommenders/meta/sequential.py
@@ -1,4 +1,7 @@
 """Meta recommenders that switch recommenders based on the experimentation progress."""
+# TODO: Once Bayesian recommenders can be used without training data, refactor this
+#  file to resolve the type errors suppressed below
+# mypy: disable-error-code="arg-type"
 
 from typing import Iterable, Iterator, List, Literal, Optional
 
@@ -21,7 +24,7 @@
     converter,
 )
 
-# TODO: Make predictive recommenders handle empty training data
+# TODO: Make bayesian recommenders handle empty training data
 _unsupported_recommender_error = ValueError(
     f"For cases where no training is available, the selected recommender "
     f"must be a subclass of '{NonPredictiveRecommender.__name__}'."
@@ -62,7 +65,7 @@ def select_recommender(  # noqa: D102
     ) -> PureRecommender:
         # See base class.
 
-        # FIXME: enable predictive recommenders for empty training data
+        # TODO: enable bayesian recommenders for empty training data
         if (train_x is None or len(train_x) == 0) and not isinstance(
             self.initial_recommender, NonPredictiveRecommender
         ):
@@ -167,7 +170,7 @@ def select_recommender(  # noqa: D102
         # Remember the training dataset size for the next call
         self._n_last_measurements = len(train_x)
 
-        # FIXME: enable predictive recommenders for empty training data
+        # TODO: enable bayesian recommenders for empty training data
         if (train_x is None or len(train_x) == 0) and not isinstance(
             recommender, NonPredictiveRecommender
         ):
@@ -249,13 +252,13 @@ def select_recommender(  # noqa: D102
         # Remember the training dataset size for the next call
         self._n_last_measurements = len(train_x)
 
-        # FIXME: enable predictive recommenders for empty training data
+        # TODO: enable bayesian recommenders for empty training data
         if (train_x is None or len(train_x) == 0) and not isinstance(
             self._last_recommender, NonPredictiveRecommender
         ):
             raise _unsupported_recommender_error
 
-        return self._last_recommender
+        return self._last_recommender  # type: ignore[return-value]
 
 
 # The recommender iterable cannot be serialized
diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py
index bc2d4e36b4..3133fae070 100644
--- a/baybe/recommenders/naive.py
+++ b/baybe/recommenders/naive.py
@@ -1,10 +1,11 @@
 """Naive recommender for hybrid spaces."""
 
 import warnings
-from typing import ClassVar, Optional
+from typing import ClassVar, Optional, cast
 
 import pandas as pd
 from attrs import define, evolve, field, fields
+from torch import Tensor
 
 from baybe.acquisition import PartialAcquisitionFunction
 from baybe.recommenders.pure.base import PureRecommender
@@ -85,11 +86,9 @@ def recommend(  # noqa: D102
     ) -> pd.DataFrame:
         # See base class.
 
-        # First check whether the disc_recommender is either bayesian or non-predictive
-        is_bayesian_recommender = isinstance(self.disc_recommender, BayesianRecommender)
-        is_np_recommender = isinstance(self.disc_recommender, NonPredictiveRecommender)
-
-        if (not is_bayesian_recommender) and (not is_np_recommender):
+        if (not isinstance(self.disc_recommender, BayesianRecommender)) and (
+            not isinstance(self.disc_recommender, NonPredictiveRecommender)
+        ):
             raise NotImplementedError(
                 """The discrete recommender should be either a Bayesian or a
                 NonPredictiveRecommender."""
@@ -117,7 +116,7 @@ def recommend(  # noqa: D102
         # will then be attached to every discrete point when the acquisition function
         # is evaluated.
         cont_part = searchspace.continuous.samples_random(1)
-        cont_part = to_tensor(cont_part).unsqueeze(-2)
+        cont_part_tensor = cast(Tensor, to_tensor(cont_part)).unsqueeze(-2)
 
         # Get discrete candidates. The metadata flags are ignored since the search space
         # is hybrid
@@ -128,7 +127,7 @@ def recommend(  # noqa: D102
         )
 
         # We now check whether the discrete recommender is bayesian.
-        if is_bayesian_recommender:
+        if isinstance(self.disc_recommender, BayesianRecommender):
             # Get access to the recommenders acquisition function
             self.disc_recommender.setup_acquisition_function(
                 searchspace, train_x, train_y
@@ -138,7 +137,7 @@ def recommend(  # noqa: D102
             # whenever evaluating the acquisition function
             disc_acqf_part = PartialAcquisitionFunction(
                 acqf=self.disc_recommender._acquisition_function,
-                pinned_part=cont_part,
+                pinned_part=cont_part_tensor,
                 pin_discrete=False,
             )
 
@@ -154,7 +153,7 @@ def recommend(  # noqa: D102
         # Get one random discrete point that will be attached when evaluating the
         # acquisition function in the discrete space.
         disc_part = searchspace.discrete.comp_rep.loc[disc_rec_idx].sample(1)
-        disc_part = to_tensor(disc_part).unsqueeze(-2)
+        disc_part_tensor = cast(Tensor, to_tensor(disc_part)).unsqueeze(-2)
 
         # Setup a fresh acquisition function for the continuous recommender
         self.cont_recommender.setup_acquisition_function(searchspace, train_x, train_y)
@@ -162,7 +161,7 @@ def recommend(  # noqa: D102
         # Construct the continuous space as a standalone space
         cont_acqf_part = PartialAcquisitionFunction(
             acqf=self.cont_recommender._acquisition_function,
-            pinned_part=disc_part,
+            pinned_part=disc_part_tensor,
             pin_discrete=True,
         )
         self.cont_recommender._acquisition_function = cont_acqf_part
diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py
index 8816e853d9..fdbb8b02f7 100644
--- a/baybe/recommenders/pure/bayesian/base.py
+++ b/baybe/recommenders/pure/bayesian/base.py
@@ -68,7 +68,10 @@ def _get_acquisition_function_cls(
         return fun
 
     def setup_acquisition_function(
-        self, searchspace: SearchSpace, train_x: pd.DataFrame, train_y: pd.DataFrame
+        self,
+        searchspace: SearchSpace,
+        train_x: Optional[pd.DataFrame] = None,
+        train_y: Optional[pd.DataFrame] = None,
     ) -> None:
         """Create the current acquisition function from provided training data.
 
@@ -79,7 +82,15 @@ def setup_acquisition_function(
             searchspace: The search space in which the experiments are to be conducted.
             train_x: The features of the conducted experiments.
             train_y: The corresponding response values.
+
+        Raises:
+            NotImplementedError: If the setup is attempted from empty training data
         """
+        if train_x is None or train_y is None:
+            raise NotImplementedError(
+                "Bayesian recommenders do not support empty training data yet."
+            )
+
         best_f = train_y.max()
         surrogate_model = self._fit(searchspace, train_x, train_y)
         acquisition_function_cls = self._get_acquisition_function_cls()
diff --git a/baybe/recommenders/pure/bayesian/sequential_greedy.py b/baybe/recommenders/pure/bayesian/sequential_greedy.py
index 444a5b1bd7..bc4c8768ef 100644
--- a/baybe/recommenders/pure/bayesian/sequential_greedy.py
+++ b/baybe/recommenders/pure/bayesian/sequential_greedy.py
@@ -178,7 +178,7 @@ def _recommend_hybrid(
             # TODO: Currently assumes that discrete parameters are first and continuous
             #   second. Once parameter redesign [11611] is completed, we might adjust
             #   this.
-            candidates_comp.columns = list(range(len(candidates_comp.columns)))
+            candidates_comp.columns = list(range(len(candidates_comp.columns)))  # type: ignore[assignment]
             fixed_features_list = candidates_comp.to_dict("records")
 
         else:
diff --git a/baybe/recommenders/pure/nonpredictive/clustering.py b/baybe/recommenders/pure/nonpredictive/clustering.py
index 07cec021c8..dfeb51e406 100644
--- a/baybe/recommenders/pure/nonpredictive/clustering.py
+++ b/baybe/recommenders/pure/nonpredictive/clustering.py
@@ -1,12 +1,13 @@
 """Recommendation strategies based on clustering."""
 
 from abc import ABC
-from typing import ClassVar, List, Type, TypeVar
+from typing import ClassVar, List, Type, Union
 
 import numpy as np
 import pandas as pd
 from attrs import define, field
 from scipy.stats import multivariate_normal
+from sklearn.base import ClusterMixin
 from sklearn.cluster import KMeans
 from sklearn.metrics import pairwise_distances
 from sklearn.mixture import GaussianMixture
@@ -16,8 +17,6 @@
 from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender
 from baybe.searchspace import SearchSpaceType, SubspaceDiscrete
 
-_ScikitLearnModel = TypeVar("_ScikitLearnModel")
-
 
 @define
 class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC):
@@ -41,7 +40,7 @@ class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC):
     #   that checks if a custom mechanism is implemented and uses default otherwise
     #   (similar to what is done in the recommenders)
 
-    model_class: ClassVar[Type[_ScikitLearnModel]]
+    model_class: ClassVar[Type[ClusterMixin]]
     """Class variable describing the model class."""
 
     model_cluster_num_parameter_name: ClassVar[str]
@@ -57,8 +56,8 @@ class SKLearnClusteringRecommender(NonPredictiveRecommender, ABC):
 
     def _make_selection_default(
         self,
-        model: _ScikitLearnModel,
-        candidates_scaled: pd.DataFrame,
+        model: ClusterMixin,
+        candidates_scaled: Union[pd.DataFrame, np.ndarray],
     ) -> List[int]:
         """Select one candidate from each cluster uniformly at random.
 
@@ -80,8 +79,8 @@ def _make_selection_default(
 
     def _make_selection_custom(
         self,
-        model: _ScikitLearnModel,
-        candidates_scaled: pd.DataFrame,
+        model: ClusterMixin,
+        candidates_scaled: Union[pd.DataFrame, np.ndarray],
     ) -> List[int]:
         """Select candidates from the computed clustering.
 
@@ -136,7 +135,7 @@ def _recommend_discrete(
 class PAMClusteringRecommender(SKLearnClusteringRecommender):
     """Partitioning Around Medoids (PAM) clustering recommender."""
 
-    model_class: ClassVar[Type[_ScikitLearnModel]] = KMedoids
+    model_class: ClassVar[Type[ClusterMixin]] = KMedoids
     # See base class.
 
     model_cluster_num_parameter_name: ClassVar[str] = "n_clusters"
@@ -156,8 +155,8 @@ def _default_model_params(self) -> dict:
 
     def _make_selection_custom(
         self,
-        model: _ScikitLearnModel,
-        candidates_scaled: pd.DataFrame,
+        model: ClusterMixin,
+        candidates_scaled: Union[pd.DataFrame, np.ndarray],
     ) -> List[int]:
         """Select candidates from the computed clustering.
 
@@ -180,7 +179,7 @@ class KMeansClusteringRecommender(SKLearnClusteringRecommender):
     """K-means clustering recommender."""
 
     # Class variables
-    model_class: ClassVar[Type[_ScikitLearnModel]] = KMeans
+    model_class: ClassVar[Type[ClusterMixin]] = KMeans
     # See base class.
 
     model_cluster_num_parameter_name: ClassVar[str] = "n_clusters"
@@ -200,8 +199,8 @@ def _default_model_params(self) -> dict:
 
     def _make_selection_custom(
         self,
-        model: _ScikitLearnModel,
-        candidates_scaled: pd.DataFrame,
+        model: ClusterMixin,
+        candidates_scaled: Union[pd.DataFrame, np.ndarray],
     ) -> List[int]:
         """Select candidates from the computed clustering.
 
@@ -232,7 +231,7 @@ class GaussianMixtureClusteringRecommender(SKLearnClusteringRecommender):
     """Gaussian mixture model (GMM) clustering recommender."""
 
     # Class variables
-    model_class: ClassVar[Type[_ScikitLearnModel]] = GaussianMixture
+    model_class: ClassVar[Type[ClusterMixin]] = GaussianMixture
     # See base class.
 
     model_cluster_num_parameter_name: ClassVar[str] = "n_components"
@@ -240,8 +239,8 @@ class GaussianMixtureClusteringRecommender(SKLearnClusteringRecommender):
 
     def _make_selection_custom(
         self,
-        model: _ScikitLearnModel,
-        candidates_scaled: pd.DataFrame,
+        model: ClusterMixin,
+        candidates_scaled: Union[pd.DataFrame, np.ndarray],
     ) -> List[int]:
         """Select candidates from the computed clustering.
 
diff --git a/mypy.ini b/mypy.ini
index ef9147e251..a4cfd815a5 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -3,8 +3,7 @@ packages = baybe
 
 ; at some point, these excludes should all be gone ...
 exclude = (?x)(
-          baybe/recommenders
-          | baybe/searchspace
+          baybe/searchspace
           | baybe/serialization
           | baybe/strategies
           | baybe/surrogates
@@ -18,6 +17,12 @@ exclude = (?x)(
           | baybe/surrogate.py
           )
 
+[mypy-botorch.acquisition]
+ignore_missing_imports = True
+
+[mypy-botorch.optim]
+ignore_missing_imports = True
+
 [mypy-botorch.test_functions]
 ignore_missing_imports = True
 
@@ -27,9 +32,27 @@ ignore_missing_imports = True
 [mypy-scipy.spatial.distance]
 ignore_missing_imports = True
 
+[mypy-scipy.stats]
+ignore_missing_imports = True
+
+[mypy-sklearn.base]
+ignore_missing_imports = True
+
+[mypy-sklearn.cluster]
+ignore_missing_imports = True
+
 [mypy-sklearn.metrics]
 ignore_missing_imports = True
 
+[mypy-sklearn.mixture]
+ignore_missing_imports = True
+
+[mypy-sklearn.preprocessing]
+ignore_missing_imports = True
+
+[mypy-sklearn_extra.cluster]
+ignore_missing_imports = True
+
 [mypy-joblib]
 ignore_missing_imports = True