
Commit e66e795

[ENH] Add LightGBM Experiment integration (#238)
## Description

Adds LightGBM support to Hyperactive by introducing a LightGBMExperiment class that wraps the existing SklearnCvExperiment class.

## Related Issues

Related to #228

## Type of Change

- [ ] `[BUG]` - Bug fix (non-breaking change fixing an issue)
- [x] `[ENH]` - New feature (non-breaking change adding functionality)
- [ ] `[DOC]` - Documentation changes
- [ ] `[MNT]` - Maintenance

## Checklist

- [x] PR title includes appropriate tag: `[BUG]`, `[ENH]`, `[DOC]` or `[MNT]`
- [x] Linked to related issue (if applicable)
- [x] Code passes `make check` (lint, format, isort)
- [x] Tests added/updated for changes (if applicable)
- [x] Documentation updated (if applicable)

Co-authored-by: Simon Blanke <simon.blanke@yahoo.com>
1 parent 8dbb243 commit e66e795

8 files changed

Lines changed: 239 additions & 2 deletions
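The description above calls LightGBMExperiment a thin wrapper around SklearnCvExperiment: the subclass adds only metadata (a soft-dependency tag) while all scoring logic stays in the base class. A minimal, self-contained sketch of that pattern, where the class names mirror the PR but the bodies are illustrative stand-ins, not Hyperactive's actual implementation:

```python
# Sketch of the wrapper pattern described in the PR. The base class here is
# a stand-in: the real SklearnCvExperiment runs cross-validation; this one
# just echoes its inputs so the structure of the pattern is visible.

class SklearnCvExperimentSketch:
    """Stand-in base class: holds configuration and scores parameter dicts."""

    _tags = {"python_dependencies": None}

    def __init__(self, estimator_name, cv=3):
        self.estimator_name = estimator_name
        self.cv = cv

    def score(self, params):
        # The real class would cross-validate the estimator with `params`;
        # here we return a dummy score plus metadata for illustration.
        return 0.0, {"estimator": self.estimator_name, "params": params}


class LightGBMExperimentSketch(SklearnCvExperimentSketch):
    """Thin wrapper: overrides only the soft-dependency tag, inherits scoring."""

    _tags = {"python_dependencies": "lightgbm"}


exp = LightGBMExperimentSketch("LGBMClassifier", cv=2)
score, meta = exp.score({"n_estimators": 10})
print(meta)  # {'estimator': 'LGBMClassifier', 'params': {'n_estimators': 10}}
```

The design choice is that LightGBM's sklearn-compatible estimators already fit the base class's contract, so the subclass exists mainly for discoverability and dependency tracking.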


.github/workflows/test.yml

Lines changed: 4 additions & 0 deletions
@@ -367,6 +367,10 @@ jobs:
           python -m pip install --no-cache-dir build
           make install-all-extras-for-test

+      - name: Remove lightgbm on macOS (native lib requires libomp, tests hang)
+        if: runner.os == 'macOS'
+        run: python -m pip uninstall -y lightgbm
+
       - name: Show dependencies
         run: python -m pip list

docs/source/_snippets/user_guide/integrations.py

Lines changed: 35 additions & 0 deletions
@@ -226,6 +226,41 @@ def configure_optimizers(self):
 best_params = optimizer.solve()
 # [end:pytorch_lightning]

+# [start:lightgbm_experiment]
+from lightgbm import LGBMClassifier
+from sklearn.datasets import load_iris
+
+from hyperactive.experiment.integrations import LightGBMExperiment
+from hyperactive.opt.gfo import BayesianOptimizer
+
+# Load data
+X, y = load_iris(return_X_y=True)
+
+# Create the experiment
+experiment = LightGBMExperiment(
+    estimator=LGBMClassifier(verbosity=-1),
+    X=X,
+    y=y,
+    cv=3,
+)
+
+# Define search space
+search_space = {
+    "n_estimators": [50, 100, 200],
+    "max_depth": [3, 5, 7, -1],
+    "learning_rate": [0.01, 0.05, 0.1, 0.2],
+}
+
+# Optimize
+optimizer = BayesianOptimizer(
+    search_space=search_space,
+    n_iter=10,
+    experiment=experiment,
+)
+best_params = optimizer.solve()
+print(f"Best parameters: {best_params}")
+# [end:lightgbm_experiment]
+

 # --- Runnable test code below ---
 if __name__ == "__main__":

docs/source/api_reference/experiments_integrations.rst

Lines changed: 12 additions & 1 deletion
@@ -7,7 +7,7 @@ The :mod:`hyperactive.experiment.integrations` module contains experiment classe
 for integration with machine learning frameworks.

 These experiments provide seamless hyperparameter optimization for scikit-learn,
-sktime, skpro, and PyTorch Lightning models.
+sktime, skpro, PyTorch Lightning, and LightGBM models.

 Scikit-Learn
 ------------
@@ -55,3 +55,14 @@ Experiments for PyTorch Lightning models.
     :template: class.rst

     TorchExperiment
+
+LightGBM
+--------
+
+Cross-validation experiments for LightGBM estimators.
+
+.. autosummary::
+    :toctree: auto_generated/
+    :template: class.rst
+
+    LightGBMExperiment

docs/source/user_guide/integrations.rst

Lines changed: 38 additions & 1 deletion
@@ -7,7 +7,7 @@ Framework Integrations
 Hyperactive integrates with popular ML frameworks, providing drop-in replacements
 for tools like ``GridSearchCV``. Each ML framework has its own conventions for training and evaluation. The integration
 classes handle cross-validation setup, scoring metrics, and parameter translation, so
-you can use any optimizer with scikit-learn, sktime, skpro, or PyTorch models.
+you can use any optimizer with scikit-learn, sktime, skpro, PyTorch, or LightGBM models.

 ----

@@ -53,6 +53,15 @@ Supported Frameworks

       Deep learning models

+   .. grid-item-card:: LightGBM
+      :class-card: sd-border-info
+      :link: #lightgbm-integration
+      :link-type: url
+
+      **LightGBMExperiment**
+
+      Gradient boosting models
+
 ----

 Quick Reference
@@ -237,6 +246,34 @@ For deep learning hyperparameter optimization with PyTorch Lightning:

 ----

+LightGBM Integration
+--------------------
+
+For gradient boosting hyperparameter optimization with LightGBM:
+
+.. note::
+
+   Requires ``pip install lightgbm``
+
+.. grid:: 1
+   :gutter: 0
+
+   .. grid-item::
+      :class: sd-bg-light sd-pt-3 sd-pb-1 sd-ps-3 sd-pe-3 sd-rounded-3
+
+      **Key Features**
+
+      - Optimize LightGBM classifiers and regressors
+      - LightGBM follows the sklearn API, so cross-validation works out of the box
+      - Supports all LightGBM hyperparameters (``n_estimators``, ``max_depth``, ``learning_rate``, etc.)
+
+      .. literalinclude:: ../_snippets/user_guide/integrations.py
+         :language: python
+         :start-after: # [start:lightgbm_experiment]
+         :end-before: # [end:lightgbm_experiment]
+
 ----

 Tips
 ----
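The search space documented above is a dict mapping each LightGBM hyperparameter to a list of candidate values. A grid-style optimizer would enumerate the Cartesian product of those lists; this stdlib-only sketch (independent of Hyperactive) shows what that candidate set looks like:

```python
# Enumerate the candidate configurations implied by the search space from
# the docs snippet above. Uses only the standard library.
from itertools import product

search_space = {
    "n_estimators": [50, 100, 200],
    "max_depth": [3, 5, 7, -1],
    "learning_rate": [0.01, 0.05, 0.1, 0.2],
}

keys = list(search_space)
candidates = [dict(zip(keys, combo)) for combo in product(*search_space.values())]

print(len(candidates))  # 3 * 4 * 4 = 48 configurations
print(candidates[0])    # {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.01}
```

This is why the docs pair the space with ``BayesianOptimizer(n_iter=10, ...)``: the optimizer samples 10 of the 48 configurations guided by a surrogate model rather than evaluating all of them.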

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -99,6 +99,7 @@ all_extras = [
     "optuna<5",
     "cmaes",  # Required for CmaEsOptimizer (optuna's CMA-ES sampler)
     "lightning",
+    "lightgbm",
 ]

src/hyperactive/base/tests/test_endtoend.py

Lines changed: 48 additions & 0 deletions
@@ -47,3 +47,51 @@ def test_endtoend_hillclimbing():
     assert isinstance(best_params, dict), "Best parameters should be a dictionary"
     assert "C" in best_params, "Best parameters should contain 'C'"
     assert "gamma" in best_params, "Best parameters should contain 'gamma'"
+
+
+def test_endtoend_lightgbm():
+    """Test end-to-end usage of HillClimbing optimizer with LightGBM experiment."""
+    from skbase.utils.dependencies import _check_soft_dependencies
+
+    if not _check_soft_dependencies("lightgbm", severity="none"):
+        return None
+
+    # 1. define the experiment
+    from lightgbm import LGBMClassifier
+    from sklearn.datasets import load_iris
+
+    from hyperactive.experiment.integrations import LightGBMExperiment
+
+    X, y = load_iris(return_X_y=True)
+
+    lgbm_exp = LightGBMExperiment(
+        estimator=LGBMClassifier(n_estimators=10, verbosity=-1),
+        X=X,
+        y=y,
+        cv=2,
+    )
+
+    # 2. set up the HillClimbing optimizer
+    import numpy as np
+
+    from hyperactive.opt import HillClimbing
+
+    hillclimbing_config = {
+        "search_space": {
+            "n_estimators": np.array([5, 10, 20]),
+            "max_depth": np.array([2, 3, 5]),
+        },
+        "n_iter": 10,
+    }
+    hill_climbing = HillClimbing(**hillclimbing_config, experiment=lgbm_exp)
+
+    # 3. run the HillClimbing optimizer
+    hill_climbing.solve()
+
+    best_params = hill_climbing.best_params_
+    assert best_params is not None, "Best parameters should not be None"
+    assert isinstance(best_params, dict), "Best parameters should be a dictionary"
+    assert (
+        "n_estimators" in best_params
+    ), "Best parameters should contain 'n_estimators'"
+    assert "max_depth" in best_params, "Best parameters should contain 'max_depth'"
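The test guards itself with skbase's `_check_soft_dependencies`, returning early when lightgbm is not installed so the suite passes on environments without the extra (such as the macOS CI job above, which uninstalls it). A stdlib-only analogue of that guard, using `importlib.util.find_spec` instead of skbase (the helper name here is illustrative):

```python
# Stdlib sketch of a soft-dependency check: detect whether a package is
# importable without actually importing it (and without triggering its
# import-time side effects, such as loading native libraries).
from importlib.util import find_spec


def soft_dependency_present(package_name):
    """Return True if `package_name` can be imported, False otherwise."""
    return find_spec(package_name) is not None


# "os" ships with Python, so the guard passes.
print(soft_dependency_present("os"))  # True
# A made-up package name fails the check instead of raising ImportError.
print(soft_dependency_present("definitely_not_a_real_package_xyz"))  # False
```

A test would then `return` (or better, `pytest.skip`) when the check fails, which is exactly the shape of the guard at the top of `test_endtoend_lightgbm`.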

src/hyperactive/experiment/integrations/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,8 @@
 """Integrations with packages for tuning."""
+
 # copyright: hyperactive developers, MIT License (see LICENSE file)

+from hyperactive.experiment.integrations.lightgbm_experiment import LightGBMExperiment
 from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
 from hyperactive.experiment.integrations.skpro_probareg import (
     SkproProbaRegExperiment,
@@ -16,6 +18,7 @@
 )

 __all__ = [
+    "LightGBMExperiment",
     "SklearnCvExperiment",
     "SkproProbaRegExperiment",
     "SktimeClassificationExperiment",
src/hyperactive/experiment/integrations/lightgbm_experiment.py

Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
+"""Experiment adapter for LightGBM cross-validation experiments."""
+
+# copyright: hyperactive developers, MIT License (see LICENSE file)
+
+from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
+
+
+class LightGBMExperiment(SklearnCvExperiment):
+    """Experiment adapter for LightGBM cross-validation experiments.
+
+    Thin wrapper around ``SklearnCvExperiment`` for LightGBM estimators.
+    LightGBM's sklearn-compatible API (``LGBMClassifier``, ``LGBMRegressor``)
+    works without adaptation. This class exists for discoverability, explicit
+    soft-dependency tracking via the ``python_dependencies`` tag, and as an
+    extension point for future LightGBM-specific behavior.
+
+    Parameters
+    ----------
+    estimator : LGBMClassifier or LGBMRegressor
+        The LightGBM estimator to evaluate. Any sklearn-compatible estimator
+        is accepted, but LightGBM estimators are the intended use case.
+    X : array-like, shape (n_samples, n_features)
+        Input data.
+    y : array-like, shape (n_samples,)
+        Target values.
+    scoring : callable or str, default=None
+        Scoring function. Defaults follow ``SklearnCvExperiment`` conventions:
+        ``accuracy_score`` for classifiers, ``mean_squared_error`` for
+        regressors.
+    cv : int or cross-validation generator, default=KFold(n_splits=3, shuffle=True)
+        Cross-validation strategy.
+
+    Notes
+    -----
+    LightGBM prints training logs to stdout by default. Pass
+    ``verbosity=-1`` to the estimator constructor to suppress this output.
+
+    For all remaining parameter details see ``SklearnCvExperiment``.
+
+    Examples
+    --------
+    >>> from hyperactive.experiment.integrations import LightGBMExperiment
+    >>> from lightgbm import LGBMClassifier
+    >>> from sklearn.datasets import load_iris
+    >>> X, y = load_iris(return_X_y=True)
+    >>> exp = LightGBMExperiment(
+    ...     estimator=LGBMClassifier(verbosity=-1),
+    ...     X=X,
+    ...     y=y,
+    ... )
+    >>> params = {"n_estimators": 50, "max_depth": 3}
+    >>> score, metadata = exp.score(params)
+    """
+
+    _tags = {
+        "authors": ["kajal-jotwani"],
+        "python_dependencies": "lightgbm",
+    }
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator."""
+        from skbase.utils.dependencies import _check_soft_dependencies
+
+        if not _check_soft_dependencies("lightgbm", severity="none"):
+            return []
+
+        from lightgbm import LGBMClassifier, LGBMRegressor
+        from sklearn.datasets import load_diabetes, load_iris
+
+        X, y = load_iris(return_X_y=True)
+        params0 = {
+            "estimator": LGBMClassifier(n_estimators=10, verbosity=-1),
+            "X": X,
+            "y": y,
+            "cv": 2,
+        }
+
+        X, y = load_diabetes(return_X_y=True)
+        params1 = {
+            "estimator": LGBMRegressor(n_estimators=10, verbosity=-1),
+            "X": X,
+            "y": y,
+            "cv": 2,
+        }
+
+        return [params0, params1]
+
+    @classmethod
+    def _get_score_params(cls):
+        """Return parameter settings for score/evaluate tests."""
+        from skbase.utils.dependencies import _check_soft_dependencies
+
+        if not _check_soft_dependencies("lightgbm", severity="none"):
+            return []
+
+        score_params = {"n_estimators": 5, "max_depth": 2}
+        return [score_params, score_params]
