Hyperactive/src/hyperactive/integrations/sktime/_forecasting.py at 707f3a2bfebbbeb7b551992ca292538ba843db56 · hyperactive-project/Hyperactive · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
# copyright: hyperactive developers, MIT License (see LICENSE file)

import numpy as np
from skbase.utils.dependencies import _check_soft_dependencies

if _check_soft_dependencies("sktime", severity="none"):
    from sktime.forecasting.base._delegate import _DelegatedForecaster
else:
    from skbase.base import BaseEstimator as _DelegatedForecaster

from hyperactive.experiment.integrations.sktime_forecasting import (
    SktimeForecastingExperiment,
)


class ForecastingOptCV(_DelegatedForecaster):
    """Tune an sktime forecaster via any optimizer in the hyperactive toolbox.

    ``ForecastingOptCV`` uses any available tuning engine from ``hyperactive``
    to tune a forecaster by backtesting.

    It passes backtesting results as scores to the tuning engine,
    which identifies the best hyperparameters.

    Any available tuning engine from hyperactive can be used, for example:

    * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch``,
      this results in the same algorithm as ``ForecastingGridSearchCV``
    * hill climbing - ``from hyperactive.opt import HillClimbing``
    * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer``

    Configuration of the tuning engine is as per the respective documentation.

    Formally, ``ForecastingOptCV`` does the following:

    In ``fit``:

    * wraps the ``forecaster``, ``scoring``, and other parameters
      into a ``SktimeForecastingExperiment`` instance, which is passed to the optimizer
      ``optimizer`` as the ``experiment`` argument.
    * Optimal parameters are then obtained from ``optimizer.solve``, and set
      as ``best_params_`` and ``best_forecaster_`` attributes.
    *  If ``refit=True``, ``best_forecaster_`` is fitted to the entire ``y`` and ``X``.

    In ``predict`` and ``predict``-like methods, calls the respective method
    of the ``best_forecaster_`` if ``refit=True``.

    Parameters
    ----------
    forecaster : sktime forecaster, BaseForecaster instance or interface compatible
        The forecaster to tune, must implement the sktime forecaster interface.

    optimizer : hyperactive BaseOptimizer
        The optimizer to be used for hyperparameter search.

    cv : sktime BaseSplitter descendant
        determines split of ``y`` and possibly ``X`` into test and train folds
        y is always split according to ``cv``, see above
        if ``cv_X`` is not passed, ``X`` splits are subset to ``loc`` equal to ``y``
        if ``cv_X`` is passed, ``X`` is split according to ``cv_X``

    strategy : {"refit", "update", "no-update_params"}, optional, default="refit"
        defines the ingestion mode when the forecaster sees new data when window expands
        "refit" = forecaster is refitted to each training window
        "update" = forecaster is updated with training window data, in sequence provided
        "no-update_params" = fit to first training window, re-used without fit or update

    update_behaviour : str, optional, default = "full_refit"
        one of {"full_refit", "inner_only", "no_update"}
        behaviour of the forecaster when calling update
        "full_refit" = both tuning parameters and inner estimator refit on all data seen
        "inner_only" = tuning parameters are not re-tuned, inner estimator is updated
        "no_update" = neither tuning parameters nor inner estimator are updated

    scoring : sktime metric (BaseMetric), str, or callable, optional (default=None)
        scoring metric to use in tuning the forecaster

        * sktime metric objects (BaseMetric) descendants can be searched
        with the ``registry.all_estimators`` search utility,
        for instance via ``all_estimators("metric", as_dataframe=True)``

        * If callable, must have signature
        ``(y_true: 1D np.ndarray, y_pred: 1D np.ndarray) -> float``,
        assuming np.ndarrays being of the same length, and lower being better.
        Metrics in sktime.performance_metrics.forecasting are all of this form.

        * If str, uses registry.resolve_alias to resolve to one of the above.
          Valid strings are valid registry.craft specs, which include
          string repr-s of any BaseMetric object, e.g., "MeanSquaredError()";
          and keys of registry.ALIAS_DICT referring to metrics.

        * If None, defaults to MeanAbsolutePercentageError()

    refit : bool, optional (default=True)
        True = refit the forecaster with the best parameters on the entire data in fit
        False = no refitting takes place. The forecaster cannot be used to predict.
        This is to be used to tune the hyperparameters, and then use the estimator
        as a parameter estimator, e.g., via get_fitted_params or PluginParamsForecaster.

    error_score : "raise" or numeric, default=np.nan
        Value to assign to the score if an exception occurs in estimator fitting. If set
        to "raise", the exception is raised. If a numeric value is given,
        FitFailedWarning is raised.

    cv_X : sktime BaseSplitter descendant, optional
        determines split of ``X`` into test and train folds
        default is ``X`` being split to identical ``loc`` indices as ``y``
        if passed, must have same number of splits as ``cv``

    backend : string, by default "None".
        Parallelization backend to use for runs.
        Runs parallel evaluate if specified and ``strategy="refit"``.

        - "None": executes loop sequentially, simple list comprehension
        - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
        - "dask": uses ``dask``, requires ``dask`` package in environment
        - "dask_lazy": same as "dask",
          but changes the return to (lazy) ``dask.dataframe.DataFrame``.
        - "ray": uses ``ray``, requires ``ray`` package in environment

        Recommendation: Use "dask" or "loky" for parallel evaluate.
        "threading" is unlikely to see speed ups due to the GIL and the serialization
        backend (``cloudpickle``) for "dask" and "loky" is generally more robust
        than the standard ``pickle`` library used in "multiprocessing".

    backend_params : dict, optional
        additional parameters passed to the backend as config.
        Directly passed to ``utils.parallel.parallelize``.
        Valid keys depend on the value of ``backend``:

        - "None": no additional parameters, ``backend_params`` is ignored
        - "loky", "multiprocessing" and "threading": default ``joblib`` backends
          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
          with the exception of ``backend`` which is directly controlled by ``backend``.
          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
          will default to ``joblib`` defaults.
        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``.
          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
          ``backend`` must be passed as a key of ``backend_params`` in this case.
          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
          will default to ``joblib`` defaults.
        - "dask": any valid keys for ``dask.compute`` can be passed,
          e.g., ``scheduler``

        - "ray": The following keys can be passed:

            - "ray_remote_args": dictionary of valid keys for ``ray.init``
            - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting
                down after parallelization.
            - "logger_name": str, default="ray"; name of the logger to use.
            - "mute_warnings": bool, default=False; if True, suppresses warnings

    tune_by_instance : bool, optional (default=False)
        Whether to tune parameter by each time series instance separately,
        in case of Panel or Hierarchical data passed to the tuning estimator.
        Only applies if time series passed are Panel or Hierarchical.
        If True, clones of the forecaster will be fit to each instance separately,
        and are available in fields of the ``forecasters_`` attribute.
        Has the same effect as applying ForecastByLevel wrapper to self.
        If False, the same best parameter is selected for all instances.

    tune_by_variable : bool, optional (default=False)
        Whether to tune parameter by each time series variable separately,
        in case of multivariate data passed to the tuning estimator.
        Only applies if time series passed are strictly multivariate.
        If True, clones of the forecaster will be fit to each variable separately,
        and are available in fields of the ``forecasters_`` attribute.
        Has the same effect as applying ColumnEnsembleForecaster wrapper to self.
        If False, the same best parameter is selected for all variables.

    Examples
    --------
    Any available tuning engine from hyperactive can be used, for example:

    * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch``
    * hill climbing - ``from hyperactive.opt import HillClimbing``
    * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer``

    For illustration, we use grid search, this can be replaced by any other optimizer.

    1. defining the tuned estimator:
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> from sktime.split import ExpandingWindowSplitter
    >>> from hyperactive.integrations.sktime import ForecastingOptCV
    >>> from hyperactive.opt import GridSearchSk as GridSearch
    >>>
    >>> param_grid = {"strategy": ["mean", "last", "drift"]}
    >>> tuned_naive = ForecastingOptCV(
    ...     NaiveForecaster(),
    ...     GridSearch(param_grid),
    ...     cv=ExpandingWindowSplitter(
    ...         initial_window=12, step_length=3, fh=range(1, 13)
    ...     ),
    ... )

    2. fitting the tuned estimator:
    >>> from sktime.datasets import load_airline
    >>> from sktime.split import temporal_train_test_split
    >>> y = load_airline()
    >>> y_train, y_test = temporal_train_test_split(y, test_size=12)
    >>>
    >>> tuned_naive.fit(y_train, fh=range(1, 13))
    ForecastingOptCV(...)
    >>> y_pred = tuned_naive.predict()

    3. obtaining best parameters and best forecaster
    >>> best_params = tuned_naive.best_params_
    >>> best_forecaster = tuned_naive.best_forecaster_
    """

    _tags = {
        "authors": "fkiraly",
        "maintainers": "fkiraly",
        "python_dependencies": "sktime",
    }

    # attribute name read by _DelegatedForecaster: all methods that are not
    #     overridden in this class are delegated to getattr(self, _delegate_name)
    #     see further details in the _DelegatedForecaster docstring
    _delegate_name = "best_forecaster_"

    def __init__(
        self,
        forecaster,
        optimizer,
        cv,
        strategy="refit",
        update_behaviour="full_refit",
        scoring=None,
        refit=True,
        error_score=np.nan,
        cv_X=None,
        backend=None,
        backend_params=None,
        tune_by_instance=False,
        tune_by_variable=False,
    ):
        # Store every constructor argument on self under its own name, unchanged.
        # No validation, cloning, or derived state is computed here; the values
        # are only read later, in the fit / predict / update methods.
        self.forecaster = forecaster
        self.optimizer = optimizer
        self.cv = cv
        self.strategy = strategy
        self.update_behaviour = update_behaviour
        self.scoring = scoring
        self.refit = refit
        self.error_score = error_score
        self.cv_X = cv_X
        self.backend = backend
        self.backend_params = backend_params
        self.tune_by_instance = tune_by_instance
        self.tune_by_variable = tune_by_variable
        # The parent initializer runs last, after all parameters are set on self.
        # NOTE(review): presumably required by the skbase/sktime estimator
        # convention (parameters set before super().__init__) - confirm.
        super().__init__()

    def _fit(self, y, X, fh):
        """Tune the forecaster on the training data, broadcasting if requested.

        If ``tune_by_instance`` or ``tune_by_variable`` is set, the broadcasting
        path ``_fit_with_broadcasting`` is tried first. When it reports that it
        handled the data, fitting is complete. In every other case the regular
        single-tuner path ``_fit_single`` is run.

        Parameters
        ----------
        y : pd.Series or pd.DataFrame
            Target time series to which to fit the forecaster.
        X : pd.DataFrame or None
            Exogenous time series, forwarded unchanged to the fitting path used.
        fh : int, list, np.array or ForecastingHorizon, or None
            Forecasting horizon, forwarded unchanged to the fitting path used.

        Returns
        -------
        self : reference to self.
        """
        wants_broadcasting = self.tune_by_instance or self.tune_by_variable

        # short-circuit: the broadcasting path is only attempted when requested,
        # and it returns False when the data shape makes it inapplicable
        if wants_broadcasting and self._fit_with_broadcasting(y, X, fh):
            return self

        return self._fit_single(y, X, fh)

    def _fit_single(self, y, X, fh):
        """Tune on ``y`` and ``X`` with one optimizer run, then optionally refit.

        Steps, in order:

        1. clone ``self.forecaster`` and resolve ``self.scoring`` via
           ``check_scoring``; store the metric as ``scorer_`` and the number of
           splits of ``self.cv`` on ``y`` as ``n_splits_``.
        2. wrap everything in a ``SktimeForecastingExperiment`` and hand it to a
           clone of ``self.optimizer`` through ``set_params(experiment=...)``.
        3. store the result of ``optimizer.solve()`` as ``best_params_`` and the
           cloned forecaster with those parameters set as ``best_forecaster_``.
        4. store ``cv_results_``, ``best_score_`` and ``best_index_``.
        5. if ``self.refit`` is true, fit ``best_forecaster_`` on ``y``, ``X``,
           ``fh`` and record the elapsed wall-clock seconds in ``refit_time_``;
           otherwise ``refit_time_`` is ``0.0``.

        Parameters
        ----------
        y : pd.Series or pd.DataFrame
            Target time series used for tuning and for the optional refit.
        X : pd.DataFrame or None
            Exogenous time series, passed to the experiment and to the refit.
        fh : forecasting horizon or None
            Passed to ``best_forecaster_.fit`` when refitting.

        Returns
        -------
        self : reference to self.
        """
        import time

        from sktime.utils.validation.forecasting import check_scoring

        candidate = self.forecaster.clone()

        metric = check_scoring(self.scoring, obj=self)
        self.scorer_ = metric

        # number of train/test splits the splitter produces on y
        self.n_splits_ = self.cv.get_n_splits(y)

        tuning_experiment = SktimeForecastingExperiment(
            forecaster=candidate,
            scoring=metric,
            cv=self.cv,
            X=X,
            y=y,
            strategy=self.strategy,
            error_score=self.error_score,
            cv_X=self.cv_X,
            backend=self.backend,
            backend_params=self.backend_params,
        )

        # work on a clone so the optimizer passed by the user is left untouched
        search = self.optimizer.clone()
        search.set_params(experiment=tuning_experiment)
        tuned_params = search.solve()

        self.best_params_ = tuned_params
        self.best_forecaster_ = candidate.set_params(**tuned_params)

        # use the optimizer's own results if it exposes them,
        # otherwise fall back to a minimal dict holding the best parameters
        self.cv_results_ = (
            search.results
            if hasattr(search, "results")
            else {"best_params": tuned_params}
        )

        if hasattr(search, "best_score"):
            self.best_score_ = search.best_score
        else:
            # optimizer does not report a score: evaluate the best parameters
            fallback_score, _ = tuning_experiment.score(tuned_params)
            self.best_score_ = fallback_score

        # only a single best result is tracked, so its index is always 0
        self.best_index_ = 0

        if not self.refit:
            self.refit_time_ = 0.0
            return self

        # refit the tuned forecaster on all data and record how long it took
        refit_started = time.time()
        self.best_forecaster_.fit(y=y, X=X, fh=fh)
        self.refit_time_ = time.time() - refit_started

        return self

    def _predict(self, fh, X):
        """Produce point forecasts, provided the tuner was fitted with refit.

        Private method holding the core prediction logic, called from
        ``predict``. When ``self.refit`` is true, the call is forwarded to the
        parent class ``_predict``; otherwise no fitted forecaster is available
        for prediction and an error is raised.

        Parameters
        ----------
        fh : ForecastingHorizon or None
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame or None
            Exogenous time series.

        Returns
        -------
        y_pred : pd.Series
            Point predictions, as returned by the parent class ``_predict``.

        Raises
        ------
        RuntimeError
            If ``self.refit`` is false.
        """
        if self.refit:
            return super()._predict(fh=fh, X=X)

        # refit=False: the instance can only serve as a parameter estimator
        raise RuntimeError(
            f"In {self.__class__.__name__}, refit must be True to make predictions,"
            f" but found refit=False. If refit=False, {self.__class__.__name__} can"
            " be used only to tune hyper-parameters, as a parameter estimator."
        )

    def _fit_with_broadcasting(self, y, X, fh):
        """Fit one tuner per instance or per variable, if broadcasting applies.

        Two broadcasting modes exist, checked in this order:

        * ``tune_by_instance`` and ``y`` has a ``pd.MultiIndex``: one sub-tuner
          is fitted per unique value of the first index level. The sub-tuners
          inherit ``tune_by_variable`` and may broadcast further over variables.
        * ``tune_by_variable`` and ``y`` is a ``pd.DataFrame`` with more than
          one column: one sub-tuner is fitted per column; ``X`` is passed
          unchanged to every sub-tuner.

        If neither mode applies, nothing is fitted, no attribute is written,
        and ``False`` is returned so that the caller can fall back to the
        regular fit.

        On success, the following attributes are written to self:
        ``scorer_``, ``n_splits_``, ``forecasters_`` (DataFrame with one row
        per sub-tuner), ``best_forecaster_``, ``best_params_``, ``best_score_``,
        ``best_index_``, ``cv_results_`` and ``refit_time_``.

        Parameters
        ----------
        y : pd.Series or pd.DataFrame
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        bool
            True if broadcasting was performed, False otherwise.

        Raises
        ------
        RuntimeError
            If broadcasting applies but there is no instance or variable to
            fit a sub-tuner on.
        """
        import pandas as pd
        from sktime.utils.validation.forecasting import check_scoring

        # type-based checks; objects without an ``index`` are never panel data
        is_panel = isinstance(getattr(y, "index", None), pd.MultiIndex)
        is_multivariate = isinstance(y, pd.DataFrame) and len(y.columns) > 1

        # Decide the broadcasting mode. ``parts`` lazily yields one
        # (key, y_slice, X_slice) triple per sub-tuner to be fitted.
        if self.tune_by_instance and is_panel:
            key_name = "instance"
            sub_tune_by_variable = self.tune_by_variable
            parts = (
                (key, y.loc[key], None if X is None else X.loc[key])
                for key in y.index.get_level_values(0).unique()
            )
        elif self.tune_by_variable and is_multivariate:
            key_name = "variable"
            sub_tune_by_variable = False
            parts = ((column, y[[column]], X) for column in y.columns)
        else:
            # broadcasting was requested but is not applicable to this data
            return False

        self.scorer_ = check_scoring(self.scoring, obj=self)
        self.n_splits_ = self.cv.get_n_splits(y)

        records = []
        for key, y_part, X_part in parts:
            tuner = self._fit_sub_tuner(y_part, X_part, fh, sub_tune_by_variable)
            records.append(
                {
                    key_name: key,
                    "forecaster": tuner.best_forecaster_,
                    "best_params": tuner.best_params_,
                    "best_score": tuner.best_score_,
                    "refit_time": getattr(tuner, "refit_time_", 0.0),
                }
            )

        if not records:
            raise RuntimeError(
                "Broadcasting was requested but no forecasters were fitted."
            )

        self.forecasters_ = pd.DataFrame(records)

        # Pick the entry with the lowest score. Missing and NaN scores (NaN is
        # the default ``error_score``) are ranked last: a plain argmin would
        # return the position of the first NaN and select a failed fit as best.
        # NOTE(review): assumes lower ``best_score_`` is better, as the original
        # argmin-based selection did - confirm against the score sign
        # convention of the optimizer / experiment.
        comparable_scores = [
            np.inf if pd.isna(record["best_score"]) else record["best_score"]
            for record in records
        ]
        best_index = int(np.argmin(comparable_scores))
        best_record = records[best_index]

        # NOTE(review): ``best_forecaster_`` was fitted on a single instance or
        # variable only - confirm this is the intended delegate for predict.
        self.best_forecaster_ = best_record["forecaster"]
        self.best_params_ = best_record["best_params"]
        self.best_score_ = best_record["best_score"]
        self.best_index_ = best_index

        self.cv_results_ = {"forecasters": self.forecasters_}

        # total refit time is the sum over all sub-tuners
        if self.refit:
            self.refit_time_ = float(
                np.sum([record["refit_time"] for record in records])
            )
        else:
            self.refit_time_ = 0.0

        return True

    def _fit_sub_tuner(self, y, X, fh, tune_by_variable):
        """Fit a fresh copy of this tuner to one slice of the data.

        The copy shares all settings of self, with cloned ``forecaster`` and
        ``optimizer``, except that ``tune_by_instance`` is always off and
        ``tune_by_variable`` is set to the value passed.

        Parameters
        ----------
        y : pd.Series or pd.DataFrame
            Slice of the target time series for this sub-tuner.
        X : pd.DataFrame or None
            Exogenous data for this sub-tuner.
        fh : int, list or np.array, or None
            Forecasting horizon passed to ``fit``.
        tune_by_variable : bool
            Whether the sub-tuner may itself broadcast over variables.

        Returns
        -------
        tuner : fitted instance of the same class as self.
        """
        tuner = type(self)(
            forecaster=self.forecaster.clone(),
            optimizer=self.optimizer.clone(),
            cv=self.cv,
            strategy=self.strategy,
            update_behaviour=self.update_behaviour,
            scoring=self.scoring,
            refit=self.refit,
            error_score=self.error_score,
            cv_X=self.cv_X,
            backend=self.backend,
            backend_params=self.backend_params,
            tune_by_instance=False,
            tune_by_variable=tune_by_variable,
        )
        tuner.fit(y, X=X, fh=fh)
        return tuner

    def _update(self, y, X=None, update_params=True):
        """Update the tuned forecaster with new data, per ``update_behaviour``.

        The value of ``self.update_behaviour`` selects what happens:

        * ``"full_refit"`` - the parent class ``_update`` is called with
          ``update_params`` as passed.
        * ``"inner_only"`` - ``best_forecaster_.update`` is called with
          ``update_params`` as passed.
        * ``"no_update"`` - ``best_forecaster_.update`` is called with
          ``update_params=False``, regardless of the value passed.

        Parameters
        ----------
        y : time series in the inner format of the forecaster
            Time series with which to update the forecaster.
        X : optional (default=None)
            Exogenous time series for the update.
        update_params : bool, optional (default=True)
            Whether model parameters should be updated.

        Returns
        -------
        self : reference to self

        Raises
        ------
        ValueError
            If ``update_behaviour`` is not one of the three values above.
        """
        update_behaviour = self.update_behaviour

        # reject unknown modes before touching any forecaster state
        if update_behaviour not in ("full_refit", "inner_only", "no_update"):
            raise ValueError(
                'update_behaviour must be one of "full_refit", "inner_only",'
                f' or "no_update", but found {update_behaviour}'
            )

        if update_behaviour == "full_refit":
            super()._update(y=y, X=X, update_params=update_params)
        else:
            # "inner_only" honours the caller's flag, "no_update" forces False
            inner_update_params = (
                update_params if update_behaviour == "inner_only" else False
            )
            self.best_forecaster_.update(y=y, X=X, update_params=inner_update_params)

        return self

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return parameter settings used to construct test instances.

        Three settings are returned, one per optimizer (grid search, random
        search, hill climbing). Between them they cover the three accepted
        forms of ``scoring`` (metric object, callable, string) and the
        ``update_behaviour`` values ``"inner_only"`` and ``"no_update"``
        next to the default.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return. The value is not
            consulted here: the same settings are returned for any name.

        Returns
        -------
        params : list of dict
            Each dict holds keyword arguments for the class constructor.
        """
        from sktime.forecasting.naive import NaiveForecaster
        from sktime.forecasting.trend import PolynomialTrendForecaster
        from sktime.performance_metrics.forecasting import (
            MeanAbsolutePercentageError,
            mean_absolute_percentage_error,
        )
        from sktime.split import SingleWindowSplitter

        from hyperactive.opt.gfo import HillClimbing
        from hyperactive.opt.gridsearch import GridSearchSk
        from hyperactive.opt.random_search import RandomSearchSk

        # grid search, scoring passed as a metric object
        grid_search_case = dict(
            forecaster=NaiveForecaster(strategy="mean"),
            cv=SingleWindowSplitter(fh=1),
            optimizer=GridSearchSk(param_grid={"window_length": [2, 5]}),
            scoring=MeanAbsolutePercentageError(symmetric=True),
        )

        # random search, scoring passed as a plain callable
        random_search_case = dict(
            forecaster=PolynomialTrendForecaster(),
            cv=SingleWindowSplitter(fh=1),
            optimizer=RandomSearchSk(param_distributions={"degree": [1, 2]}),
            scoring=mean_absolute_percentage_error,
            update_behaviour="inner_only",
        )

        # hill climbing, scoring passed as a string spec
        hill_climb_case = dict(
            forecaster=NaiveForecaster(strategy="mean"),
            cv=SingleWindowSplitter(fh=1),
            optimizer=HillClimbing(
                search_space={"window_length": [2, 5]},
                n_iter=10,
                n_neighbours=5,
            ),
            scoring="MeanAbsolutePercentageError(symmetric=True)",
            update_behaviour="no_update",
        )

        return [grid_search_case, random_search_case, hill_climb_case]