@@ -565,7 +565,7 @@ def build_targets(prediction_time, electricity, horizons):
565565 )
566566)
567567
568- predictions = features_with_dropped_cols .skb .apply (
568+ hgbr_predictions = features_with_dropped_cols .skb .apply (
569569 HistGradientBoostingRegressor (
570570 random_state = 0 ,
571571 loss = skrub .choose_from (["squared_error" , "poisson" , "gamma" ], name = "loss" ),
@@ -578,16 +578,33 @@ def build_targets(prediction_time, electricity, horizons):
578578 ),
579579 y = target ,
580580)
581- predictions
581+ hgbr_predictions
582582
583583# %% [markdown]
584584#
585585# The `hgbr_predictions` expression captures the whole expression graph that
586586# includes the feature engineering steps, the target variable, and the model
587587# training step.
588+ #
589+ # In particular, the input data keys for the full pipeline can be
590+ # inspected as follows:
588591
589592# %%
590- predictions .skb .get_data ().keys ()
593+ hgbr_predictions .skb .get_data ().keys ()
594+
595+ # %% [markdown]
596+ #
597+ # Furthermore, the hyper-parameters of the full pipeline can be retrieved as
598+ # follows:
599+
600+ # %%
601+ hgbr_pipeline = hgbr_predictions .skb .get_pipeline ()
602+ hgbr_pipeline .describe_params ()
603+
604+ # %% [markdown]
605+ #
606+ # When running this notebook locally, you can also interactively inspect all
607+ # the steps of the DAG using the following (once uncommented):
591608
592609# %%
593610# hgbr_predictions.skb.full_report()
@@ -608,7 +625,7 @@ def build_targets(prediction_time, electricity, horizons):
608625 pl .concat (
609626 [
610627 targets .skb .eval (),
611- predictions .rename (
628+ hgbr_predictions .rename (
612629 {target_column_name : predicted_target_column_name }
613630 ).skb .eval (),
614631 ],
@@ -664,7 +681,7 @@ def build_targets(prediction_time, electricity, horizons):
664681from sklearn .metrics import d2_tweedie_score
665682
666683
667- cv_results = predictions .skb .cross_validate (
684+ cv_results = hgbr_predictions .skb .cross_validate (
668685 cv = ts_cv_5 ,
669686 scoring = {
670687 "r2" : get_scorer ("r2" ),
@@ -714,7 +731,7 @@ def splitter(X, y, index_generator):
714731
715732# %%
716733cv_predictions = collect_cv_predictions (
717- cv_results ["pipeline" ], ts_cv_5 , predictions , prediction_time
734+ cv_results ["pipeline" ], ts_cv_5 , hgbr_predictions , prediction_time
718735)
719736cv_predictions [0 ]
720737
@@ -1089,7 +1106,7 @@ def plot_residuals_by_month(cv_predictions):
10891106ts_cv_2 = TimeSeriesSplit (
10901107 n_splits = 2 , test_size = test_size , max_train_size = max_train_size , gap = 24
10911108)
1092- randomized_search = predictions .skb .get_randomized_search (
1109+ randomized_search = hgbr_predictions .skb .get_randomized_search (
10931110 cv = ts_cv_2 ,
10941111 scoring = "r2" ,
10951112 n_iter = 100 ,
@@ -1187,7 +1204,7 @@ def plot_residuals_by_month(cv_predictions):
11871204 cv = ts_cv_5 ,
11881205 scoring = {
11891206 "r2" : get_scorer ("r2" ),
1190- "mape" : mape_scorer ,
1207+ "mape" : make_scorer ( mean_absolute_percentage_error ) ,
11911208 },
11921209 n_jobs = - 1 ,
11931210 return_pipeline = True ,
0 commit comments