Skip to content

Commit 1cc01a9

Browse files
committed
More specific variable name for the first model predictions
1 parent 662a8b7 commit 1cc01a9

1 file changed

Lines changed: 25 additions & 8 deletions

File tree

content/python_files/feature_engineering.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ def build_targets(prediction_time, electricity, horizons):
565565
)
566566
)
567567

568-
predictions = features_with_dropped_cols.skb.apply(
568+
hgbr_predictions = features_with_dropped_cols.skb.apply(
569569
HistGradientBoostingRegressor(
570570
random_state=0,
571571
loss=skrub.choose_from(["squared_error", "poisson", "gamma"], name="loss"),
@@ -578,16 +578,33 @@ def build_targets(prediction_time, electricity, horizons):
578578
),
579579
y=target,
580580
)
581-
predictions
581+
hgbr_predictions
582582

583583
# %% [markdown]
584584
#
585585
# The `hgbr_predictions` expression captures the whole expression graph that
586586
# includes the feature engineering steps, the target variable, and the model
587587
# training step.
588+
#
589+
# In particular, the input data keys for the full pipeline can be
590+
# inspected as follows:
588591

589592
# %%
590-
predictions.skb.get_data().keys()
593+
hgbr_predictions.skb.get_data().keys()
594+
595+
# %% [markdown]
596+
#
597+
# Furthermore, the hyper-parameters of the full pipeline can be retrieved as
598+
# follows:
599+
600+
# %%
601+
hgbr_pipeline = hgbr_predictions.skb.get_pipeline()
602+
hgbr_pipeline.describe_params()
603+
604+
# %% [markdown]
605+
#
606+
# When running this notebook locally, you can also interactively inspect all
607+
# the steps of the DAG using the following (once uncommented):
591608

592609
# %%
593610
# hgbr_predictions.skb.full_report()
@@ -608,7 +625,7 @@ def build_targets(prediction_time, electricity, horizons):
608625
pl.concat(
609626
[
610627
targets.skb.eval(),
611-
predictions.rename(
628+
hgbr_predictions.rename(
612629
{target_column_name: predicted_target_column_name}
613630
).skb.eval(),
614631
],
@@ -664,7 +681,7 @@ def build_targets(prediction_time, electricity, horizons):
664681
from sklearn.metrics import d2_tweedie_score
665682

666683

667-
cv_results = predictions.skb.cross_validate(
684+
cv_results = hgbr_predictions.skb.cross_validate(
668685
cv=ts_cv_5,
669686
scoring={
670687
"r2": get_scorer("r2"),
@@ -714,7 +731,7 @@ def splitter(X, y, index_generator):
714731

715732
# %%
716733
cv_predictions = collect_cv_predictions(
717-
cv_results["pipeline"], ts_cv_5, predictions, prediction_time
734+
cv_results["pipeline"], ts_cv_5, hgbr_predictions, prediction_time
718735
)
719736
cv_predictions[0]
720737

@@ -1089,7 +1106,7 @@ def plot_residuals_by_month(cv_predictions):
10891106
ts_cv_2 = TimeSeriesSplit(
10901107
n_splits=2, test_size=test_size, max_train_size=max_train_size, gap=24
10911108
)
1092-
randomized_search = predictions.skb.get_randomized_search(
1109+
randomized_search = hgbr_predictions.skb.get_randomized_search(
10931110
cv=ts_cv_2,
10941111
scoring="r2",
10951112
n_iter=100,
@@ -1187,7 +1204,7 @@ def plot_residuals_by_month(cv_predictions):
11871204
cv=ts_cv_5,
11881205
scoring={
11891206
"r2": get_scorer("r2"),
1190-
"mape": mape_scorer,
1207+
"mape": make_scorer(mean_absolute_percentage_error),
11911208
},
11921209
n_jobs=-1,
11931210
return_pipeline=True,

0 commit comments

Comments
 (0)