Skip to content

Commit aa375da

Browse files
committed
random forest exercise code
1 parent df77e1b commit aa375da

1 file changed

Lines changed: 73 additions & 0 deletions

File tree

content/python_files/feature_engineering.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,3 +1287,76 @@ def scoring(regressor, X, y):
12871287
display(chart)
12881288

12891289
# %%
1290+
# TODO: Exercise using RandomForestRegressor
1291+
from sklearn.ensemble import RandomForestRegressor
1292+
1293+
# Random forest counterpart of the multi-output model: the regressor is applied
# to the feature table, and the supervision signal is `targets` with the
# "prediction_time" and "load_mw" columns dropped, marked as y.
multioutput_predictions_rf = (
    features_with_dropped_cols.skb.apply(
        RandomForestRegressor(n_jobs=-1, random_state=0, max_leaf_nodes=30),
        y=targets.skb.drop(cols=["prediction_time", "load_mw"]).skb.mark_as_y(),
    )
    .skb.set_name("random_forest")
)
1297+
1298+
# %%
1299+
# Rename each raw target column to its "predicted" counterpart. The two name
# sequences are paired positionally, so they must be in the same order.
named_predictions_rf = multioutput_predictions_rf.rename(
    dict(zip(target_column_names, predicted_target_column_names))
)
1302+
1303+
# %%
1304+
# Plot the random forest forecast issued at midnight UTC on 2025-05-24,
# passing a 5-day (120-hour) timedelta as the history argument.
historical_timedelta = datetime.timedelta(days=5)
plot_at_time = datetime.datetime(
    year=2025, month=5, day=24, tzinfo=datetime.timezone.utc
)
plot_horizon_forecast(
    targets,
    named_predictions_rf,
    plot_at_time,
    historical_timedelta,
)
1307+
1308+
# %%
1309+
# Same plot one day later (midnight UTC on 2025-05-25); reuses the
# `historical_timedelta` defined in the previous cell.
plot_at_time = datetime.datetime(
    year=2025, month=5, day=25, tzinfo=datetime.timezone.utc
)
plot_horizon_forecast(
    targets,
    named_predictions_rf,
    plot_at_time,
    historical_timedelta,
)
1311+
1312+
# %%
1313+
# Cross-validate the random forest pipeline with the `ts_cv_5` splitter and the
# `scoring` callable defined earlier in this file; train scores are kept so
# they can be compared against test scores in the plots below.
multioutput_cv_results_rf = multioutput_predictions_rf.skb.cross_validate(
    scoring=scoring,
    cv=ts_cv_5,
    return_train_score=True,
    n_jobs=-1,
    verbose=1,
)
1320+
1321+
# %%
1322+
# Show the cross-validation table rounded to three decimals (cell output).
multioutput_cv_results_rf.round(decimals=3)
1323+
1324+
# %%
1325+
import itertools
1326+
from IPython.display import display
1327+
1328+
# Draw one box plot per (metric, train/test) combination for the random forest
# cross-validation results, with one box per horizon column.
for metric_name, dataset_type in itertools.product(["mape", "r2"], ["train", "test"]):
    # Build the mask from the random forest results themselves. Using the
    # columns of another results table only works when both tables happen to
    # share the exact same column layout, and fails if that table is missing.
    columns = multioutput_cv_results_rf.columns[
        multioutput_cv_results_rf.columns.str.startswith(
            f"{dataset_type}_{metric_name}"
        )
    ]
    data_to_plot = multioutput_cv_results_rf[columns]
    # Strip the "<dataset>_<metric>_" prefix and turn underscores into spaces
    # so that the remaining text can serve as the axis label.
    data_to_plot.columns = [
        col.replace(f"{dataset_type}_", "")
        .replace(f"{metric_name}_", "")
        .replace("_", " ")
        for col in columns
    ]

    # Long format: one row per (horizon, score) pair, as expected by altair.
    data_long = data_to_plot.melt(var_name="horizon", value_name="score")
    chart = (
        altair.Chart(
            data_long,
            title=f"{dataset_type.title()} {metric_name.upper()} Scores by Horizon",
        )
        .mark_boxplot(extent="min-max")
        .encode(
            x=altair.X(
                "horizon:N",
                title="Horizon",
                # NOTE(review): this range stops at shape[1] - 1, so the last
                # horizon is not listed explicitly if horizons are numbered
                # 1..shape[1] -- confirm against the target column names.
                sort=altair.Sort(
                    [f"horizon {h}h" for h in range(1, data_to_plot.shape[1])]
                ),
            ),
            y=altair.Y("score:Q", title=f"{metric_name.upper()} Score"),
            color=altair.Color("horizon:N", legend=None),
        )
    )

    display(chart)
1361+
1362+
# %%

0 commit comments

Comments
 (0)