|
67 | 67 | plot_residuals_vs_predicted, |
68 | 68 | plot_binned_residuals, |
69 | 69 | plot_horizon_forecast, |
| 70 | + collect_cv_predictions, |
70 | 71 | ) |
71 | 72 |
|
72 | 73 | # Ignore warnings from pkg_resources triggered by Python 3.13's multiprocessing. |
@@ -773,42 +774,6 @@ def build_targets(prediction_time, electricity, horizons): |
773 | 774 | # We further analyze our cross-validated model by collecting the predictions on each |
774 | 775 | # split. |
775 | 776 |
|
776 | | -# %% |
def collect_cv_predictions(
    pipelines,
    cv_splitter,
    predictions,
    prediction_time,
):
    """Collect held-out targets and predictions for each cross-validation split.

    Parameters
    ----------
    pipelines : iterable
        One fitted pipeline per split, in the order in which
        ``cv_splitter.split`` yields the splits. Each must expose ``predict``.
    cv_splitter : object
        Splitter whose ``split(data)`` method yields ``(train_idx, test_idx)``
        pairs (scikit-learn splitter interface).
    predictions : object
        Object whose ``.skb`` accessor provides ``get_data()`` and
        ``train_test_split(...)``.
    prediction_time : object
        Object whose ``.skb.eval()`` result can be indexed with the test
        indices produced by ``cv_splitter``.

    Returns
    -------
    list of pl.DataFrame
        One frame per (split, pipeline) pair with the columns
        ``prediction_time``, ``load_mw`` and ``predicted_load_mw``.
    """
    # Evaluate once instead of once per split (plus twice up front).
    # NOTE(review): assumes `.skb.eval()` returns the same values on every call.
    times = prediction_time.skb.eval()

    def splitter(X, y, train_idx, test_idx):
        """Workaround to transform scikit-learn split indices into a function
        understood by `skrub.train_test_split`."""
        return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

    results = []

    # A single split generator drives everything. The indices used for the
    # time column are handed to the splitter explicitly, so the three columns
    # are aligned by construction rather than by keeping two independent
    # generators in lockstep.
    for (train_idx, test_idx), pipeline in zip(cv_splitter.split(times), pipelines):
        split = predictions.skb.train_test_split(
            predictions.skb.get_data(),
            splitter=splitter,
            train_idx=train_idx,
            test_idx=test_idx,
        )
        results.append(
            pl.DataFrame(
                {
                    "prediction_time": times[test_idx],
                    "load_mw": split["y_test"],
                    "predicted_load_mw": pipeline.predict(split["test"]),
                }
            )
        )
    return results
811 | | - |
812 | 777 |
|
813 | 778 | # %% |
814 | 779 | hgbr_cv_predictions = collect_cv_predictions( |
|
0 commit comments