|
| 1 | +""" pyplots.ai |
| 2 | +pdp-basic: Partial Dependence Plot |
| 3 | +Library: letsplot 4.8.2 | Python 3.13.11 |
| 4 | +Quality: 91/100 | Created: 2025-12-31 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from lets_plot import * |
| 10 | +from sklearn.datasets import make_regression |
| 11 | +from sklearn.ensemble import GradientBoostingRegressor |
| 12 | +from sklearn.inspection import partial_dependence |
| 13 | + |
| 14 | + |
# Switch lets-plot to its HTML output mode before any plot is built.
LetsPlot.setup_html()

# Synthetic regression data: five generated columns, given weather-style names
# purely for labelling. The global NumPy seed is fixed as well because the
# sampling steps further down draw from np.random.
np.random.seed(42)
feature_names = ["Temperature", "Humidity", "Pressure", "WindSpeed", "Altitude"]
X, y = make_regression(
    n_samples=500,
    n_features=5,
    noise=20,
    random_state=42,
)

# Gradient-boosted regressor whose partial dependence is inspected below.
model = GradientBoostingRegressor(
    n_estimators=100,
    max_depth=4,
    random_state=42,
)
model.fit(X, y)
| 24 | + |
# Partial dependence of the model output on Temperature (column 0).
# kind="both" yields the averaged curve and the per-sample ICE curves.
feature_idx = 0
feature_name = feature_names[feature_idx]
pdp_result = partial_dependence(
    model,
    X,
    features=[feature_idx],
    kind="both",
    grid_resolution=80,
)

# Leading index 0 picks the first (only) entry of each result array.
feature_values = pdp_result["grid_values"][0]
avg_pd = pdp_result["average"][0]
ice_lines = pdp_result["individual"][0]

# Spread of the ICE curves at every grid point: 10th and 90th percentiles
# across samples (axis 0), i.e. the central 80% of individual curves.
lower_bound, upper_bound = np.percentile(ice_lines, [10, 90], axis=0)

# One row per grid point: average curve plus the percentile band.
df_pdp = pd.DataFrame(
    {
        "feature_value": feature_values,
        "partial_dependence": avg_pd,
        "lower": lower_bound,
        "upper": upper_bound,
    }
)
| 42 | + |
# Draw a random subset of ICE curves (without replacement) and lay them out in
# long format: one row per (curve, grid point), curves stored one after another.
n_ice_lines = 50
ice_indices = np.random.choice(ice_lines.shape[0], n_ice_lines, replace=False)
n_grid_points = len(feature_values)
df_ice = pd.DataFrame(
    {
        # The grid is repeated once per sampled curve.
        "feature_value": np.tile(feature_values, n_ice_lines),
        # Row-major flattening keeps each curve's points contiguous.
        "ice_value": ice_lines[ice_indices].ravel(),
        # line_id is the curve's position in the sample (0..n_ice_lines-1),
        # not its row index in X; it only groups points into lines.
        "line_id": np.repeat(np.arange(n_ice_lines), n_grid_points),
    }
)
| 51 | + |
# Rug data: a random sample (without replacement) of observed values of the
# inspected feature, drawn as short vertical ticks to show where data is dense.
rug_sample = np.random.choice(X[:, feature_idx], size=100, replace=False)

# The y-axis covers every plotted layer (sampled ICE curves, percentile band,
# average curve), not only the average curve. Anchoring the ticks to the
# average curve's minimum would leave them floating inside the panel, so they
# are placed directly beneath the lowest plotted value and sized relative to
# the full plotted span.
sampled_ice = ice_lines[ice_indices]
plot_floor = float(min(sampled_ice.min(), lower_bound.min(), avg_pd.min()))
plot_ceiling = float(max(sampled_ice.max(), upper_bound.max(), avg_pd.max()))
rug_height = (plot_ceiling - plot_floor) * 0.03

# y_min / y_max are the bottom and top of each tick.
y_min = plot_floor - rug_height
y_max = plot_floor

# Scalars broadcast to one row per sampled value.
df_rug = pd.DataFrame({"x": rug_sample, "y_start": y_min, "y_end": y_max})
| 59 | + |
# Build each layer separately, then stack them in drawing order.
pdp_blue = "#306998"

# Shaded band between the 10th and 90th percentile of the ICE curves.
band_layer = geom_ribbon(
    aes(x="feature_value", ymin="lower", ymax="upper"),
    data=df_pdp,
    fill=pdp_blue,
    alpha=0.2,
)

# Faint individual conditional expectation curves, one line per line_id.
ice_layer = geom_line(
    aes(x="feature_value", y="ice_value", group="line_id"),
    data=df_ice,
    color=pdp_blue,
    alpha=0.15,
    size=0.5,
)

# Average partial dependence curve, drawn thick on top of the ICE curves.
pdp_layer = geom_line(
    aes(x="feature_value", y="partial_dependence"),
    data=df_pdp,
    color="#FFD43B",
    size=2.5,
)

# Rug: one short vertical segment per sampled feature value.
rug_layer = geom_segment(
    aes(x="x", y="y_start", xend="x", yend="y_end"),
    data=df_rug,
    color=pdp_blue,
    alpha=0.4,
    size=0.8,
)

axis_labels = labs(
    x=f"{feature_name} (standardized)",
    y="Partial Dependence (predicted outcome)",
    title="pdp-basic · letsplot · pyplots.ai",
)

# Larger text and a light major grid for readability at export size.
text_theme = theme(
    plot_title=element_text(size=24, face="bold"),
    axis_title=element_text(size=20),
    axis_text=element_text(size=16),
    panel_grid_major=element_line(color="#CCCCCC", size=0.3),
    panel_grid_minor=element_blank(),
)

plot = (
    ggplot()
    + band_layer
    + ice_layer
    + pdp_layer
    + rug_layer
    + axis_labels
    + theme_minimal()
    + text_theme
    + ggsize(1600, 900)
)
| 90 | + |
# Export to the current directory: a PNG rendered at 3x scale, plus an HTML version.
output_dir = "."
ggsave(plot, filename="plot.png", path=output_dir, scale=3)
ggsave(plot, filename="plot.html", path=output_dir)
0 commit comments