"""pyplots.ai
pdp-basic: Partial Dependence Plot
Library: plotnine 0.15.2 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-31
"""
| 6 | + |
import numpy as np
import pandas as pd
from plotnine import (
    aes,
    element_blank,
    element_line,
    element_text,
    geom_line,
    geom_ribbon,
    geom_segment,
    ggplot,
    labs,
    theme,
    theme_minimal,
)
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.inspection import partial_dependence
| 26 | + |
| 27 | +# Data - Train a model and compute partial dependence |
| 28 | +np.random.seed(42) |
| 29 | + |
| 30 | +# Generate synthetic data for a regression problem |
| 31 | +X, y = make_regression(n_samples=500, n_features=5, noise=10, random_state=42) |
| 32 | +feature_names = ["Temperature", "Humidity", "Pressure", "WindSpeed", "Elevation"] |
| 33 | + |
| 34 | +# Train a gradient boosting model |
| 35 | +model = GradientBoostingRegressor(n_estimators=100, max_depth=3, random_state=42) |
| 36 | +model.fit(X, y) |
| 37 | + |
| 38 | +# Compute partial dependence for the first feature (Temperature) |
| 39 | +feature_idx = 0 |
| 40 | + |
| 41 | +# Get partial dependence values |
| 42 | +pd_results = partial_dependence(model, X, features=[feature_idx], kind="average", grid_resolution=80) |
| 43 | +pd_values = pd_results["average"][0] |
| 44 | +grid_actual = pd_results["grid_values"][0] |
| 45 | + |
| 46 | +# Compute ICE curves for confidence interval estimation |
| 47 | +pd_individual = partial_dependence(model, X, features=[feature_idx], kind="individual", grid_resolution=80) |
| 48 | +ice_values = pd_individual["individual"][0] |
| 49 | + |
| 50 | +# Calculate confidence interval (mean ± 1.96 * std for 95% CI) |
| 51 | +pd_mean = ice_values.mean(axis=0) |
| 52 | +pd_std = ice_values.std(axis=0) |
| 53 | +ci_lower = pd_mean - 1.96 * pd_std |
| 54 | +ci_upper = pd_mean + 1.96 * pd_std |
| 55 | + |
| 56 | +# Create DataFrame for plotting |
| 57 | +df = pd.DataFrame( |
| 58 | + {"feature_value": grid_actual, "partial_dependence": pd_mean, "ci_lower": ci_lower, "ci_upper": ci_upper} |
| 59 | +) |
| 60 | + |
| 61 | +# Rug data - sample of training data for feature distribution at bottom of plot |
| 62 | +y_min = ci_lower.min() |
| 63 | +rug_height = (ci_upper.max() - ci_lower.min()) * 0.02 |
| 64 | +rug_sample = pd.DataFrame({"x": X[:80, feature_idx], "yend": y_min, "y": y_min - rug_height}) |
| 65 | + |
| 66 | +# Plot |
| 67 | +plot = ( |
| 68 | + ggplot(df, aes(x="feature_value", y="partial_dependence")) |
| 69 | + + geom_ribbon(aes(ymin="ci_lower", ymax="ci_upper"), fill="#306998", alpha=0.25) |
| 70 | + + geom_line(color="#306998", size=2) |
| 71 | + + geom_segment( |
| 72 | + data=rug_sample, mapping=aes(x="x", xend="x", y="y", yend="yend"), color="#FFD43B", alpha=0.7, size=0.8 |
| 73 | + ) |
| 74 | + + labs( |
| 75 | + title="pdp-basic · plotnine · pyplots.ai", |
| 76 | + x="Temperature (standardized)", |
| 77 | + y="Partial Dependence (avg. prediction)", |
| 78 | + ) |
| 79 | + + theme_minimal() |
| 80 | + + theme( |
| 81 | + figure_size=(16, 9), |
| 82 | + plot_title=element_text(size=24, weight="bold", ha="left"), |
| 83 | + axis_title=element_text(size=20), |
| 84 | + axis_text=element_text(size=16), |
| 85 | + panel_grid_major=element_line(color="#cccccc", alpha=0.3), |
| 86 | + panel_grid_minor=element_blank(), |
| 87 | + ) |
| 88 | +) |
| 89 | + |
| 90 | +# Save |
| 91 | +plot.save("plot.png", dpi=300, verbose=False) |