""" pyplots.ai
bar-permutation-importance: Permutation Feature Importance Plot
Library: letsplot 4.8.2 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-31
"""

import numpy as np
import pandas as pd
from lets_plot import (
    LetsPlot,
    aes,
    coord_flip,
    element_line,
    element_text,
    geom_bar,
    geom_errorbar,
    geom_hline,
    geom_vline,
    ggplot,
    ggsave,
    ggsize,
    labs,
    scale_fill_gradient,
    scale_x_discrete,
    theme,
    theme_minimal,
)


# Initialise lets-plot's HTML output support before any plot is built.
LetsPlot.setup_html()

# Data: Simulated permutation importance from a Random Forest model
np.random.seed(42)  # fixed seed so the generated figure is reproducible

features = [
    "Income Level",
    "Credit Score",
    "Employment Years",
    "Debt Ratio",
    "Account Age",
    "Payment History",
    "Loan Amount",
    "Interest Rate",
    "Property Value",
    "Monthly Expenses",
    "Savings Balance",
    "Number of Accounts",
    "Recent Inquiries",
    "Education Level",
    "Region Code",
]

# Baseline importance values, listed in the same order as `features`
# (higher for more predictive features).
base_importance = np.array(
    [0.15, 0.12, 0.09, 0.08, 0.06, 0.05, 0.04, 0.03, 0.025, 0.02, 0.015, 0.01, 0.008, 0.005, 0.002]
)

# Jitter every baseline by up to +/-0.005. The smallest baselines are below
# 0.005, so a jittered mean can end up slightly negative.
importance_mean = base_importance + np.random.uniform(-0.005, 0.005, len(features))
# Per-feature spread used for the error bars.
importance_std = np.random.uniform(0.003, 0.02, len(features))

# One row per feature, sorted ascending by mean importance.
df = pd.DataFrame({"feature": features, "importance_mean": importance_mean, "importance_std": importance_std})
df = df.sort_values("importance_mean", ascending=True).reset_index(drop=True)

# Ordered categorical whose category order is the sorted row order, so the
# plot keeps the features in importance order instead of re-sorting them.
df["feature"] = pd.Categorical(df["feature"], categories=df["feature"].tolist(), ordered=True)

# Error bar limits: mean -/+ one standard deviation.
df["ymin"] = df["importance_mean"] - df["importance_std"]
df["ymax"] = df["importance_mean"] + df["importance_std"]

# Create the plot with horizontal bars using geom_bar + coord_flip.
# Before the flip, features are on x and importance is on y; coord_flip()
# then turns the bars horizontal.
plot = (
    ggplot(df, aes(x="feature", y="importance_mean", fill="importance_mean"))
    + geom_bar(stat="identity", width=0.7, alpha=0.9)
    # Whiskers span mean -/+ std using the precomputed ymin / ymax columns.
    + geom_errorbar(aes(ymin="ymin", ymax="ymax"), width=0.25, size=0.8, color="#333333")
    # Zero-importance reference line. Importance is the y aesthetic, so the
    # line must be declared with yintercept; after coord_flip() it is drawn
    # as the vertical baseline at 0 on the importance axis.
    + geom_hline(yintercept=0, color="#888888", size=0.8, linetype="dashed")
    + coord_flip()
    # Bar colour encodes the same value as bar length; legend suppressed.
    + scale_fill_gradient(low="#FFD43B", high="#306998", guide="none")
    + scale_x_discrete()
    + labs(x="Feature", y="Mean Decrease in Model Score", title="bar-permutation-importance · letsplot · pyplots.ai")
    + theme_minimal()
    + theme(
        plot_title=element_text(size=24),
        axis_title=element_text(size=20),
        axis_text=element_text(size=16),
        axis_text_y=element_text(size=14),
        panel_grid_major_x=element_line(color="#CCCCCC", size=0.5),
        panel_grid_minor_x=element_line(color="#EEEEEE", size=0.3),
    )
    + ggsize(1600, 900)
)

# Save as PNG (scale 3x for 4800x2700) - path="." saves to current directory
ggsave(plot, "plot.png", path=".", scale=3)

# Save as HTML for interactive view
ggsave(plot, "plot.html", path=".")
# 0 commit comments