|
| 1 | +""" pyplots.ai |
| 2 | +bar-feature-importance: Feature Importance Bar Chart |
| 3 | +Library: seaborn 0.13.2 | Python 3.13.11 |
| 4 | +Quality: 93/100 | Created: 2025-12-26 |
| 5 | +""" |
| 6 | + |
| 7 | +import matplotlib.pyplot as plt |
| 8 | +import numpy as np |
| 9 | +import pandas as pd |
| 10 | +import seaborn as sns |
| 11 | + |
| 12 | + |
# Data: hard-coded, illustrative importance scores standing in for a Random Forest model
np.random.seed(42)  # fixed seed keeps the simulated error-bar widths reproducible

feature_names = [
    "Annual Income",
    "Credit Score",
    "Employment Years",
    "Debt-to-Income Ratio",
    "Age",
    "Number of Accounts",
    "Loan Amount",
    "Payment History",
    "Credit Utilization",
    "Home Ownership",
    "Education Level",
    "Marital Status",
    "Monthly Expenses",
    "Savings Balance",
    "Previous Defaults",
]

# Dividing by the total guarantees the plotted importances sum to 1.0
raw_scores = np.array(
    [0.18, 0.15, 0.12, 0.11, 0.09, 0.08, 0.07, 0.06, 0.05, 0.03, 0.02, 0.015, 0.01, 0.008, 0.007]
)
normalized_scores = raw_scores / raw_scores.sum()

# One uniformly drawn spread per feature in [0.005, 0.025); used as the error-bar half-width
score_spread = np.random.uniform(0.005, 0.025, len(feature_names))

# Assemble the table and order rows by ascending importance with a fresh 0..n-1 index
df = pd.DataFrame({"feature": feature_names, "importance": normalized_scores, "std": score_spread})
df = df.sort_values("importance", ascending=True, ignore_index=True)

n_bars = len(df)
accent_color = "#306998"  # shared by bar outlines, error bars and value labels

# Canvas
fig, ax = plt.subplots(figsize=(16, 9))

# Sequential "Blues" palette with one shade per bar, assigned in row order via hue
bar_palette = sns.color_palette("Blues", n_colors=n_bars)

# Horizontal bars: numeric importance on x, feature name on y
sns.barplot(
    data=df,
    y="feature",
    x="importance",
    hue="feature",
    palette=bar_palette,
    legend=False,
    edgecolor=accent_color,
    linewidth=1.5,
    ax=ax,
)

# Error bars are drawn separately, at y positions 0..n-1 in the same row order as the bars
ax.errorbar(
    df["importance"],
    np.arange(n_bars),
    xerr=df["std"],
    fmt="none",
    color=accent_color,
    capsize=4,
    capthick=2,
    linewidth=2,
)

# Value labels: placed just beyond the right end of each error bar
label_x = df["importance"] + df["std"] + 0.008
for row_pos, (x_pos, score) in enumerate(zip(label_x, df["importance"], strict=True)):
    ax.text(
        x_pos,
        row_pos,
        f"{score:.3f}",
        va="center",
        ha="left",
        fontsize=14,
        color=accent_color,
        fontweight="bold",
    )

# Axis labels, title and tick sizes
ax.set_xlabel("Feature Importance", fontsize=20)
ax.set_ylabel("Feature", fontsize=20)
ax.set_title("bar-feature-importance · seaborn · pyplots.ai", fontsize=24, fontweight="bold", pad=20)
ax.tick_params(axis="both", labelsize=16)

# Leave headroom on the right so the longest bar's error bar and label stay inside the axes
x_upper = df["importance"].max() + df["std"].max() + 0.05
ax.set_xlim(0, x_upper)

# Light dashed vertical grid, drawn beneath the bars
ax.grid(True, axis="x", alpha=0.3, linestyle="--")
ax.set_axisbelow(True)

# Hide the top and right frame lines
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
0 commit comments