""" pyplots.ai
bar-feature-importance: Feature Importance Bar Chart
Library: altair 6.0.0 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-26
"""

import altair as alt
import numpy as np
import pandas as pd


# Data - feature importances from a hypothetical RandomForest model.
# Seeding the global NumPy RNG keeps the synthetic error-bar widths reproducible.
np.random.seed(42)

features = [
    "customer_lifetime_value",
    "purchase_frequency",
    "avg_order_value",
    "days_since_last_purchase",
    "total_purchases",
    "account_age_months",
    "email_open_rate",
    "website_visits",
    "support_tickets",
    "referral_count",
    "cart_abandonment_rate",
    "discount_usage",
    "mobile_app_usage",
    "newsletter_subscribed",
    "social_media_engagement",
]

# Realistic importance scores, positionally matched to `features`
# (they sum to ~1.0, as normalized tree-based importances do).
importances = np.array(
    [0.182, 0.156, 0.134, 0.098, 0.087, 0.072, 0.058, 0.051, 0.042, 0.038, 0.031, 0.022, 0.015, 0.009, 0.005]
)

# Standard deviations for ensemble variability: each is a random 15-35% of its importance.
stds = importances * np.random.uniform(0.15, 0.35, len(importances))

df = pd.DataFrame({"feature": features, "importance": importances, "std": stds})

# Sort ascending by importance; the chart encodes y with sort=None, so this row
# order is the order the features appear on the axis.
# NOTE(review): with sort=None the data order is kept, which presumably places the
# smallest importance first on the y axis - confirm that is the intended reading order.
df = df.sort_values("importance", ascending=True).reset_index(drop=True)

# Base chart: encodings shared by every layer (feature on y, importance on x, tooltip).
# sort=None keeps the DataFrame's row order rather than letting the y axis re-sort the labels.
base = alt.Chart(df).encode(
    y=alt.Y("feature:N", sort=None, title="Feature", axis=alt.Axis(labelFontSize=16, titleFontSize=20, labelLimit=300)),
    x=alt.X("importance:Q", title="Importance Score", axis=alt.Axis(labelFontSize=16, titleFontSize=20)),
    tooltip=[
        alt.Tooltip("feature:N", title="Feature"),
        alt.Tooltip("importance:Q", title="Importance", format=".3f"),
        alt.Tooltip("std:Q", title="Std Dev", format=".3f"),
    ],
)

# Bars with a color gradient driven by importance; the legend is hidden because
# the bar length already conveys the same value.
bars = base.mark_bar(size=30).encode(color=alt.Color("importance:Q", scale=alt.Scale(scheme="blues"), legend=None))

# Error bars spanning importance - std to importance + std. The x channel is
# overridden with the computed bounds and given an empty title so it does not
# add a second label to the shared x axis.
error_bars = (
    base.mark_errorbar(color="#333333", thickness=2)
    .encode(x=alt.X("x_min:Q", title=""), x2="x_max:Q")
    .transform_calculate(x_min="datum.importance - datum.std", x_max="datum.importance + datum.std")
)

# Numeric labels placed just past the upper end of each error bar
# (importance + std + 0.005), left-aligned with a further 5px offset.
text = (
    base.mark_text(align="left", baseline="middle", dx=5, fontSize=14, fontWeight="bold")
    .encode(text=alt.Text("importance:Q", format=".3f"), x=alt.X("text_x:Q"))
    .transform_calculate(text_x="datum.importance + datum.std + 0.005")
)

# Combine layers (bars underneath, then error bars, then labels) and apply
# sizing, title and global axis/view styling.
chart = (
    (bars + error_bars + text)
    .properties(
        width=1400,
        height=800,
        title=alt.Title("bar-feature-importance · altair · pyplots.ai", fontSize=28, anchor="start", offset=20),
    )
    .configure_axis(labelFontSize=16, titleFontSize=20, gridColor="#e0e0e0", gridOpacity=0.3)
    .configure_view(strokeWidth=0)
)

# Save as PNG; scale_factor=3 renders the chart at three times its base pixel size.
# NOTE(review): the 1400 x 800 plot area at 3x is 4200 x 2400 on its own; the
# 4800 x 2700 target stated for this file presumably includes axis, label and
# title padding - confirm against the rendered output.
chart.save("plot.png", scale_factor=3.0)

# Save interactive HTML version
chart.save("plot.html")