|
""" pyplots.ai
lift-curve: Model Lift Chart
Library: plotnine 0.15.2 | Python 3.13.11
Quality: 92/100 | Created: 2025-12-27
"""
| 6 | + |
import numpy as np
import pandas as pd
from plotnine import (
    aes,
    element_blank,
    element_text,
    geom_hline,
    geom_line,
    geom_point,
    ggplot,
    labs,
    scale_x_continuous,
    scale_y_continuous,
    theme,
    theme_minimal,
)
| 22 | + |
| 23 | + |
# Data - simulated customer response model scores
np.random.seed(42)
n_samples = 1000

# Latent model signal: right-skewed scores in [0, 1] (Beta(2, 5), mean 2/7).
model_score = np.random.beta(2, 5, n_samples)

# True responses are Bernoulli draws whose probability rises linearly with the
# latent score (higher score = higher response chance). The expected overall
# response rate is 0.05 + 0.6 * 2/7, i.e. roughly 22%.
response_prob = 0.05 + 0.6 * model_score
y_true = (np.random.random(n_samples) < response_prob).astype(int)

# Observed predictions: latent score plus Gaussian noise, clipped to [0, 1].
y_score = np.clip(model_score + np.random.normal(0, 0.05, n_samples), 0, 1)

# Rank observations from highest to lowest predicted score.
sorted_indices = np.argsort(y_score)[::-1]
y_true_sorted = y_true[sorted_indices]

# Overall (baseline) response rate that every targeted group is compared to.
n_total = len(y_true)
n_positive = y_true.sum()
baseline_rate = n_positive / n_total

# Cumulative lift at every whole percent of the population (1% .. 100%).
# A single running sum of the ranked responses replaces re-summing a growing
# prefix for each percentile.
percentiles = np.arange(1, 101)
n_targeted = np.ceil(n_total * percentiles / 100).astype(int)
n_positive_captured = np.cumsum(y_true_sorted)[n_targeted - 1]

# Lift = (response rate in targeted group) / (baseline response rate).
# With no positives at all the ratio is undefined, so lift is reported as 0.
targeted_rate = n_positive_captured / n_targeted
if baseline_rate > 0:
    lift_values = targeted_rate / baseline_rate
else:
    lift_values = np.zeros(len(percentiles))

# Create DataFrame for plotting
df = pd.DataFrame({"pct_population": percentiles, "lift": lift_values})

# Key points for markers: one per decile (10%, 20%, ..., 100%).
decile_points = df[df["pct_population"] % 10 == 0]
| 73 | + |
# Plot
# Cumulative lift curve with highlighted decile markers, drawn over a dashed
# reference line at lift = 1.0 (the level at which the targeted group's
# response rate equals the baseline rate).
plot = (
    ggplot()
    + geom_hline(yintercept=1.0, linetype="dashed", color="#888888", size=1.2, alpha=0.7)
    + geom_line(data=df, mapping=aes(x="pct_population", y="lift"), color="#306998", size=2.5)
    + geom_point(
        data=decile_points,
        mapping=aes(x="pct_population", y="lift"),
        color="#306998",
        size=5,
        fill="#FFD43B",
        stroke=1.5,
    )
    + scale_x_continuous(breaks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], limits=(0, 100))
    + scale_y_continuous(breaks=[0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5], limits=(0, None))
    + labs(title="lift-curve · plotnine · pyplots.ai", x="Population Targeted (%)", y="Cumulative Lift")
    + theme_minimal()
    + theme(
        figure_size=(16, 9),
        text=element_text(size=14),
        axis_title=element_text(size=20),
        axis_text=element_text(size=16),
        plot_title=element_text(size=24),
        # Minor grid lines are a line themeable; element_blank() removes them
        # outright instead of styling them with a text element.
        panel_grid_minor=element_blank(),
    )
)

# Save
plot.save("plot.png", dpi=300, verbose=False)
0 commit comments