|
| 1 | +# ruff: noqa: F405 |
| 2 | +"""pyplots.ai |
| 3 | +lift-curve: Model Lift Chart |
| 4 | +Library: lets-plot | Python 3.13 |
| 5 | +Quality: pending | Created: 2025-12-27 |
| 6 | +""" |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import pandas as pd |
| 10 | +from lets_plot import * # noqa: F403, F405 |
| 11 | + |
| 12 | + |
| 13 | +LetsPlot.setup_html() |
| 14 | + |
# Data: Simulated customer response model
# Two score distributions are drawn (responders skewed high, non-responders
# skewed low), shuffled together, and then ranked by score to build the
# cumulative lift curve that the plotting stage consumes via `df` / `ref_df`.
np.random.seed(42)
n_samples = 1000
n_responders = n_samples // 5  # 20% response rate
n_non_responders = n_samples - n_responders

# Create realistic model scores with good discrimination
# Higher scores for positive class, lower for negative
y_true = np.concatenate([np.ones(n_responders), np.zeros(n_non_responders)])
positive_scores = np.random.beta(5, 2, n_responders)  # Skewed high
negative_scores = np.random.beta(2, 5, n_non_responders)  # Skewed low
y_score = np.concatenate([positive_scores, negative_scores])

# Shuffle data so labels and scores are no longer grouped by class
shuffle_idx = np.random.permutation(n_samples)
y_true = y_true[shuffle_idx]
y_score = y_score[shuffle_idx]

# Calculate lift curve: rank observations from highest to lowest score
sorted_idx = np.argsort(y_score)[::-1]
y_true_sorted = y_true[sorted_idx]

# Calculate cumulative metrics
ranks = np.arange(1, n_samples + 1)  # number of observations targeted so far
n_positive = np.sum(y_true)
baseline_rate = n_positive / n_samples
cumsum_positive = np.cumsum(y_true_sorted)
# Multiply before dividing: `ranks / n_samples * 100` rounds twice and leaves
# values such as 7.000000000000001, which breaks exact comparisons on whole
# percentages. `ranks * 100` is exact integer arithmetic, so a single division
# yields exactly representable whole percentages.
population_pct = ranks * 100 / n_samples
response_rate = cumsum_positive / ranks
lift = response_rate / baseline_rate

# Sample one point per 1% of the population for a smoother curve. The stride is
# derived from n_samples so the "every 1%" spacing holds if the size changes.
step = max(n_samples // 100, 1)
sample_points = np.arange(step, n_samples + 1, step)
df = pd.DataFrame({"population_pct": population_pct[sample_points - 1], "lift": lift[sample_points - 1]})

# Add starting point: lift is undefined at 0% targeted, so the lift of the
# single top-ranked observation anchors the curve at the left edge.
df = pd.concat([pd.DataFrame({"population_pct": [0], "lift": [lift[0]]}), df], ignore_index=True)

# Reference line data (horizontal at y=1)
ref_df = pd.DataFrame({"population_pct": [0, 100], "lift": [1, 1]})
| 52 | + |
# Create plot
# Decile markers: keep only the points that sit on a multiple of 10%.
# The comparison uses a tolerance rather than `% 10 == 0`, so one-ulp
# floating-point noise in population_pct cannot silently drop a marker.
pct = df["population_pct"]
nearest_decile = (pct / 10).round() * 10
decile_df = df[np.isclose(pct, nearest_decile, rtol=0.0, atol=1e-9)]

plot = (
    ggplot()
    # Dashed grey baseline: the lift of random targeting (always 1)
    + geom_line(aes(x="population_pct", y="lift"), data=ref_df, color="#888888", size=1.5, linetype="dashed")
    # Model lift curve
    + geom_line(aes(x="population_pct", y="lift"), data=df, color="#306998", size=2.5)
    # Emphasize the curve at each decile
    + geom_point(aes(x="population_pct", y="lift"), data=decile_df, color="#306998", size=5, alpha=0.8)
    + labs(x="Population Targeted (%)", y="Cumulative Lift", title="lift-curve · letsplot · pyplots.ai")
    + scale_x_continuous(breaks=list(range(0, 101, 10)))
    + scale_y_continuous(breaks=[1, 2, 3, 4, 5, 6])
    + theme_minimal()
    + theme(
        plot_title=element_text(size=24, face="bold"),
        axis_title=element_text(size=20),
        axis_text=element_text(size=16),
        panel_grid_major=element_line(color="#CCCCCC", size=0.5),
        panel_grid_minor=element_blank(),
    )
    + ggsize(1600, 900)
)

# Add annotation for reference line (a one-row frame carries the label position)
plot = plot + geom_text(
    aes(x="x", y="y", label="label"),
    data=pd.DataFrame({"x": [85], "y": [1.15], "label": ["Random (Lift = 1)"]}),
    size=14,
    color="#666666",
)

# Save as PNG and HTML (path='.' to save in current directory)
ggsave(plot, "plot.png", path=".", scale=3)
ggsave(plot, "plot.html", path=".")
0 commit comments