|
| 1 | +"""pyplots.ai |
| 2 | +line-retention-cohort: User Retention Curve by Cohort |
| 3 | +Library: plotnine | Python 3.13 |
| 4 | +Quality: pending | Created: 2026-03-16 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from plotnine import ( |
| 10 | + aes, |
| 11 | + annotate, |
| 12 | + element_blank, |
| 13 | + element_line, |
| 14 | + element_text, |
| 15 | + geom_hline, |
| 16 | + geom_line, |
| 17 | + geom_point, |
| 18 | + ggplot, |
| 19 | + labs, |
| 20 | + scale_alpha_identity, |
| 21 | + scale_color_manual, |
| 22 | + scale_x_continuous, |
| 23 | + scale_y_continuous, |
| 24 | + theme, |
| 25 | + theme_minimal, |
| 26 | +) |
| 27 | + |
| 28 | + |
| 29 | +# Data |
| 30 | +np.random.seed(42) |
| 31 | + |
| 32 | +cohorts = { |
| 33 | + "Jan 2025": {"size": 1245, "decay": 0.18}, |
| 34 | + "Feb 2025": {"size": 1102, "decay": 0.16}, |
| 35 | + "Mar 2025": {"size": 1380, "decay": 0.14}, |
| 36 | + "Apr 2025": {"size": 1290, "decay": 0.12}, |
| 37 | + "May 2025": {"size": 1455, "decay": 0.10}, |
| 38 | +} |
| 39 | + |
| 40 | +weeks = np.arange(0, 13) |
| 41 | +rows = [] |
| 42 | + |
| 43 | +for cohort_name, info in cohorts.items(): |
| 44 | + retention = 100 * np.exp(-info["decay"] * weeks) |
| 45 | + noise = np.concatenate(([0], np.cumsum(np.random.normal(0, 0.8, len(weeks) - 1)))) |
| 46 | + retention = np.clip(retention + noise, 0, 100) |
| 47 | + retention[0] = 100.0 |
| 48 | + label = f"{cohort_name} (n={info['size']:,})" |
| 49 | + for w, r in zip(weeks, retention, strict=True): |
| 50 | + rows.append({"week": w, "retention": r, "cohort": label}) |
| 51 | + |
| 52 | +df = pd.DataFrame(rows) |
| 53 | + |
| 54 | +cohort_labels = list(df["cohort"].unique()) |
| 55 | +df["cohort"] = pd.Categorical(df["cohort"], categories=cohort_labels, ordered=True) |
| 56 | + |
| 57 | +alpha_map = dict(zip(cohort_labels, [0.45, 0.55, 0.65, 0.80, 1.0], strict=True)) |
| 58 | +df["line_alpha"] = df["cohort"].map(alpha_map).astype(float) |
| 59 | + |
# Colors
# One colour per cohort, in the same order as the ordered cohort categories.
colors = ["#8FADC2", "#7A9AB5", "#306998", "#E8783A", "#D4522A"]

# Plot
# Components are named individually and then summed in drawing order:
# threshold line first (underneath), then the cohort lines, then the markers.
base = ggplot(df, aes(x="week", y="retention", color="cohort", group="cohort"))

threshold_line = geom_hline(yintercept=20, linetype="dashed", color="#AAAAAA", size=0.6)
threshold_label = annotate(
    "text", x=12.3, y=22.5, label="20% threshold", size=9, color="#999999", ha="right"
)

# Per-row opacity comes straight from the "line_alpha" column (identity scale).
cohort_lines = geom_line(aes(alpha="line_alpha"), size=1.5)
cohort_points = geom_point(aes(alpha="line_alpha"), size=2.5)

week_ticks = range(0, 13)
x_scale = scale_x_continuous(breaks=week_ticks, labels=[str(tick) for tick in week_ticks])

percent_ticks = [0, 20, 40, 60, 80, 100]
y_scale = scale_y_continuous(
    limits=(0, 105),
    breaks=percent_ticks,
    labels=[f"{tick}%" for tick in percent_ticks],
)

text_labels = labs(
    x="Weeks Since Signup",
    y="Retained Users",
    color="Cohort",
    title="line-retention-cohort · plotnine · pyplots.ai",
)

# Overrides applied on top of theme_minimal(): larger type for a 16x9 canvas,
# horizontal grid lines only, and explicit axis lines.
style_overrides = theme(
    figure_size=(16, 9),
    text=element_text(size=14),
    plot_title=element_text(size=24, weight="bold"),
    axis_title=element_text(size=20),
    axis_text=element_text(size=16),
    legend_title=element_text(size=18),
    legend_text=element_text(size=14),
    legend_position="right",
    panel_grid_major_x=element_blank(),
    panel_grid_minor=element_blank(),
    panel_grid_major_y=element_line(color="#E0E0E0", size=0.5, alpha=0.5),
    axis_line_x=element_line(color="#333333", size=0.5),
    axis_line_y=element_line(color="#333333", size=0.5),
)

plot = (
    base
    + threshold_line
    + cohort_lines
    + scale_alpha_identity()
    + cohort_points
    + scale_color_manual(values=colors)
    + x_scale
    + y_scale
    + threshold_label
    + text_labels
    + theme_minimal()
    + style_overrides
)

# Save
plot.save("plot.png", dpi=300, verbose=False)
0 commit comments