|
1 | | -""" pyplots.ai |
| 1 | +"""pyplots.ai |
2 | 2 | line-retention-cohort: User Retention Curve by Cohort |
3 | 3 | Library: letsplot 4.9.0 | Python 3.14.3 |
4 | 4 | Quality: 85/100 | Created: 2026-03-16 |
|
36 | 36 |
|
37 | 37 | df = pd.DataFrame(rows) |
38 | 38 |
|
39 | | -# Colors: cohesive palette starting with Python Blue, older cohorts lighter |
40 | | -colors = ["#93B5C9", "#7EAAB8", "#5590A3", "#306998", "#1B4965"] |
| 39 | +# Endpoint labels: last data point per cohort, with nudge to avoid overlap |
| 40 | +endpoints = df[df["Week"] == 12].copy() |
| 41 | +endpoints["label"] = endpoints["Retention"].apply(lambda x: f"{x:.0f}%") |
| 42 | +# Adjust y positions to prevent label overlap (spread close values apart) |
| 43 | +sorted_ep = endpoints.sort_values("Retention").reset_index(drop=True) |
| 44 | +min_gap = 3.5 |
| 45 | +for i in range(1, len(sorted_ep)): |
| 46 | + if sorted_ep.loc[i, "Retention"] - sorted_ep.loc[i - 1, "Retention"] < min_gap: |
| 47 | + sorted_ep.loc[i, "Retention"] = sorted_ep.loc[i - 1, "Retention"] + min_gap |
| 48 | +endpoints = sorted_ep |
| 49 | + |
| 50 | +# Colorblind-friendly palette with distinct hues (oldest=lightest, newest=boldest) |
| 51 | +colors = ["#A6CEE3", "#B2DF8A", "#FDBF6F", "#E31A1C", "#306998"] |
| 52 | + |
| 53 | +# Line widths: older cohorts thinner, newer cohorts bolder |
| 54 | +line_widths = [1.5, 1.8, 2.0, 2.5, 3.0] |
| 55 | + |
| 56 | +# Build plot with per-cohort layers for varying line widths |
| 57 | +cohort_labels = df["Cohort"].unique().tolist() |
| 58 | + |
| 59 | +plot = ggplot() |
| 60 | + |
| 61 | +# Add one line layer per cohort so each gets its own width (points are added in a single layer below) |
| 62 | +for i, cohort_label in enumerate(cohort_labels): |
| 63 | + cdf = df[df["Cohort"] == cohort_label] |
| 64 | + plot = plot + geom_line( |
| 65 | + aes(x="Week", y="Retention", color="Cohort"), |
| 66 | + data=cdf, |
| 67 | + size=line_widths[i], |
| 68 | + alpha=0.9, |
| 69 | + tooltips=layer_tooltips().line("@Cohort").line("Week @Week").line("Retention @Retention{.1f}%"), |
| 70 | + ) |
41 | 71 |
|
42 | | -# Plot |
43 | 72 | plot = ( |
44 | | - ggplot(df, aes(x="Week", y="Retention", color="Cohort")) |
45 | | - + geom_line(size=2.5, alpha=0.9) |
46 | | - + geom_point(size=4, alpha=0.85) |
47 | | - + geom_hline(yintercept=20, linetype="dashed", color="#888888", size=1) |
| 73 | + plot |
| 74 | + + geom_point(aes(x="Week", y="Retention", color="Cohort"), data=df, size=4, alpha=0.85) |
| 75 | + + geom_hline(yintercept=20, linetype="dashed", color="#999999", size=0.8) |
| 76 | + + geom_text( |
| 77 | + aes(x="Week", y="Retention", label="label", color="Cohort"), data=endpoints, size=14, nudge_x=0.6, hjust=0 |
| 78 | + ) |
| 79 | + + geom_text( |
| 80 | + aes(x="x", y="y", label="label"), |
| 81 | + data=pd.DataFrame({"x": [0.2], "y": [20], "label": ["20% threshold"]}), |
| 82 | + size=12, |
| 83 | + color="#999999", |
| 84 | + hjust=0, |
| 85 | + vjust=-1.2, |
| 86 | + ) |
48 | 87 | + scale_color_manual(values=colors) |
49 | | - + scale_x_continuous(breaks=list(range(0, 13, 2))) |
| 88 | + + scale_x_continuous(breaks=list(range(0, 13, 2)), limits=[0, 14.5]) |
50 | 89 | + scale_y_continuous(breaks=list(range(0, 101, 20)), limits=[0, 105]) |
51 | 90 | + labs(title="line-retention-cohort · letsplot · pyplots.ai", x="Weeks Since Signup", y="Retained Users (%)") |
52 | 91 | + theme_minimal() |
53 | 92 | + theme( |
54 | | - plot_title=element_text(size=28, hjust=0.5), |
| 93 | + plot_title=element_text(size=28, hjust=0.5, face="bold"), |
55 | 94 | axis_title=element_text(size=22), |
56 | 95 | axis_text=element_text(size=18), |
57 | 96 | legend_title=element_blank(), |
58 | 97 | legend_text=element_text(size=16), |
59 | 98 | legend_position="right", |
60 | | - panel_grid_major=element_line(color="#E0E0E0", size=0.5), |
| 99 | + panel_grid_major=element_line(color="#EBEBEB", size=0.4), |
61 | 100 | panel_grid_minor=element_blank(), |
| 101 | + plot_background=element_rect(color="white", fill="white"), |
62 | 102 | ) |
63 | 103 | + ggsize(1600, 900) |
64 | 104 | ) |
|
0 commit comments