|
1 | | -""" pyplots.ai |
| 1 | +"""pyplots.ai |
2 | 2 | line-retention-cohort: User Retention Curve by Cohort |
3 | 3 | Library: altair 6.0.0 | Python 3.14.3 |
4 | 4 | Quality: 84/100 | Created: 2026-03-16 |
|
13 | 13 | np.random.seed(42) |
14 | 14 |
|
15 | 15 | cohorts = { |
16 | | - "Jan 2025": {"size": 1245, "half_life": 3.5}, |
17 | | - "Feb 2025": {"size": 1102, "half_life": 4.0}, |
18 | | - "Mar 2025": {"size": 1380, "half_life": 4.8}, |
19 | | - "Apr 2025": {"size": 1510, "half_life": 5.5}, |
20 | | - "May 2025": {"size": 1423, "half_life": 6.2}, |
| 16 | + "Jan 2025": {"size": 1245, "half_life": 3.5, "order": 0}, |
| 17 | + "Feb 2025": {"size": 1102, "half_life": 4.0, "order": 1}, |
| 18 | + "Mar 2025": {"size": 1380, "half_life": 4.8, "order": 2}, |
| 19 | + "Apr 2025": {"size": 1510, "half_life": 5.5, "order": 3}, |
| 20 | + "May 2025": {"size": 1423, "half_life": 6.2, "order": 4}, |
21 | 21 | } |
22 | 22 |
|
23 | 23 | weeks = np.arange(0, 13) |
|
29 | 29 | retention[0] = 100.0 |
30 | 30 | legend_label = f"{cohort_label} (n={info['size']:,})" |
31 | 31 | for w, r in zip(weeks, retention, strict=True): |
32 | | - rows.append({"Week": w, "Retention (%)": round(r, 1), "Cohort": legend_label}) |
| 32 | + rows.append({"Week": w, "Retention (%)": round(r, 1), "Cohort": legend_label, "order": info["order"]}) |
33 | 33 |
|
34 | 34 | df = pd.DataFrame(rows) |
35 | 35 |
|
36 | | -# Colors - Python Blue first, cohesive palette for 5 cohorts |
37 | | -colors = ["#306998", "#E15759", "#59A14F", "#EDC948", "#B07AA1"] |
| 36 | +# Colorblind-safe palette: light blue → teal → sand → rose → indigo |
| 37 | +# Avoids red-green distinction, uses luminance + hue variation |
| 38 | +colors = ["#88CCEE", "#44AA99", "#DDCC77", "#CC6677", "#332288"] |
38 | 39 | cohort_labels = [f"{c} (n={info['size']:,})" for c, info in cohorts.items()] |
39 | 40 |
|
| 41 | +# Graduated opacity and stroke width: older cohorts fade, newer ones pop |
| 42 | +opacity_map = {0: 0.35, 1: 0.50, 2: 0.65, 3: 0.80, 4: 1.0} |
| 43 | +width_map = {0: 1.5, 1: 2.0, 2: 2.5, 3: 3.0, 4: 4.0} |
| 44 | + |
40 | 45 | # Interactive highlight on hover |
41 | 46 | highlight = alt.selection_point(fields=["Cohort"], on="pointerover", empty=False) |
42 | 47 |
|
43 | | -# Lines - newer cohorts are thicker/more opaque |
44 | | -lines = ( |
45 | | - alt.Chart(df) |
46 | | - .mark_line() |
47 | | - .encode( |
48 | | - x=alt.X( |
49 | | - "Week:Q", |
50 | | - title="Weeks Since Signup", |
51 | | - scale=alt.Scale(domain=[0, 12]), |
52 | | - axis=alt.Axis(labelFontSize=18, titleFontSize=22, tickMinStep=1), |
53 | | - ), |
54 | | - y=alt.Y( |
55 | | - "Retention (%):Q", |
56 | | - title="Retention (%)", |
57 | | - scale=alt.Scale(domain=[0, 100]), |
58 | | - axis=alt.Axis(labelFontSize=18, titleFontSize=22, format=".0f"), |
59 | | - ), |
60 | | - color=alt.Color( |
61 | | - "Cohort:N", |
62 | | - scale=alt.Scale(domain=cohort_labels, range=colors), |
63 | | - legend=alt.Legend(title="Cohort", titleFontSize=18, labelFontSize=15, symbolStrokeWidth=4, symbolSize=200), |
64 | | - ), |
65 | | - strokeWidth=alt.condition(highlight, alt.value(5), alt.value(3)), |
66 | | - opacity=alt.condition(highlight, alt.value(1), alt.value(0.7)), |
67 | | - tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"], |
| 48 | +# Build per-cohort line + point layers for graduated styling |
| 49 | +layers = [] |
| 50 | +for cohort_label, info in cohorts.items(): |
| 51 | + label = f"{cohort_label} (n={info['size']:,})" |
| 52 | + idx = info["order"] |
| 53 | + cohort_df = df[df["Cohort"] == label] |
| 54 | + |
| 55 | + line = ( |
| 56 | + alt.Chart(cohort_df) |
| 57 | + .mark_line(strokeWidth=width_map[idx], opacity=opacity_map[idx]) |
| 58 | + .encode( |
| 59 | + x=alt.X( |
| 60 | + "Week:Q", |
| 61 | + title="Weeks Since Signup", |
| 62 | + scale=alt.Scale(domain=[0, 12]), |
| 63 | + axis=alt.Axis(labelFontSize=18, titleFontSize=22, tickMinStep=1), |
| 64 | + ), |
| 65 | + y=alt.Y( |
| 66 | + "Retention (%):Q", |
| 67 | + title="Retention (%)", |
| 68 | + scale=alt.Scale(domain=[0, 100]), |
| 69 | + axis=alt.Axis(labelFontSize=18, titleFontSize=22, format=".0f"), |
| 70 | + ), |
| 71 | + color=alt.Color( |
| 72 | + "Cohort:N", |
| 73 | + scale=alt.Scale(domain=cohort_labels, range=colors), |
| 74 | + legend=alt.Legend( |
| 75 | + title="Cohort", titleFontSize=18, labelFontSize=15, symbolStrokeWidth=4, symbolSize=200 |
| 76 | + ), |
| 77 | + ), |
| 78 | + strokeWidth=alt.condition(highlight, alt.value(6), alt.value(width_map[idx])), |
| 79 | + opacity=alt.condition(highlight, alt.value(1.0), alt.value(opacity_map[idx])), |
| 80 | + tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"], |
| 81 | + ) |
| 82 | + .add_params(highlight) |
68 | 83 | ) |
69 | | - .add_params(highlight) |
70 | | -) |
71 | 84 |
|
72 | | -# Points on data values |
73 | | -points = ( |
74 | | - alt.Chart(df) |
75 | | - .mark_point(filled=True) |
76 | | - .encode( |
77 | | - x="Week:Q", |
78 | | - y="Retention (%):Q", |
79 | | - color=alt.Color("Cohort:N", scale=alt.Scale(domain=cohort_labels, range=colors), legend=None), |
80 | | - size=alt.condition(highlight, alt.value(180), alt.value(80)), |
81 | | - opacity=alt.condition(highlight, alt.value(1), alt.value(0.6)), |
82 | | - tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"], |
| 85 | + point = ( |
| 86 | + alt.Chart(cohort_df) |
| 87 | + .mark_point(filled=True, size=60 + idx * 30) |
| 88 | + .encode( |
| 89 | + x="Week:Q", |
| 90 | + y="Retention (%):Q", |
| 91 | + color=alt.Color("Cohort:N", scale=alt.Scale(domain=cohort_labels, range=colors), legend=None), |
| 92 | + opacity=alt.condition(highlight, alt.value(1.0), alt.value(opacity_map[idx])), |
| 93 | + size=alt.condition(highlight, alt.value(200), alt.value(60 + idx * 30)), |
| 94 | + tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"], |
| 95 | + ) |
83 | 96 | ) |
84 | | -) |
| 97 | + |
| 98 | + layers.extend([line, point]) |
85 | 99 |
|
86 | 100 | # Reference line at 20% retention threshold |
87 | 101 | threshold_df = pd.DataFrame({"y": [20]}) |
88 | | -threshold = alt.Chart(threshold_df).mark_rule(strokeDash=[8, 6], strokeWidth=2, color="#888888").encode(y="y:Q") |
| 102 | +threshold = alt.Chart(threshold_df).mark_rule(strokeDash=[8, 6], strokeWidth=2, color="#666666").encode(y="y:Q") |
89 | 103 |
|
90 | 104 | # Threshold label |
91 | 105 | threshold_label = ( |
92 | 106 | alt.Chart(threshold_df) |
93 | | - .mark_text(text="20% Target", align="left", dx=5, dy=-12, fontSize=16, color="#888888") |
| 107 | + .mark_text(text="20% Target", align="left", dx=5, dy=-12, fontSize=16, fontWeight="bold", color="#666666") |
94 | 108 | .encode(x=alt.value(20), y="y:Q") |
95 | 109 | ) |
96 | 110 |
|
97 | 111 | # Combine layers |
98 | 112 | chart = ( |
99 | | - (threshold + threshold_label + lines + points) |
100 | | - .properties(width=1600, height=900, title=alt.Title("line-retention-cohort · altair · pyplots.ai", fontSize=28)) |
101 | | - .configure_axis(gridColor="#E0E0E0", gridOpacity=0.2) |
| 113 | + alt.layer(threshold, threshold_label, *layers) |
| 114 | + .properties( |
| 115 | + width=1600, |
| 116 | + height=900, |
| 117 | + title=alt.Title( |
| 118 | + "line-retention-cohort · altair · pyplots.ai", |
| 119 | + fontSize=28, |
| 120 | + fontWeight="bold", |
| 121 | + subtitle="Newer cohorts retain better — product improvements are working", |
| 122 | + subtitleFontSize=18, |
| 123 | + subtitleColor="#555555", |
| 124 | + ), |
| 125 | + ) |
| 126 | + .configure_axis( |
| 127 | + gridColor="#D0D0D0", |
| 128 | + gridOpacity=0.3, |
| 129 | + domainColor="#888888", |
| 130 | + tickColor="#888888", |
| 131 | + labelColor="#333333", |
| 132 | + titleColor="#222222", |
| 133 | + ) |
102 | 134 | .configure_view(strokeWidth=0) |
| 135 | + .configure(background="#FAFAFA") |
103 | 136 | ) |
104 | 137 |
|
105 | 138 | # Save |
|
0 commit comments