Skip to content

Commit de4d627

Browse files
fix(altair): address review feedback for line-retention-cohort
Attempt 2/3 - fixes based on AI review
1 parent: 4f7858a · commit: de4d627

1 file changed

Lines changed: 82 additions & 71 deletions

File tree

  • plots/line-retention-cohort/implementations

plots/line-retention-cohort/implementations/altair.py

Lines changed: 82 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
""" pyplots.ai
1+
"""pyplots.ai
22
line-retention-cohort: User Retention Curve by Cohort
33
Library: altair 6.0.0 | Python 3.14.3
44
Quality: 86/100 | Created: 2026-03-16
@@ -13,104 +13,115 @@
1313
np.random.seed(42)
1414

1515
cohorts = {
16-
"Jan 2025": {"size": 1245, "half_life": 3.5, "order": 0},
17-
"Feb 2025": {"size": 1102, "half_life": 4.0, "order": 1},
18-
"Mar 2025": {"size": 1380, "half_life": 4.8, "order": 2},
19-
"Apr 2025": {"size": 1510, "half_life": 5.5, "order": 3},
20-
"May 2025": {"size": 1423, "half_life": 6.2, "order": 4},
16+
"Jan 2025": {"size": 1245, "half_life": 3.5},
17+
"Feb 2025": {"size": 1102, "half_life": 4.0},
18+
"Mar 2025": {"size": 1380, "half_life": 4.8},
19+
"Apr 2025": {"size": 1510, "half_life": 5.5},
20+
"May 2025": {"size": 1423, "half_life": 6.2},
2121
}
2222

2323
weeks = np.arange(0, 13)
2424
rows = []
25-
for cohort_label, info in cohorts.items():
25+
for i, (cohort_label, info) in enumerate(cohorts.items()):
2626
retention = 100 * np.exp(-weeks / info["half_life"])
2727
noise = np.concatenate([[0], np.cumsum(np.random.randn(12) * 1.5)])
2828
retention = np.clip(retention + noise, 5, 100)
2929
retention[0] = 100.0
3030
legend_label = f"{cohort_label} (n={info['size']:,})"
3131
for w, r in zip(weeks, retention, strict=True):
32-
rows.append({"Week": w, "Retention (%)": round(r, 1), "Cohort": legend_label, "order": info["order"]})
32+
rows.append({"Week": w, "Retention (%)": round(r, 1), "Cohort": legend_label, "order": i})
3333

3434
df = pd.DataFrame(rows)
3535

36-
# Colorblind-safe palette: blue → teal → amber → orange → deep blue
37-
# Avoids red-green distinction, uses luminance + hue variation
36+
# Colorblind-safe palette (Tol-inspired)
37+
cohort_labels = list(df["Cohort"].unique())
3838
colors = ["#88CCEE", "#44AA99", "#DDCC77", "#CC6677", "#332288"]
39-
cohort_labels = [f"{c} (n={info['size']:,})" for c, info in cohorts.items()]
40-
41-
# Graduated opacity and stroke width: older cohorts fade, newer ones pop
42-
opacity_map = {0: 0.35, 1: 0.50, 2: 0.65, 3: 0.80, 4: 1.0}
43-
width_map = {0: 1.5, 1: 2.0, 2: 2.5, 3: 3.0, 4: 4.0}
39+
order_domain = list(range(5))
40+
opacity_range = [0.45, 0.59, 0.73, 0.87, 1.0]
41+
width_range = [1.8, 2.4, 3.0, 3.6, 4.2]
42+
size_range = [60, 90, 120, 150, 180]
4443

4544
# Interactive highlight on hover
4645
highlight = alt.selection_point(fields=["Cohort"], on="pointerover", empty=False)
4746

48-
# Build per-cohort line + point layers for graduated styling
49-
layers = []
50-
for cohort_label, info in cohorts.items():
51-
label = f"{cohort_label} (n={info['size']:,})"
52-
idx = info["order"]
53-
cohort_df = df[df["Cohort"] == label]
54-
55-
line = (
56-
alt.Chart(cohort_df)
57-
.mark_line(strokeWidth=width_map[idx], opacity=opacity_map[idx])
58-
.encode(
59-
x=alt.X(
60-
"Week:Q",
61-
title="Weeks Since Signup",
62-
scale=alt.Scale(domain=[0, 12]),
63-
axis=alt.Axis(labelFontSize=18, titleFontSize=22, tickMinStep=1),
64-
),
65-
y=alt.Y(
66-
"Retention (%):Q",
67-
title="Retention (%)",
68-
scale=alt.Scale(domain=[0, 100]),
69-
axis=alt.Axis(labelFontSize=18, titleFontSize=22, format=".0f"),
70-
),
71-
color=alt.Color(
72-
"Cohort:N",
73-
scale=alt.Scale(domain=cohort_labels, range=colors),
74-
legend=alt.Legend(
75-
title="Cohort", titleFontSize=18, labelFontSize=15, symbolStrokeWidth=4, symbolSize=200
76-
),
77-
),
78-
strokeWidth=alt.condition(highlight, alt.value(6), alt.value(width_map[idx])),
79-
opacity=alt.condition(highlight, alt.value(1.0), alt.value(opacity_map[idx])),
80-
tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"],
81-
)
82-
.add_params(highlight)
83-
)
84-
85-
point = (
86-
alt.Chart(cohort_df)
87-
.mark_point(filled=True, size=60 + idx * 30)
88-
.encode(
89-
x="Week:Q",
90-
y="Retention (%):Q",
91-
color=alt.Color("Cohort:N", scale=alt.Scale(domain=cohort_labels, range=colors), legend=None),
92-
opacity=alt.condition(highlight, alt.value(1.0), alt.value(opacity_map[idx])),
93-
size=alt.condition(highlight, alt.value(200), alt.value(60 + idx * 30)),
94-
tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"],
95-
)
96-
)
97-
98-
layers.extend([line, point])
99-
10047
# Reference line at 20% retention threshold
10148
threshold_df = pd.DataFrame({"y": [20]})
10249
threshold = alt.Chart(threshold_df).mark_rule(strokeDash=[8, 6], strokeWidth=2, color="#666666").encode(y="y:Q")
103-
104-
# Threshold label
10550
threshold_label = (
10651
alt.Chart(threshold_df)
10752
.mark_text(text="20% Target", align="left", dx=5, dy=-12, fontSize=16, fontWeight="bold", color="#666666")
10853
.encode(x=alt.value(20), y="y:Q")
10954
)
11055

56+
# Shared axis encodings
57+
x_enc = alt.X(
58+
"Week:Q",
59+
title="Weeks Since Signup",
60+
scale=alt.Scale(domain=[0, 12]),
61+
axis=alt.Axis(labelFontSize=18, titleFontSize=22, tickMinStep=1),
62+
)
63+
y_enc = alt.Y(
64+
"Retention (%):Q",
65+
title="Retention (%)",
66+
scale=alt.Scale(domain=[0, 100]),
67+
axis=alt.Axis(labelFontSize=18, titleFontSize=22, format=".0f"),
68+
)
69+
color_enc = alt.Color(
70+
"Cohort:N",
71+
scale=alt.Scale(domain=cohort_labels, range=colors),
72+
sort=cohort_labels,
73+
legend=alt.Legend(title="Cohort", titleFontSize=18, labelFontSize=16, symbolStrokeWidth=4, symbolSize=200),
74+
)
75+
76+
# Lines — single Chart, graduated styling via order-based scales
77+
lines = (
78+
alt.Chart(df)
79+
.mark_line()
80+
.encode(
81+
x=x_enc,
82+
y=y_enc,
83+
color=color_enc,
84+
strokeWidth=alt.condition(
85+
highlight,
86+
alt.value(6),
87+
alt.StrokeWidth("order:O", scale=alt.Scale(domain=order_domain, range=width_range), legend=None),
88+
),
89+
opacity=alt.condition(
90+
highlight,
91+
alt.value(1.0),
92+
alt.Opacity("order:O", scale=alt.Scale(domain=order_domain, range=opacity_range), legend=None),
93+
),
94+
detail="Cohort:N",
95+
tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"],
96+
)
97+
.add_params(highlight)
98+
)
99+
100+
# Points
101+
points = (
102+
alt.Chart(df)
103+
.mark_point(filled=True)
104+
.encode(
105+
x="Week:Q",
106+
y="Retention (%):Q",
107+
color=alt.Color("Cohort:N", scale=alt.Scale(domain=cohort_labels, range=colors), legend=None),
108+
opacity=alt.condition(
109+
highlight,
110+
alt.value(1.0),
111+
alt.Opacity("order:O", scale=alt.Scale(domain=order_domain, range=opacity_range), legend=None),
112+
),
113+
size=alt.condition(
114+
highlight,
115+
alt.value(200),
116+
alt.Size("order:O", scale=alt.Scale(domain=order_domain, range=size_range), legend=None),
117+
),
118+
tooltip=["Cohort:N", "Week:Q", "Retention (%):Q"],
119+
)
120+
)
121+
111122
# Combine layers
112123
chart = (
113-
alt.layer(threshold, threshold_label, *layers)
124+
alt.layer(threshold, threshold_label, lines, points)
114125
.properties(
115126
width=1600,
116127
height=900,
@@ -126,7 +137,7 @@
126137
.configure_axis(
127138
gridColor="#D0D0D0",
128139
gridOpacity=0.3,
129-
domainColor="#888888",
140+
domainWidth=0,
130141
tickColor="#888888",
131142
labelColor="#333333",
132143
titleColor="#222222",

0 commit comments

Comments (0)