feat(letsplot): implement line-retention-cohort (#4928)

github-actions[bot] · web-flow · commit 324130695e4a · 2026-03-16T20:57:43.000Z
## Implementation: `line-retention-cohort` - letsplot Implements the **letsplot** version of `line-retention-cohort`. **File:** `plots/line-retention-cohort/implementations/letsplot.py` **Parent Issue:** #4572 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/23164943466)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
diff --git a/plots/line-retention-cohort/implementations/letsplot.py b/plots/line-retention-cohort/implementations/letsplot.py
@@ -0,0 +1,108 @@
+""" pyplots.ai
+line-retention-cohort: User Retention Curve by Cohort
+Library: letsplot 4.9.0 | Python 3.14.3
+Quality: 91/100 | Created: 2026-03-16
+"""
+
+import numpy as np
+import pandas as pd
+from lets_plot import *
+
+
+LetsPlot.setup_html()
+
+# Synthetic data: five monthly signup cohorts, each observed at weeks 0 through 12
+np.random.seed(42)
+weeks = np.arange(0, 13)
+
+# Per cohort: signup count (printed in the legend label) and weekly exponential decay rate
+cohorts = {
+    "Jan 2025": {"size": 1245, "decay": 0.18},
+    "Feb 2025": {"size": 1102, "decay": 0.16},
+    "Mar 2025": {"size": 1380, "decay": 0.14},
+    "Apr 2025": {"size": 1510, "decay": 0.12},
+    "May 2025": {"size": 1425, "decay": 0.10},
+}
+
+frames = []
+for name, spec in cohorts.items():
+    jitter = np.random.normal(0, 1.5, len(weeks))
+    jitter[0] = 0
+    curve = np.clip(100 * np.exp(-spec["decay"] * weeks) + jitter, 0, 100)
+    curve[0] = 100.0  # every cohort is fully retained at week 0
+    legend_label = f"{name} (n={spec['size']:,})"
+    frames.append(pd.DataFrame({"Week": weeks, "Retention": curve, "Cohort": legend_label}))
+
+# Long format: one row per (cohort, week), cohorts stacked in insertion order
+df = pd.concat(frames, ignore_index=True)
+
+# Endpoint labels: week-12 value of each cohort, ordered bottom-to-top, text rounded to whole percent
+endpoints = df[df["Week"] == 12].sort_values("Retention").reset_index(drop=True)
+endpoints["label"] = [f"{value:.0f}%" for value in endpoints["Retention"]]
+# De-overlap: lift a label to min_gap above its lower neighbour when it sits closer (text keeps true value)
+min_gap = 3.5
+label_y = []
+for value in endpoints["Retention"]:
+    too_close = bool(label_y) and value - label_y[-1] < min_gap
+    label_y.append(label_y[-1] + min_gap if too_close else value)
+endpoints["Retention"] = label_y
+
+# One color per cohort in oldest -> newest order: pale hues for old cohorts, saturated for recent
+colors = ["#A6CEE3", "#B2DF8A", "#FDBF6F", "#E31A1C", "#306998"]
+# Line widths follow the same order, so the most recent cohorts read as the boldest curves
+line_widths = [1.5, 1.8, 2.0, 2.5, 3.0]
+cohort_labels = df["Cohort"].unique().tolist()
+cohort_aes = aes(x="Week", y="Retention", color="Cohort")
+
+# Number formats are declared through .format(); .line() templates reference values as @variable,
+# and an inline "{.1f}" after the variable is not part of the documented template syntax
+hover = layer_tooltips().format("@Retention", ".1f").line("@Cohort").line("Week @Week").line("Retention @Retention%")
+
+# One line layer per cohort (width is a per-layer constant); strict zip guards the label/width pairing
+plot = ggplot()
+for cohort_label, width in zip(cohort_labels, line_widths, strict=True):
+    plot = plot + geom_line(cohort_aes, data=df[df["Cohort"] == cohort_label], size=width, alpha=0.9, tooltips=hover)
+
+plot = (
+    plot
+    + geom_point(cohort_aes, data=df, size=4, alpha=0.85)
+    + geom_hline(yintercept=20, linetype="dashed", color="#999999", size=0.8)
+    # Endpoint labels reuse the cohort colors but are kept out of the legend (no text glyph on keys)
+    + geom_text(
+        aes(x="Week", y="Retention", label="label", color="Cohort"),
+        data=endpoints,
+        size=14,
+        nudge_x=0.6,
+        hjust=0,
+        show_legend=False,
+    )
+    + geom_text(
+        aes(x="x", y="y", label="label"),
+        data=pd.DataFrame({"x": [0.2], "y": [20], "label": ["20% threshold"]}),
+        size=12,
+        color="#999999",
+        hjust=0,
+        vjust=-1.2,
+    )
+    + scale_color_manual(values=colors)
+    + scale_x_continuous(breaks=list(range(0, 13, 2)), limits=[0, 14.5])
+    + scale_y_continuous(breaks=list(range(0, 101, 20)), limits=[0, 105])
+    + labs(title="line-retention-cohort · letsplot · pyplots.ai", x="Weeks Since Signup", y="Retained Users (%)")
+    + theme_minimal()
+    + theme(
+        plot_title=element_text(size=28, hjust=0.5, face="bold"),
+        axis_title=element_text(size=22),
+        axis_text=element_text(size=18),
+        legend_title=element_blank(),
+        legend_text=element_text(size=16),
+        legend_position="right",
+        panel_grid_major=element_line(color="#EBEBEB", size=0.4),
+        panel_grid_minor=element_blank(),
+        plot_background=element_rect(color="white", fill="white"),
+    )
+    + ggsize(1600, 900)
+)
+
+# Save a raster image (rendered with scale=3) and an interactive HTML page to the working directory
+ggsave(plot, "plot.png", path=".", scale=3)
+ggsave(plot, "plot.html", path=".")
diff --git a/plots/line-retention-cohort/metadata/letsplot.yaml b/plots/line-retention-cohort/metadata/letsplot.yaml
@@ -0,0 +1,225 @@
+library: letsplot
+specification_id: line-retention-cohort
+created: '2026-03-16T20:44:20Z'
+updated: '2026-03-16T20:57:20Z'
+generated_by: claude-opus-4-5-20251101
+workflow_run: 23164943466
+issue: 4572
+python_version: 3.14.3
+library_version: 4.9.0
+preview_url: https://storage.googleapis.com/pyplots-images/plots/line-retention-cohort/letsplot/plot.png
+preview_thumb: https://storage.googleapis.com/pyplots-images/plots/line-retention-cohort/letsplot/plot_thumb.png
+preview_html: https://storage.googleapis.com/pyplots-images/plots/line-retention-cohort/letsplot/plot.html
+quality_score: 91
+review:
+  strengths:
+  - Excellent data storytelling through progressive line weights and color intensity
+    emphasizing newer cohorts
+  - Endpoint labels with overlap prevention provide clear final-retention context
+  - 20% threshold reference line adds analytical value
+  - Full spec compliance with all required features implemented
+  - Interactive tooltips leverage lets-plot distinctive capabilities
+  weaknesses:
+  - Endpoint labels for lower cohorts (12%, 15%, 17%) are slightly tight despite overlap
+    prevention
+  image_description: 'The plot displays 5 retention curves for monthly signup cohorts
+    (Jan–May 2025) on a clean white background. All curves start at 100% at week 0
+    and decay over 12 weeks with exponential profiles. Colors progress from light
+    blue (Jan 2025, oldest) through green (Feb), orange (Mar), red (Apr), to dark
+    navy blue (May 2025, newest). Newer cohorts have thicker lines, creating clear
+    visual hierarchy. Data points are marked along each curve. Endpoint percentage
+    labels (12%, 15%, 17%, 23%, 31%) are displayed at week 12, color-matched to their
+    respective cohorts. A dashed gray horizontal line at y=20 marks the "20% threshold"
+    benchmark. The legend on the right lists each cohort with sample size (e.g., "Jan
+    2025 (n=1,245)"). Title reads "line-retention-cohort · letsplot · pyplots.ai".
+    X-axis: "Weeks Since Signup", Y-axis: "Retained Users (%)". Subtle light gray
+    major gridlines on a minimal theme.'
+  criteria_checklist:
+    visual_quality:
+      score: 29
+      max: 30
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 8
+        max: 8
+        passed: true
+        comment: 'All font sizes explicitly set: title=28, axis_title=22, axis_text=18,
+          legend_text=16'
+      - id: VQ-02
+        name: No Overlap
+        score: 5
+        max: 6
+        passed: true
+        comment: Endpoint labels use overlap prevention but 12%/15%/17% are still
+          fairly tight
+      - id: VQ-03
+        name: Element Visibility
+        score: 6
+        max: 6
+        passed: true
+        comment: Lines well-sized with progressive widths 1.5-3.0, points at size=4
+          clearly visible
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 4
+        passed: true
+        comment: 'Distinct colorblind-friendly palette: light blue, green, orange,
+          red, dark blue'
+      - id: VQ-05
+        name: Layout & Canvas
+        score: 4
+        max: 4
+        passed: true
+        comment: Plot fills canvas well, x-axis extended to accommodate endpoint labels
+      - id: VQ-06
+        name: Axis Labels & Title
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: Weeks Since Signup, Retained Users
+          (%)'
+    design_excellence:
+      score: 15
+      max: 20
+      items:
+      - id: DE-01
+        name: Aesthetic Sophistication
+        score: 6
+        max: 8
+        passed: true
+        comment: Custom palette with light-to-bold progression, endpoint labels and
+          threshold annotation add polish
+      - id: DE-02
+        name: Visual Refinement
+        score: 4
+        max: 6
+        passed: true
+        comment: theme_minimal(), subtle grid, no minor grid, white background
+      - id: DE-03
+        name: Data Storytelling
+        score: 5
+        max: 6
+        passed: true
+        comment: Strong visual hierarchy through line width/color, endpoint labels,
+          threshold reference line
+    spec_compliance:
+      score: 15
+      max: 15
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 5
+        max: 5
+        passed: true
+        comment: Correct line chart with multiple cohort retention curves
+      - id: SC-02
+        name: Required Features
+        score: 4
+        max: 4
+        passed: true
+        comment: All spec features present including threshold line, varying line
+          thickness, legend with sizes
+      - id: SC-03
+        name: Data Mapping
+        score: 3
+        max: 3
+        passed: true
+        comment: X=weeks since signup, Y=retention percentage, correctly mapped
+      - id: SC-04
+        name: Title & Legend
+        score: 3
+        max: 3
+        passed: true
+        comment: Title format correct, legend labels match spec format with cohort
+          size
+    data_quality:
+      score: 15
+      max: 15
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 6
+        max: 6
+        passed: true
+        comment: 5 cohorts with different decay rates showing clear variation
+      - id: DQ-02
+        name: Realistic Context
+        score: 5
+        max: 5
+        passed: true
+        comment: Monthly signup cohorts with realistic sizes and plausible retention
+          decay rates
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 4
+        max: 4
+        passed: true
+        comment: Retention values 12-31% at week 12, cohort sizes 1102-1510 realistic
+    code_quality:
+      score: 10
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Clean imports-data-plot-save flow
+      - id: CQ-02
+        name: Reproducibility
+        score: 2
+        max: 2
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: 'All imports used: numpy, pandas, lets_plot'
+      - id: CQ-04
+        name: Code Elegance
+        score: 2
+        max: 2
+        passed: true
+        comment: Clean code with thoughtful endpoint label overlap prevention
+      - id: CQ-05
+        name: Output & API
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png with scale=3 and plot.html
+    library_mastery:
+      score: 7
+      max: 10
+      items:
+      - id: LM-01
+        name: Idiomatic Usage
+        score: 4
+        max: 5
+        passed: true
+        comment: Good ggplot grammar usage, per-group loop justified for varying line
+          widths
+      - id: LM-02
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses layer_tooltips() for interactive hover, HTML export, ggsize()
+  verdict: APPROVED
+impl_tags:
+  dependencies: []
+  techniques:
+  - annotations
+  - layer-composition
+  - hover-tooltips
+  - html-export
+  patterns:
+  - data-generation
+  - iteration-over-groups
+  dataprep: []
+  styling:
+  - alpha-blending
+  - grid-styling