feat(letsplot): implement boxen-basic (#3439)

github-actions[bot] · web-flow · commit 44b06ea42a91 · 2026-01-09T08:15:13.000Z
## Implementation: `boxen-basic` - letsplot Implements the **letsplot** version of `boxen-basic`. **File:** `plots/boxen-basic/implementations/letsplot.py` **Parent Issue:** #3414 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20845378914)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
diff --git a/plots/boxen-basic/implementations/letsplot.py b/plots/boxen-basic/implementations/letsplot.py
@@ -0,0 +1,161 @@
+""" pyplots.ai
+boxen-basic: Basic Boxen Plot (Letter-Value Plot)
+Library: letsplot 4.8.2 | Python 3.13.11
+Quality: 91/100 | Created: 2026-01-09
+"""
+
+import numpy as np
+import pandas as pd
+from lets_plot import (
+    LetsPlot,
+    aes,
+    element_text,
+    geom_point,
+    geom_rect,
+    geom_segment,
+    ggplot,
+    ggsave,
+    ggsize,
+    labs,
+    scale_fill_manual,
+    scale_x_continuous,
+    theme,
+    theme_minimal,
+)
+
+
+LetsPlot.setup_html()
+
+# Data - synthetic response times (ms) for four server endpoints; the fixed seed makes every run identical
+np.random.seed(42)
+endpoints = ["API Gateway", "Auth Service", "Database", "Cache Layer"]
+n_per_group = 2000  # samples drawn per endpoint
+
+data = []  # (endpoint, response_time) tuples, turned into df below
+# Per-endpoint parameters (ms): "base" offset, "scale" of the exponential part; NOTE: "skew" is never read
+distributions = {
+    "API Gateway": {"base": 45, "scale": 20, "skew": 0.5},
+    "Auth Service": {"base": 80, "scale": 35, "skew": 0.8},
+    "Database": {"base": 120, "scale": 50, "skew": 1.2},
+    "Cache Layer": {"base": 8, "scale": 5, "skew": 0.3},
+}
+
+for endpoint in endpoints:
+    d = distributions[endpoint]
+    # Shifted exponential draw: exponential(scale) + base, so no sample falls below "base"
+    values = np.random.exponential(d["scale"], n_per_group) + d["base"]
+    # Slow tail: 5% of the samples (distinct indices) are multiplied by a factor drawn uniformly from [2, 5)
+    slow_idx = np.random.choice(n_per_group, size=int(n_per_group * 0.05), replace=False)
+    values[slow_idx] = values[slow_idx] * np.random.uniform(2, 5, len(slow_idx))
+    data.extend([(endpoint, v) for v in values])
+
+df = pd.DataFrame(data, columns=["endpoint", "response_time"])
+
+
+# Legend labels and fill colours, paired by position; compute_letter_values() indexes level_names by level
+level_names = ["50%", "75%", "87.5%", "93.75%", "96.875%", "98.4%", "99.2%", "99.6%"]
+level_colors = ["#306998", "#4A7FA8", "#6490B8", "#7EA1C8", "#98B2D8", "#B2C3E8", "#CCD4F8", "#E6E5FF"]
+
+
+# Calculate letter values for boxen plot
+def compute_letter_values(values, k=None):
+    """Return (boxes, median, outliers, k) for one group of a boxen (letter-value) plot.
+
+    boxes holds k tuples (lower, upper, label), innermost first: box i spans the
+    quantiles 2**-(i+2) .. 1 - 2**-(i+2), i.e. the central 1 - 2**-(i+1) of the data,
+    which is exactly the coverage named by level_names[i] ("50%", "75%", ...).
+    k defaults to max(2, int(log2(n)) - 1) and is clamped to 1..len(level_names).
+    outliers are the observations strictly outside the outermost box.
+    Raises ValueError if values is empty.
+    """
+    sorted_vals = np.sort(values)
+    n = sorted_vals.size
+    if n == 0:
+        raise ValueError("compute_letter_values() needs at least one value")
+    if k is None:
+        k = max(2, int(np.log2(n)) - 1)  # more levels for larger samples
+    k = max(1, min(k, len(level_names)))  # only one label/colour exists per level
+
+    # Start at the fourths (depth 1/4): depth 1/2 is the median, where lower == upper
+    # would give a zero-height box and shift every legend label by one level.
+    depths = 0.5 ** np.arange(2, k + 2)
+    lowers = np.percentile(sorted_vals, depths * 100)
+    uppers = np.percentile(sorted_vals, (1 - depths) * 100)
+    letter_values = list(zip(lowers, uppers, level_names[:k]))
+
+    outliers = sorted_vals[(sorted_vals < lowers[-1]) | (sorted_vals > uppers[-1])]
+    return letter_values, np.median(sorted_vals), outliers, k
+
+
+# Build the box, median and outlier tables, one endpoint at a time
+box_rows = []
+median_rows = []
+outlier_rows = []
+max_k = 0  # most levels used by any endpoint; later sizes the fill palette
+
+# enumerate() supplies the numeric x slot (0, 1, 2, ...) each endpoint is drawn at
+for x_pos, endpoint in enumerate(endpoints):
+    samples = df.loc[df["endpoint"] == endpoint, "response_time"].values
+    boxes, median, outliers, k = compute_letter_values(samples)
+    max_k = max(max_k, k)
+
+    for depth_idx, (lower, upper, level_name) in enumerate(boxes):
+        # Every further level is drawn 15% narrower than the previous one
+        half_width = 0.4 * (0.85**depth_idx)
+        box_rows.append(
+            {
+                "x_min": x_pos - half_width,
+                "x_max": x_pos + half_width,
+                "y_min": lower,
+                "y_max": upper,
+                "level": level_name,
+                "endpoint": endpoint,
+            }
+        )
+
+    # Median marker: a horizontal segment slightly narrower than the widest box
+    median_rows.append({"x": x_pos - 0.38, "xend": x_pos + 0.38, "y": median, "endpoint": endpoint})
+
+    # All outliers of an endpoint share its x slot
+    outlier_rows.extend({"x": x_pos, "y": value, "endpoint": endpoint} for value in outliers)
+
+# NOTE(review): boxes overlap and later rows are presumably drawn on top of earlier ones - verify draw order
+box_df = pd.DataFrame(box_rows)
+median_df = pd.DataFrame(median_rows)
+# An explicit column list keeps "x"/"y"/"endpoint" available when there are no outliers
+outlier_df = pd.DataFrame(outlier_rows) if outlier_rows else pd.DataFrame(columns=["x", "y", "endpoint"])
+
+# Build each plot component separately, then add them in the same order as before
+level_palette = dict(zip(level_names[:max_k], level_colors[:max_k], strict=False))
+box_layer = geom_rect(
+    aes(xmin="x_min", xmax="x_max", ymin="y_min", ymax="y_max", fill="level"),
+    data=box_df,
+    alpha=0.9,
+    color="#1a1a1a",
+    size=0.5,
+)
+median_layer = geom_segment(aes(x="x", xend="xend", y="y", yend="y"), data=median_df, color="#FFD43B", size=3)
+font_sizes = theme(
+    axis_title=element_text(size=20),
+    axis_text=element_text(size=16),
+    plot_title=element_text(size=24),
+    legend_title=element_text(size=18),
+    legend_text=element_text(size=14),
+)
+plot = (
+    ggplot()
+    + box_layer
+    + median_layer
+    + scale_fill_manual(values=level_palette, name="Quantile Range")
+    + scale_x_continuous(breaks=[0, 1, 2, 3], labels=endpoints)  # numeric x slots relabelled with names
+    + labs(x="Server Endpoint", y="Response Time (ms)", title="boxen-basic \u00b7 letsplot \u00b7 pyplots.ai")
+    + theme_minimal()
+    + font_sizes
+    + ggsize(1600, 900)
+)
+if not outlier_df.empty:  # the point layer is only added when there is something to draw
+    plot = plot + geom_point(aes(x="x", y="y"), data=outlier_df, color="#DC2626", size=2, alpha=0.6)
+
+# Write a PNG (scale=3) and an HTML version into the current directory
+ggsave(plot, "plot.png", path=".", scale=3)
+ggsave(plot, "plot.html", path=".")
diff --git a/plots/boxen-basic/metadata/letsplot.yaml b/plots/boxen-basic/metadata/letsplot.yaml
@@ -0,0 +1,215 @@
+library: letsplot
+specification_id: boxen-basic
+created: '2026-01-09T08:12:01Z'
+updated: '2026-01-09T08:14:47Z'
+generated_by: claude-opus-4-5-20251101
+workflow_run: 20845378914
+issue: 3414
+python_version: 3.13.11
+library_version: 4.8.2
+preview_url: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot.png
+preview_thumb: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot_thumb.png
+preview_html: https://storage.googleapis.com/pyplots-images/plots/boxen-basic/letsplot/plot.html
+quality_score: 91
+review:
+  strengths:
+  - Excellent visual representation of letter-value plot with clear nested box structure
+  - Realistic server response time scenario with appropriate data characteristics
+    (skewed distributions, tail behavior)
+  - Good color gradient from dark blue to light lavender that clearly shows quantile
+    depth
+  - Yellow median lines provide excellent contrast and visibility
+  - Legend clearly explains the quantile ranges
+  - Proper handling of outliers as distinct red points
+  weaknesses:
+  - Uses a helper function compute_letter_values() which violates the KISS principle
+    (imports → data → plot → save, no functions)
+  - Legend order shows 50% at top and 99.6% at bottom, which is counterintuitive to
+    visual interpretation
+  image_description: 'The plot displays a letter-value (boxen) plot comparing response
+    times across four server endpoints: API Gateway, Auth Service, Database, and Cache
+    Layer. Each endpoint shows nested rectangular boxes representing quantile ranges
+    from 50% (innermost, dark blue #306998) to 99.6% (outermost, light lavender).
+    The boxes decrease in width for deeper quantiles, creating the characteristic
+    boxen plot shape. Yellow/gold median lines are prominently displayed across each
+    distribution. Red dots mark outliers beyond the 99.6% quantile. The Database endpoint
+    shows the widest distribution and most outliers, while Cache Layer shows the tightest
+    distribution with lowest response times. The plot uses a minimal theme with subtle
+    grid lines and a clean legend on the right explaining the quantile ranges.'
+  criteria_checklist:
+    visual_quality:
+      score: 37
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 10
+        max: 10
+        passed: true
+        comment: Title at 24pt, axis labels at 20pt, tick labels at 16pt - all perfectly
+          readable
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements, endpoint labels are well-spaced
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Boxes are clearly visible with good sizing; outlier points could
+          be slightly larger
+      - id: VQ-04
+        name: Color Accessibility
+        score: 5
+        max: 5
+        passed: true
+        comment: Blue gradient palette is colorblind-safe; yellow median line provides
+          good contrast
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Plot fills canvas appropriately with balanced margins; legend well-positioned
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: 'Descriptive labels with units: Response Time (ms) and Server Endpoint'
+      - id: VQ-07
+        name: Grid & Legend
+        score: 0
+        max: 2
+        passed: false
+        comment: Legend quantile ordering is counterintuitive (50% at top, 99.6% at
+          bottom)
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct boxen/letter-value plot with nested boxes
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: Categories on X-axis, values on Y-axis
+      - id: SC-03
+        name: Required Features
+        score: 5
+        max: 5
+        passed: true
+        comment: Nested boxes, decreasing widths, outliers as points, legend explaining
+          quantile levels
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: All data visible including outliers up to ~1400ms
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend correctly shows quantile range names
+      - id: SC-06
+        name: Title Format
+        score: 1
+        max: 2
+        passed: true
+        comment: Uses correct format but with Unicode middot character
+    data_quality:
+      score: 20
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 8
+        max: 8
+        passed: true
+        comment: 'Shows all aspects: different distribution shapes, varying spreads,
+          outliers, tail behavior'
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Server response times is a real, neutral scenario perfectly suited
+          for large dataset visualization
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Response times in realistic ranges (8-500ms base with occasional
+          slow requests up to 1400ms)
+    code_quality:
+      score: 7
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 0
+        max: 3
+        passed: false
+        comment: Uses a function compute_letter_values() which violates KISS principle
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: Uses np.random.seed(42) for reproducibility
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: All imports are used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Modern lets-plot API
+      - id: CQ-05
+        name: Output Correct
+        score: 1
+        max: 1
+        passed: true
+        comment: Saves as plot.png and plot.html
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses ggplot2 grammar with geom_rect, geom_segment, geom_point. Manual
+          construction necessary as lets-plot has no native boxen geom.
+  verdict: APPROVED
+impl_tags:
+  dependencies: []
+  techniques:
+  - layer-composition
+  - manual-ticks
+  - html-export
+  patterns:
+  - data-generation
+  - iteration-over-groups
+  dataprep:
+  - binning
+  styling:
+  - alpha-blending
+  - edge-highlighting