diff --git a/plots/box-basic/implementations/letsplot.py b/plots/box-basic/implementations/letsplot.py index 33284d38a5..3e41ad4b53 100644 --- a/plots/box-basic/implementations/letsplot.py +++ b/plots/box-basic/implementations/letsplot.py @@ -1,7 +1,7 @@ """ pyplots.ai box-basic: Basic Box Plot -Library: letsplot 4.8.1 | Python 3.13.11 -Quality: 98/100 | Created: 2025-12-23 +Library: letsplot 4.8.2 | Python 3.14 +Quality: 90/100 | Created: 2025-12-23 """ import numpy as np @@ -9,15 +9,23 @@ from lets_plot import ( LetsPlot, aes, + as_discrete, + element_blank, + element_line, + element_rect, element_text, + flavor_high_contrast_light, geom_boxplot, + geom_hline, + geom_text, ggplot, ggsave, ggsize, labs, + layer_tooltips, scale_fill_manual, + scale_y_continuous, theme, - theme_minimal, ) @@ -27,7 +35,6 @@ np.random.seed(42) categories = ["Engineering", "Marketing", "Sales", "HR", "Finance"] data = [] -# Realistic salary distributions for each department distributions = { "Engineering": (85000, 15000), "Marketing": (65000, 12000), @@ -40,27 +47,111 @@ mean, std = distributions[cat] n = np.random.randint(50, 100) values = np.random.normal(mean, std, n) - # Add a few outliers outliers = np.random.choice([mean + 3.5 * std, mean - 2.5 * std], size=3) values = np.concatenate([values, outliers]) data.extend([(cat, v) for v in values]) -df = pd.DataFrame(data, columns=["category", "value"]) +df = pd.DataFrame(data, columns=["department", "salary"]) + +# Compute medians for annotation labels +medians = df.groupby("department")["salary"].median().reset_index() +medians.columns = ["department", "median_salary"] +medians["label"] = medians["median_salary"].apply(lambda x: f"${x:,.0f}") + +# Insight: compare highest vs lowest median departments +sorted_medians = medians.sort_values("median_salary") +low_dept = sorted_medians.iloc[0] +high_dept = sorted_medians.iloc[-1] +pct_diff = (high_dept["median_salary"] - low_dept["median_salary"]) / low_dept["median_salary"] +insight_text = f"+{pct_diff:.0%} vs. {low_dept['department']}" + +# Overall mean for reference line +overall_mean = df["salary"].mean() + +# Annotation dataframes +insight_df = pd.DataFrame( + { + "department": [high_dept["department"]], + "y": [high_dept["median_salary"] + 22000], + "lbl": [f"{high_dept['department'][:3]}. {insight_text}"], + } +) +mean_label_df = pd.DataFrame( + {"department": [high_dept["department"]], "y": [overall_mean + 3000], "lbl": [f"Avg: ${overall_mean:,.0f}"]} +) # Plot -colors = ["#306998", "#FFD43B", "#DC2626", "#16A34A", "#9333EA"] +# Wong colorblind-safe palette (no two similar blues) +colors = ["#0072B2", "#E69F00", "#D55E00", "#009E73", "#CC79A7"] plot = ( - ggplot(df, aes(x="category", y="value", fill="category")) - + geom_boxplot(alpha=0.8, size=1.5, outlier_size=4) + ggplot(df, aes(x=as_discrete("department", order=1, order_by="..middle.."), y="salary", fill="department")) + + geom_boxplot( + alpha=0.85, + size=1.2, + outlier_size=5, + outlier_shape=21, + outlier_color="#333333", + width=0.72, + tooltips=layer_tooltips() + .title("@department") + .line("Median|$@{..middle..}") + .line("Q1|$@{..lower..}") + .line("Q3|$@{..upper..}") + .line("Min|$@{..ymin..}") + .line("Max|$@{..ymax..}"), + ) + scale_fill_manual(values=colors) - + labs(x="Department", y="Salary ($)", title="box-basic · letsplot · pyplots.ai") - + theme_minimal() + # Median value labels above each box + + geom_text( + aes(x="department", y="median_salary", label="label"), + data=medians, + size=11, + color="#333333", + fontface="bold", + nudge_y=5000, + inherit_aes=False, + ) + # Overall mean reference line + + geom_hline(yintercept=overall_mean, color="#888888", size=0.8, linetype="dashed") + + geom_text( + aes(x="department", y="y", label="lbl"), + data=mean_label_df, + size=10, + color="#666666", + fontface="italic", + hjust=0.5, + inherit_aes=False, + ) + # Key insight annotation + + geom_text( + aes(x="department", y="y", label="lbl"), + data=insight_df, + size=11, + color="#1E4F72", + fontface="bold italic", + inherit_aes=False, + ) + + scale_y_continuous(format="${,.0f}") + + labs( + x="Department", + y="Annual Salary (USD)", + title="box-basic \u00b7 letsplot \u00b7 pyplots.ai", + subtitle="Salary distributions across five departments, ordered by median", + ) + + flavor_high_contrast_light() + theme( + plot_title=element_text(size=24, face="bold"), + plot_subtitle=element_text(size=16, color="#555555"), axis_title=element_text(size=20), axis_text=element_text(size=16), - plot_title=element_text(size=24), + axis_ticks=element_blank(), + panel_grid_major_x=element_blank(), + panel_grid_minor=element_blank(), + panel_grid_major_y=element_line(color="#DDDDDD", size=0.5), legend_position="none", + plot_background=element_rect(fill="white", color="white"), + plot_margin=[10, 35, 10, 10], ) + ggsize(1600, 900) ) diff --git a/plots/box-basic/metadata/letsplot.yaml b/plots/box-basic/metadata/letsplot.yaml index 54636675e6..e563764a8b 100644 --- a/plots/box-basic/metadata/letsplot.yaml +++ b/plots/box-basic/metadata/letsplot.yaml @@ -1,161 +1,184 @@ library: letsplot specification_id: box-basic created: '2025-12-23T00:36:22Z' -updated: '2025-12-23T00:40:11Z' -generated_by: claude-opus-4-5-20251101 +updated: '2026-02-14T22:38:16Z' +generated_by: claude-opus-4-6 workflow_run: 20447786735 issue: 0 -python_version: 3.13.11 -library_version: 4.8.1 +python_version: '3.14' +library_version: 4.8.2 preview_url: https://storage.googleapis.com/pyplots-images/plots/box-basic/letsplot/plot.png preview_thumb: https://storage.googleapis.com/pyplots-images/plots/box-basic/letsplot/plot_thumb.png preview_html: https://storage.googleapis.com/pyplots-images/plots/box-basic/letsplot/plot.html -quality_score: 98 +quality_score: 90 impl_tags: dependencies: [] techniques: - - html-export + - annotations + - layer-composition + - html-export patterns: - - data-generation - - iteration-over-groups + - data-generation + - groupby-aggregation + - iteration-over-groups dataprep: [] - styling: [] + styling: + - alpha-blending + - grid-styling review: strengths: - - Excellent use of realistic salary data with appropriate distributions per department - - Outliers properly generated and clearly visible as distinct points - - Perfect title format following pyplots.ai convention - - Clean ggplot2-style grammar implementation - - Proper sizing for 4800x2700 output with scale=3 - - Different colors make categories easily distinguishable - - Appropriate text sizes (24pt title, 20pt labels, 16pt ticks) + - 'Excellent use of lets-plot distinctive features: stat-based ordering via as_discrete, + layer_tooltips with stat variables, flavor theme' + - Strong data storytelling with median annotations, overall mean reference line, + and percentage comparison insight + - Wong colorblind-safe palette ensures accessibility + - Clean code structure with realistic salary data scenario + - Boxes ordered by median provides natural narrative flow weaknesses: - - Grid lines could be slightly more visible (currently very subtle with theme_minimal) - image_description: 'The plot displays 5 box plots showing salary distributions across - departments: Engineering (blue), Marketing (yellow), Sales (red), HR (green), - and Finance (purple). Each box clearly shows the median line, IQR (box), and whiskers - extending to 1.5*IQR. Outliers are displayed as large black dots - visible above - Engineering (~125K, ~137K), Marketing (~107K), Sales (~125K, ~131K), HR (~30K, - ~90K), and Finance (~120K, ~122K). The title "box-basic · letsplot · pyplots.ai" - appears at the top in appropriate size. X-axis labeled "Department" and Y-axis - labeled "Salary ($)" with values ranging from ~20,000 to 140,000. Clean minimal - theme with subtle grid lines. Layout is well-balanced with good proportions.' + - 'Minor text proximity issue: Avg label and insight annotation slightly crowd the + Engineering box area' + - Some wasted whitespace on the right side of the canvas — layout could be tighter + - Annotation DataFrames are somewhat verbose (three separate DataFrames for text + layers) + image_description: 'The plot displays 5 box-and-whisker plots showing salary distributions + across departments (HR, Marketing, Sales, Finance, Engineering), ordered left-to-right + by ascending median salary. Each box uses a distinct color from the Wong colorblind-safe + palette: teal (HR), golden yellow (Marketing), orange (Sales), pink (Finance), + blue (Engineering). Outliers appear as large filled circles above and below whiskers. + A dashed gray horizontal reference line marks the overall average salary (~$71,502) + with a label "Avg: $71,502" near the right side. Median dollar values ($54,599 + to $85,333) are annotated in bold above each box. A bold italic insight annotation + "Eng. +56% vs. HR" highlights the salary gap between highest and lowest median + departments. Title reads "box-basic · letsplot · pyplots.ai" with subtitle "Salary + distributions across five departments, ordered by median". Y-axis is "Annual Salary + (USD)" with dollar formatting, X-axis is "Department". Clean white background + with subtle horizontal gridlines.' criteria_checklist: visual_quality: - score: 38 - max: 40 + score: 26 + max: 30 items: - id: VQ-01 name: Text Legibility - score: 10 - max: 10 + score: 8 + max: 8 passed: true - comment: Title ~24pt, axis labels ~20pt, tick labels ~16pt - all perfectly - readable + comment: 'All font sizes explicitly set: title 24pt, axis titles 20pt, axis + text 16pt, annotation text 10-11pt' - id: VQ-02 name: No Overlap - score: 8 - max: 8 + score: 5 + max: 6 passed: true - comment: No overlapping text, category labels well spaced + comment: 'Minor: Avg label and insight annotation slightly crowd Engineering + box area' - id: VQ-03 name: Element Visibility - score: 8 - max: 8 + score: 6 + max: 6 passed: true - comment: Box sizes optimal, outliers clearly visible with size=4 + comment: Boxes well-sized (width=0.72), outliers visible (size=5), good alpha + (0.85) - id: VQ-04 name: Color Accessibility - score: 5 - max: 5 + score: 4 + max: 4 passed: true - comment: Five distinct colors (blue, yellow, red, green, purple) with good - contrast + comment: Wong colorblind-safe palette with 5 distinct hues - id: VQ-05 name: Layout Balance - score: 5 - max: 5 - passed: true - comment: Perfect proportions, no cut-off, good use of space + score: 1 + max: 4 + passed: false + comment: Some wasted whitespace on right side, boxes somewhat narrow relative + to canvas width - id: VQ-06 - name: Axis Labels + name: Axis Labels & Title score: 2 max: 2 passed: true - comment: 'Descriptive with units: "Salary ($)", "Department"' - - id: VQ-07 - name: Grid & Legend - score: 0 - max: 2 - passed: false - comment: Grid is subtle and good, legend correctly hidden (not needed) - but - grid could be slightly more visible + comment: Annual Salary (USD) with units, Department descriptive + design_excellence: + score: 16 + max: 20 + items: + - id: DE-01 + name: Aesthetic Sophistication + score: 6 + max: 8 + passed: true + comment: 'Strong design: Wong palette, intentional hierarchy, flavor theme, + custom grid styling' + - id: DE-02 + name: Visual Refinement + score: 5 + max: 6 + passed: true + comment: Ticks removed, minor grid removed, x-grid removed, subtle y-grid, + clean margins + - id: DE-03 + name: Data Storytelling + score: 5 + max: 6 + passed: true + comment: Median labels, overall mean reference line, insight annotation comparing + highest vs lowest spec_compliance: - score: 25 - max: 25 + score: 15 + max: 15 items: - id: SC-01 name: Plot Type - score: 8 - max: 8 - passed: true - comment: Correct box-and-whisker plot - - id: SC-02 - name: Data Mapping score: 5 max: 5 passed: true - comment: Categories on X, values on Y correctly assigned - - id: SC-03 + comment: Correct box plot with all components + - id: SC-02 name: Required Features - score: 5 - max: 5 + score: 4 + max: 4 passed: true - comment: Median line ✓, outliers as points ✓, whiskers at 1.5*IQR ✓, different - colors - - id: SC-04 - name: Data Range + comment: 'All spec features present: median line, outliers, whiskers at 1.5*IQR, + different colors' + - id: SC-03 + name: Data Mapping score: 3 max: 3 passed: true - comment: All data visible including outliers - - id: SC-05 - name: Legend Accuracy - score: 2 - max: 2 - passed: true - comment: Legend hidden appropriately (colors explained by x-axis) - - id: SC-06 + comment: Categories on X-axis, values on Y-axis correctly assigned + - id: SC-04 name: Title Format - score: 2 - max: 2 + score: 3 + max: 3 passed: true - comment: 'Correct format: "box-basic · letsplot · pyplots.ai"' + comment: Title box-basic · letsplot · pyplots.ai correct, legend appropriately + hidden data_quality: - score: 20 - max: 20 + score: 14 + max: 15 items: - id: DQ-01 name: Feature Coverage - score: 8 - max: 8 + score: 5 + max: 6 passed: true - comment: Shows outliers, different spreads (Sales widest, HR narrowest), different - medians + comment: Different distributions with varying spreads, outliers present, meaningful + median differences - id: DQ-02 name: Realistic Context - score: 7 - max: 7 + score: 5 + max: 5 passed: true - comment: Salary by department is a real, comprehensible scenario + comment: Salary distributions across departments — real, neutral business + scenario - id: DQ-03 name: Appropriate Scale - score: 5 - max: 5 + score: 4 + max: 4 passed: true - comment: Realistic salary values ($20K-$140K range appropriate for US salaries) + comment: Realistic salary values in $20K-$140K range code_quality: - score: 10 + score: 9 max: 10 items: - id: CQ-01 @@ -163,11 +186,11 @@ review: score: 3 max: 3 passed: true - comment: Simple imports → data → plot → save structure + comment: Clean Imports → Data → Plot → Save flow - id: CQ-02 name: Reproducibility - score: 3 - max: 3 + score: 2 + max: 2 passed: true comment: np.random.seed(42) set - id: CQ-03 @@ -175,21 +198,35 @@ review: score: 2 max: 2 passed: true - comment: All imports used + comment: All imports are used - id: CQ-04 - name: No Deprecated API + name: Code Elegance score: 1 - max: 1 + max: 2 passed: true - comment: Current lets-plot API + comment: Slightly verbose with three annotation DataFrames but serves storytelling - id: CQ-05 - name: Output Correct + name: Output & API score: 1 max: 1 passed: true - comment: Saves as plot.png and plot.html - library_features: - score: 5 - max: 5 - items: [] + comment: Saves as plot.png with scale=3, current API + library_mastery: + score: 10 + max: 10 + items: + - id: LM-01 + name: Idiomatic Usage + score: 5 + max: 5 + passed: true + comment: 'Expert grammar of graphics: as_discrete ordering, layer_tooltips, + flavor themes, ggsave' + - id: LM-02 + name: Distinctive Features + score: 5 + max: 5 + passed: true + comment: as_discrete stat-based ordering, layer_tooltips with stat variables, + flavor_high_contrast_light verdict: APPROVED