|
| 1 | +""" pyplots.ai |
| 2 | +violin-box: Violin Plot with Embedded Box Plot |
| 3 | +Library: pygal 3.1.0 | Python 3.13.11 |
| 4 | +Quality: 88/100 | Created: 2025-12-30 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pygal |
| 9 | +from pygal.style import Style |
| 10 | + |
| 11 | + |
# Data - synthetic performance scores per department.
# Each department is described by (name, mean, standard deviation); 200 scores
# are drawn from the matching normal distribution. The seed is fixed and the
# departments are sampled in declaration order so the output is reproducible.
np.random.seed(42)
raw_data = {
    department: np.random.normal(mean, spread, 200)
    for department, mean, spread in (
        ("Engineering", 75, 10),
        ("Marketing", 62, 12),
        ("Sales", 68, 14),
        ("Operations", 55, 8),
    )
}
# Scores live on a 0-100 scale, so clamp any draw that falls outside it.
data = {department: np.clip(scores, 0, 100) for department, scores in raw_data.items()}
| 22 | + |
# Colour palette.
# Series colours are consumed positionally, so every category needs a run of
# eight entries in the order the series are added:
#   violin fill, quartile box, then six line-coloured series
#   (two whiskers, two caps, median, outliers).
# Four categories therefore yield 32 entries.
violin_colors = ["#306998", "#FFD43B", "#4CAF50", "#FF5722"]
box_color = "#FFFFFF"  # White box fill for contrast against the violin
line_color = "#333333"  # Dark gray shared by whiskers, caps, median, outliers

colors_list = []
for vc in violin_colors:
    colors_list += [vc, box_color] + [line_color] * 6
| 35 | + |
# Custom style for the 4800x2700 px canvas: white backgrounds, dark gray text,
# the positional series palette built above, and enlarged fonts.
_style_options = {
    # Surfaces and text colours
    "background": "white",
    "plot_background": "white",
    "foreground": "#333333",
    "foreground_strong": "#333333",
    "foreground_subtle": "#666666",
    # Per-series colours, consumed in the order series are added
    "colors": tuple(colors_list),
    # Font sizes
    "title_font_size": 72,
    "label_font_size": 48,
    "major_label_font_size": 42,
    "legend_font_size": 42,
    "value_font_size": 36,
    # Fill opacity at rest and on hover
    "opacity": 0.7,
    "opacity_hover": 0.9,
}
custom_style = Style(**_style_options)
| 52 | + |
# Create the XY chart that hosts the violins and their embedded box plots.
# Violins, boxes and whiskers are all drawn as explicit (x, y) polylines, which
# is why a generic XY chart is used.
_chart_options = dict(
    # Canvas
    width=4800,
    height=2700,
    margin=50,
    style=custom_style,
    # Titles
    title="violin-box · pygal · pyplots.ai",
    x_title="Department",
    y_title="Performance Score (0-100 scale)",
    # Legend
    show_legend=True,
    legend_at_bottom=True,
    legend_at_bottom_columns=4,
    # Chart-wide series defaults: stroked, filled shapes without dots
    stroke=True,
    fill=True,
    dots_size=0,
    # Guides and axis ranges
    show_x_guides=False,
    show_y_guides=True,
    range=(0, 110),
    xrange=(0, 6),
)
chart = pygal.XY(**_chart_options)
| 73 | + |
# Violin geometry: maximum half-width of a violin in x-axis units, and the
# number of y positions at which the density is sampled.
violin_width, n_points = 0.35, 100

# Stroke styles for the embedded box plot. The median is the heaviest line,
# the box outline is next and the whiskers are the lightest; all are solid.
box_stroke_style = dict(width=4, dasharray="")
median_stroke_style = dict(width=6, dasharray="")
whisker_stroke_style = dict(width=3, dasharray="")
| 82 | + |
# Legend bookkeeping: the box, median and outlier series repeat for every
# category, so only the first occurrence of each is given a legend title;
# the repeats are added with a title of None.
added_box_legend = False
added_median_legend = False
added_outlier_legend = False

# Half-width of the quartile box in x-axis units; identical for every violin.
box_width = 0.10


def _violin_outline(values, center_x, half_width, n_samples):
    """Return the closed (x, y) outline of a mirrored Gaussian-KDE violin.

    Args:
        values: 1-D numpy array of scores for one category (non-empty).
        center_x: x position of the violin's vertical axis.
        half_width: x distance from the axis to the outline at peak density.
        n_samples: number of y positions at which the density is evaluated.

    Returns:
        A list of (x, y) float tuples tracing the left side upwards, the
        right side downwards, and ending on the starting point.
    """
    n = len(values)
    std = float(np.std(values))
    q25, q75 = np.percentile(values, [25, 75])
    robust_sigma = float(q75 - q25) / 1.34

    # Silverman's rule of thumb uses the smaller of the two spread estimates.
    spread = min(std, robust_sigma)
    if spread <= 0:
        # Tied or heavily clipped scores can collapse the IQR (or both
        # estimates) to zero; fall back to the other estimate, then to 1.0,
        # so the kernel never divides by a zero bandwidth.
        spread = max(std, robust_sigma) or 1.0
    bandwidth = 0.9 * spread * n ** (-0.2)

    # Evaluate slightly beyond the observed range, but never outside 0-100.
    grid = np.linspace(max(0, values.min() - 5), min(100, values.max() + 5), n_samples)

    # Sum of Gaussian kernels, one per observation (grid x observations).
    # The usual 1 / (n * h * sqrt(2*pi)) factor is omitted because the curve
    # is rescaled to a fixed peak width on the next line.
    z = (grid[:, None] - values[None, :]) / bandwidth
    density = np.exp(-0.5 * z**2).sum(axis=1)
    density = density / density.max() * half_width

    left = [(float(center_x - d), float(y)) for y, d in zip(grid, density, strict=True)]
    right = [(float(center_x + d), float(y)) for y, d in zip(grid[::-1], density[::-1], strict=True)]
    return left + right + [left[0]]


def _box_stats(values):
    """Return Tukey box-plot statistics for a 1-D numpy array.

    Returns:
        (q1, median, q3, lower_whisker, upper_whisker, outliers) where the
        whiskers end at the most extreme observations lying within 1.5 * IQR
        of the box, and ``outliers`` holds every observation beyond them.
    """
    q1, median, q3 = (float(q) for q in np.percentile(values, [25, 50, 75]))
    reach = 1.5 * (q3 - q1)
    lower_fence, upper_fence = q1 - reach, q3 + reach

    inside = values[(values >= lower_fence) & (values <= upper_fence)]
    # A whisker stops at a real observation, not at the fence itself. It is
    # clamped to the box edge so it can never point into the box when the
    # quartile was interpolated across a gap in the data.
    lower_whisker = min(q1, float(inside.min())) if inside.size else q1
    upper_whisker = max(q3, float(inside.max())) if inside.size else q3

    outliers = values[(values < lower_fence) | (values > upper_fence)]
    return q1, median, q3, lower_whisker, upper_whisker, outliers


# Add one violin with an embedded box plot per category.
# Every category contributes exactly eight series, in a fixed order, because
# the style's colour list is consumed positionally:
#   violin, box, lower whisker, upper whisker, lower cap, upper cap,
#   median, outliers.
for i, (category, values) in enumerate(data.items()):
    center_x = i + 1.5

    chart.add(category, _violin_outline(values, center_x, violin_width, n_points))

    q1, median, q3, lower_whisker, upper_whisker, outliers = _box_stats(values)

    # Quartile box (IQR) - white filled for contrast; closed rectangle.
    quartile_box = [
        (center_x - box_width, q1),
        (center_x - box_width, q3),
        (center_x + box_width, q3),
        (center_x + box_width, q1),
        (center_x - box_width, q1),
    ]
    box_label = None if added_box_legend else "Box (Q1-Q3)"
    chart.add(box_label, quartile_box, stroke=True, fill=True, show_dots=False, stroke_style=box_stroke_style)
    added_box_legend = True

    # Whisker stems (box edge to whisker end) followed by their horizontal
    # caps; none of these get a legend entry.
    cap_width = box_width * 0.8
    whisker_segments = (
        [(center_x, q1), (center_x, lower_whisker)],
        [(center_x, q3), (center_x, upper_whisker)],
        [(center_x - cap_width, lower_whisker), (center_x + cap_width, lower_whisker)],
        [(center_x - cap_width, upper_whisker), (center_x + cap_width, upper_whisker)],
    )
    for segment in whisker_segments:
        chart.add(None, segment, stroke=True, fill=False, show_dots=False, stroke_style=whisker_stroke_style)

    # Median line (thicker, slightly wider than the box).
    median_line = [(center_x - box_width * 1.2, median), (center_x + box_width * 1.2, median)]
    median_label = None if added_median_legend else "Median"
    chart.add(median_label, median_line, stroke=True, fill=False, show_dots=False, stroke_style=median_stroke_style)
    added_median_legend = True

    # Outliers as large dots. The series is added even when it is empty so
    # that the next category's violin still lands on its own palette slot;
    # the legend title is only spent on the first non-empty series.
    outlier_points = [(center_x, float(o)) for o in outliers]
    outlier_label = "Outliers" if outlier_points and not added_outlier_legend else None
    chart.add(outlier_label, outlier_points, stroke=False, fill=False, show_dots=True, dots_size=18)
    if outlier_points:
        added_outlier_legend = True
| 171 | + |
# X-axis labels: pin each department name to the centre of its violin
# (i + 1.5, the same position the drawing loop uses) via explicit
# label/value pairs, rather than relying on where automatically computed
# axis ticks happen to fall.
# NOTE(review): relies on pygal XY charts accepting {"label", "value"} dicts
# in x_labels - confirm against the installed pygal version.
chart.x_labels = [{"label": category, "value": i + 1.5} for i, category in enumerate(data)]
# Treat every department label as a major label so they are styled alike.
chart.x_labels_major_count = len(chart.x_labels)

# Save outputs: interactive HTML and a static PNG.
chart.render_to_file("plot.html")
chart.render_to_png("plot.png")
0 commit comments