|
1 | 1 | """ pyplots.ai |
2 | 2 | box-basic: Basic Box Plot |
3 | | -Library: highcharts unknown | Python 3.13.11 |
4 | | -Quality: 91/100 | Created: 2025-12-23 |
| 3 | +Library: highcharts 1.10.3 | Python 3.14 |
| 4 | +Quality: 92/100 | Created: 2025-12-23 |
5 | 5 | """ |
6 | 6 |
|
| 7 | +import json |
7 | 8 | import tempfile |
8 | 9 | import time |
9 | 10 | import urllib.request |
|
13 | 14 | from highcharts_core.chart import Chart |
14 | 15 | from highcharts_core.options import HighchartsOptions |
15 | 16 | from highcharts_core.options.series.boxplot import BoxPlotSeries |
| 17 | +from highcharts_core.options.series.data.boxplot import BoxPlotData |
16 | 18 | from highcharts_core.options.series.scatter import ScatterSeries |
17 | 19 | from selenium import webdriver |
18 | 20 | from selenium.webdriver.chrome.options import Options |
19 | 21 |
|
20 | 22 |
|
# Data - employee performance scores across 5 departments
np.random.seed(42)  # fixed seed so the sampled scores are reproducible
departments = ["Engineering", "Marketing", "Sales", "Design", "Finance"]
colors = ["#306998", "#FFD43B", "#9467BD", "#17BECF", "#8C564B"]
# Semi-transparent (alpha 0.72) counterparts of `colors`, in the same order.
colors_fill = [
    "rgba(48, 105, 152, 0.72)",
    "rgba(255, 212, 59, 0.72)",
    "rgba(148, 103, 189, 0.72)",
    "rgba(23, 190, 207, 0.72)",
    "rgba(140, 86, 75, 0.72)",
]

# One sample per department, in the same order as `departments`:
# np.random.normal(mean, standard deviation, sample size).
scores = [
    np.random.normal(78, 8, 80),  # Engineering: high, tight
    np.random.normal(72, 14, 60),  # Marketing: moderate, wide spread
    np.random.normal(68, 9, 90),  # Sales: lower mean, moderate
    np.random.normal(82, 7, 50),  # Design: highest, tight
    np.random.normal(75, 18, 70),  # Finance: moderate, widest spread
]

# Calculate box plot statistics
# box_stats: one dict per department with low/q1/median/q3/high, rounded to 1 decimal.
# outlier_data: [department index, score] pairs for points beyond the 1.5 * IQR fences.
box_stats = []
outlier_data = []

for i, raw in enumerate(scores):
    # Scores are bounded to the 0-100 scale before any statistic is computed.
    clipped = np.clip(raw, 0, 100)
    q1, median, q3 = (float(q) for q in np.percentile(clipped, [25, 50, 75]))
    iqr = q3 - q1

    # Tukey fences, computed once and shared by the whiskers and the outlier test.
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr
    is_outlier = (clipped < lower_fence) | (clipped > upper_fence)

    # Whiskers end at the most extreme observations that lie inside the fences.
    inside = clipped[~is_outlier]
    whisker_low = float(inside.min())
    whisker_high = float(inside.max())

    box_stats.append(
        {
            "low": round(whisker_low, 1),
            "q1": round(q1, 1),
            "median": round(median, 1),
            "q3": round(q3, 1),
            "high": round(whisker_high, 1),
        }
    )

    outlier_data.extend([i, round(float(val), 1)] for val in clipped[is_outlier])

# Identify key insights for annotations
medians = [s["median"] for s in box_stats]
spreads = [s["q3"] - s["q1"] for s in box_stats]  # IQR from the rounded quartiles
best_dept_idx = int(np.argmax(medians))  # department with the highest median
widest_dept_idx = int(np.argmax(spreads))  # department with the largest IQR
56 | 75 |
|
# Build BoxPlotData objects via highcharts-core API
# The five summary statistics every box point carries, in serialisation order.
_STAT_FIELDS = ("low", "q1", "median", "q3", "high")

box_data = [
    BoxPlotData(**{field: stats[field] for field in _STAT_FIELDS}, color=stroke)
    for stats, stroke in zip(box_stats, colors)
]

# Per-point data with fillColor (not exposed by BoxPlotData API, injected post-generation)
# box_data_api_js is the text to search for later; box_data_with_fill is the
# replacement carrying the same statistics plus a per-point fillColor.
box_data_api_js = "[" + ",\n".join(point.to_js_literal() for point in box_data) + "]"
box_data_with_fill = json.dumps(
    [
        {
            **{field: stats[field] for field in _STAT_FIELDS},
            "color": stroke,
            "fillColor": fill,
        }
        for stats, stroke, fill in zip(box_stats, colors, colors_fill)
    ]
)
| 105 | + |
# Build chart using highcharts-core Python API
chart = Chart(container="container")
chart.options = HighchartsOptions()

# Colours reused across several option groups below.
_DARK_INK = "#1a1a2e"
_AXIS_INK = "#2d3436"
_MUTED_INK = "#636e72"
_RULE_GREY = "#555555"

# Canvas: fixed 4800x2700 drawing surface, animation off.
canvas_options = {
    "type": "boxplot",
    "width": 4800,
    "height": 2700,
    "backgroundColor": "#fafafa",
    "marginBottom": 220,
    "marginLeft": 240,
    "marginRight": 120,
    "spacingTop": 40,
    "style": {"fontFamily": "'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif"},
    "animation": False,
}
chart.options.chart = canvas_options

# Title and subtitle
title_options = {
    "text": "box-basic \u00b7 highcharts \u00b7 pyplots.ai",
    "style": {"fontSize": "64px", "fontWeight": "700", "color": _DARK_INK, "letterSpacing": "0.5px"},
    "margin": 50,
}
subtitle_options = {
    "text": "Annual Performance Review Scores by Department",
    "style": {"fontSize": "42px", "color": _MUTED_INK, "fontWeight": "300"},
}
chart.options.title = title_options
chart.options.subtitle = subtitle_options

# X-axis: one category per department, with axis line, ticks and grid lines set to width 0.
x_axis_options = {
    "categories": departments,
    "title": {
        "text": "Department",
        "style": {"fontSize": "44px", "color": _AXIS_INK, "fontWeight": "600"},
        "margin": 24,
    },
    "labels": {"style": {"fontSize": "38px", "color": _AXIS_INK, "fontWeight": "500"}},
    "lineWidth": 0,
    "tickWidth": 0,
    "gridLineWidth": 0,
}
chart.options.x_axis = x_axis_options

# Y-axis: faint dotted grid lines, a tick every 5 points.
y_axis_options = {
    "title": {
        "text": "Score (out of 100)",
        "style": {"fontSize": "44px", "color": _AXIS_INK, "fontWeight": "600"},
        "margin": 20,
    },
    "labels": {"style": {"fontSize": "34px", "color": _MUTED_INK}},
    "gridLineWidth": 1,
    "gridLineColor": "rgba(0, 0, 0, 0.06)",
    "gridLineDashStyle": "Dot",
    "tickInterval": 5,
    "lineWidth": 0,
}
chart.options.y_axis = y_axis_options

# Legend, credits and tooltip are all switched off.
chart.options.legend = {"enabled": False}
chart.options.credits = {"enabled": False}
chart.options.tooltip = {"enabled": False}

# Box styling shared by every box, plus animation off for all series.
boxplot_style = {
    "pointWidth": 480,
    "lineWidth": 3,
    "medianWidth": 6,
    "medianColor": _DARK_INK,
    "stemColor": _RULE_GREY,
    "stemWidth": 3,
    "stemDashStyle": "Solid",
    "whiskerWidth": 4,
    "whiskerLength": "50%",
    "whiskerColor": _RULE_GREY,
}
chart.options.plot_options = {
    "boxplot": boxplot_style,
    "series": {"animation": False},
}
107 | 180 |
|
# Create BoxPlotSeries and ScatterSeries via Python API
box_series = BoxPlotSeries(name="Department Scores", data=box_data)

# Outliers are drawn as a separate scatter series; z_index=10 is set so the
# markers stack above the boxes, and the series is kept out of the legend.
outlier_series = ScatterSeries(
    name="Outliers",
    data=outlier_data,
    marker={
        "fillColor": "rgba(231, 76, 60, 0.75)",
        "lineWidth": 2,
        "lineColor": "#c0392b",
        "radius": 14,
        "symbol": "circle",
    },
    z_index=10,
    show_in_legend=False,
)

chart.add_series(box_series)
chart.add_series(outlier_series)

# Generate JS config from the Python API
chart_js = chart.to_js_literal(event_listener_enabled=False)

# Inject fillColor into box data (BoxPlotData doesn't expose fillColor property)
# The swap is a plain text substitution, so it only works while the library
# serialises the data array exactly like box_data_api_js. Fail loudly instead
# of silently rendering boxes without their fill colours.
if box_data_api_js not in chart_js:
    raise RuntimeError(
        "Could not inject fillColor: the serialised box plot data was not found "
        "in the generated chart JS (highcharts-core output format may have changed)."
    )
chart_js = chart_js.replace(box_data_api_js, box_data_with_fill)
129 | 206 |
|
# Download Highcharts JS files (required for headless Chrome)
# highcharts.js is the core library; highcharts-more.js is needed for the boxplot series.
highcharts_url = "https://code.highcharts.com/highcharts.js"
highcharts_more_url = "https://code.highcharts.com/highcharts-more.js"

fetched_sources = []
for script_url in (highcharts_url, highcharts_more_url):
    with urllib.request.urlopen(script_url, timeout=30) as response:
        fetched_sources.append(response.read().decode("utf-8"))

highcharts_js, highcharts_more_js = fetched_sources
139 | 215 |
|
# Annotation JS for data storytelling — uses Highcharts renderer API (no Python equivalent)
# Values interpolated into the script below.
best_dept, best_median = departments[best_dept_idx], medians[best_dept_idx]
widest_dept, widest_iqr = departments[widest_dept_idx], spreads[widest_dept_idx]
n_outliers = len(outlier_data)
outlier_s = "" if n_outliers == 1 else "s"  # plural suffix for the outlier label

# Doubled braces are literal JS braces; doubled backslashes emit JS \uXXXX escapes.
# The script runs 500 ms after load and draws three labels on the first chart.
annotation_js = f"""
setTimeout(function() {{
    var chart = Highcharts.charts[0];
    if (!chart) return;

    // Top Performer annotation — positioned at top-left of plot area
    chart.renderer.label(
        '<span style="font-size:30px;color:#1a6b3c;font-weight:700;">\\u25B2 Top Performer</span>' +
        '<br><span style="font-size:26px;color:#555;">{best_dept} \\u2014 Median: {best_median:.0f}</span>' +
        '<br><span style="font-size:24px;color:#777;">Highest scores, consistent results</span>',
        chart.plotLeft + 20,
        chart.plotTop + 15
    )
    .attr({{
        fill: 'rgba(255,255,255,0.95)',
        stroke: '#27ae60',
        'stroke-width': 2.5,
        r: 12,
        padding: 18,
        zIndex: 20
    }})
    .css({{ lineHeight: '38px' }})
    .add();

    // Widest Spread annotation — positioned at top-right of plot area
    chart.renderer.label(
        '<span style="font-size:30px;color:#b45309;font-weight:700;">\\u25CF Widest Spread</span>' +
        '<br><span style="font-size:26px;color:#555;">{widest_dept} \\u2014 IQR: {widest_iqr:.0f} pts</span>' +
        '<br><span style="font-size:24px;color:#777;">Highly variable performance</span>',
        chart.plotLeft + chart.plotWidth - 620,
        chart.plotTop + 15
    )
    .attr({{
        fill: 'rgba(255,255,255,0.95)',
        stroke: '#e67e22',
        'stroke-width': 2.5,
        r: 12,
        padding: 18,
        zIndex: 20
    }})
    .css({{ lineHeight: '38px' }})
    .add();

    // Outlier count annotation — bottom-left of plot area
    chart.renderer.label(
        '<span style="font-size:28px;color:#c0392b;font-weight:600;">\\u25CF {n_outliers} outlier{outlier_s} detected</span>' +
        '<br><span style="font-size:24px;color:#777;">Scores beyond 1.5\\u00d7IQR from quartiles</span>',
        chart.plotLeft + 20,
        chart.plotTop + chart.plotHeight - 120
    )
    .attr({{
        fill: 'rgba(255,255,255,0.95)',
        stroke: '#e74c3c',
        'stroke-width': 2,
        r: 12,
        padding: 16,
        zIndex: 20
    }})
    .css({{ lineHeight: '36px' }})
    .add();
}}, 500);
"""
| 286 | + |
140 | 287 | # Generate HTML with inline scripts |
141 | | -html_str = chart.to_js_literal() |
142 | 288 | html_content = f"""<!DOCTYPE html> |
143 | 289 | <html> |
144 | 290 | <head> |
|
148 | 294 | </head> |
149 | 295 | <body style="margin:0;"> |
150 | 296 | <div id="container" style="width: 4800px; height: 2700px;"></div> |
151 | | - <script>{html_str}</script> |
| 297 | + <script>{chart_js}</script> |
| 298 | + <script>{annotation_js}</script> |
152 | 299 | </body> |
153 | 300 | </html>""" |
154 | 301 |
|
|
157 | 304 | f.write(html_content) |
158 | 305 | temp_path = f.name |
159 | 306 |
|
# Take screenshot with Selenium
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-gpu")
# Window is larger than the 4800x2700 container so the element fits in the viewport.
chrome_options.add_argument("--window-size=5000,3000")

try:
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(f"file://{temp_path}")
        # Fixed wait for the chart and the delayed annotation script to finish drawing.
        time.sleep(6)

        # Capture only the chart container rather than the whole browser window.
        container = driver.find_element("id", "container")
        container.screenshot("plot.png")
    finally:
        # Always shut the browser down, even if navigation or the screenshot fails.
        driver.quit()
finally:
    # Clean up the temporary HTML file whether or not the screenshot succeeded.
    Path(temp_path).unlink(missing_ok=True)
0 commit comments