|
33 | 33 | } |
34 | 34 | ) |
35 | 35 |
|
36 | | -# Compute boxplot statistics for each group |
37 | | -groups = ["A", "B", "C", "D"] |
38 | | -boxplot_data = [] |
39 | | -outliers_data = [] |
| 36 | +# Calculate box plot statistics for each group |
| 37 | +groups = data["group"].unique() |
| 38 | +box_data = [] |
| 39 | +outlier_data = [] |
40 | 40 |
|
41 | 41 | for i, group in enumerate(groups): |
42 | 42 | values = data[data["group"] == group]["value"].values |
43 | 43 | q1 = np.percentile(values, 25) |
44 | 44 | median = np.percentile(values, 50) |
45 | 45 | q3 = np.percentile(values, 75) |
46 | 46 | iqr = q3 - q1 |
47 | | - whisker_low = q1 - 1.5 * iqr |
48 | | - whisker_high = q3 + 1.5 * iqr |
| 47 | + whisker_low = max(values.min(), q1 - 1.5 * iqr) |
| 48 | + whisker_high = min(values.max(), q3 + 1.5 * iqr) |
49 | 49 |
|
50 | | - # Actual whisker ends are the most extreme data within bounds |
51 | | - low = values[values >= whisker_low].min() |
52 | | - high = values[values <= whisker_high].max() |
| 50 | + # Find actual whisker values (most extreme data points within 1.5 * IQR of the quartiles) |
| 51 | + lower_whisker = values[values >= q1 - 1.5 * iqr].min() |
| 52 | + upper_whisker = values[values <= q3 + 1.5 * iqr].max() |
53 | 53 |
|
54 | | - boxplot_data.append([low, q1, median, q3, high]) |
| 54 | + box_data.append([lower_whisker, q1, median, q3, upper_whisker]) |
55 | 55 |
|
56 | | - # Outliers |
57 | | - outlier_values = values[(values < whisker_low) | (values > whisker_high)] |
58 | | - for val in outlier_values: |
59 | | - outliers_data.append([i, val]) |
| 56 | + # Collect outliers |
| 57 | + outliers = values[(values < q1 - 1.5 * iqr) | (values > q3 + 1.5 * iqr)] |
| 58 | + for outlier in outliers: |
| 59 | + outlier_data.append([i, outlier]) |
60 | 60 |
|
61 | 61 | # Create chart |
62 | 62 | chart = Chart(container="container") |
63 | 63 | chart.options = HighchartsOptions() |
64 | 64 |
|
65 | | -chart.options.chart = {"type": "boxplot", "width": 4800, "height": 2700, "backgroundColor": "#ffffff"} |
| 65 | +# Chart configuration |
| 66 | +chart.options.chart = { |
| 67 | + "type": "boxplot", |
| 68 | + "width": 4800, |
| 69 | + "height": 2700, |
| 70 | + "backgroundColor": "#ffffff", |
| 71 | + "style": {"fontFamily": "sans-serif"}, |
| 72 | +} |
66 | 73 |
|
67 | | -chart.options.title = {"text": "Basic Box Plot", "style": {"fontSize": "48px"}} |
| 74 | +# Title |
| 75 | +chart.options.title = {"text": "Basic Box Plot", "style": {"fontSize": "60px", "fontWeight": "bold"}} |
68 | 76 |
|
| 77 | +# X-axis |
69 | 78 | chart.options.x_axis = { |
70 | | - "categories": groups, |
71 | | - "title": {"text": "Group", "style": {"fontSize": "40px"}}, |
72 | | - "labels": {"style": {"fontSize": "32px"}}, |
| 79 | + "categories": list(groups), |
| 80 | + "title": {"text": "Group", "style": {"fontSize": "48px"}}, |
| 81 | + "labels": {"style": {"fontSize": "40px"}}, |
73 | 82 | } |
74 | 83 |
|
| 84 | +# Y-axis |
75 | 85 | chart.options.y_axis = { |
76 | | - "title": {"text": "Value", "style": {"fontSize": "40px"}}, |
77 | | - "labels": {"style": {"fontSize": "32px"}}, |
| 86 | + "title": {"text": "Value", "style": {"fontSize": "48px"}}, |
| 87 | + "labels": {"style": {"fontSize": "40px"}}, |
| 88 | + "gridLineColor": "#e0e0e0", |
| 89 | + "gridLineWidth": 1, |
78 | 90 | } |
79 | 91 |
|
80 | | -chart.options.legend = {"enabled": False} |
| 92 | +# Legend |
| 93 | +chart.options.legend = {"enabled": True, "itemStyle": {"fontSize": "40px"}} |
| 94 | + |
| 95 | +# Colors from style guide |
| 96 | +colors = ["#306998", "#FFD43B", "#DC2626", "#059669", "#8B5CF6", "#F97316"] |
81 | 97 |
|
82 | | -# Boxplot series |
83 | | -boxplot_series = BoxPlotSeries() |
84 | | -boxplot_series.name = "Distribution" |
85 | | -boxplot_series.data = boxplot_data |
86 | | -boxplot_series.color = "#306998" |
87 | | -boxplot_series.fillColor = "rgba(48, 105, 152, 0.4)" |
88 | | -boxplot_series.lineWidth = 3 |
89 | | -boxplot_series.medianWidth = 4 |
90 | | -boxplot_series.medianColor = "#DC2626" |
91 | | -boxplot_series.whiskerLength = "60%" |
92 | | -boxplot_series.whiskerWidth = 3 |
| 98 | +# Box plot series |
| 99 | +box_series = BoxPlotSeries() |
| 100 | +box_series.name = "Distribution" |
| 101 | +box_series.data = box_data |
| 102 | +box_series.color = colors[0] |
| 103 | +box_series.fillColor = "#306998" |
| 104 | +box_series.medianColor = "#ffffff" |
| 105 | +box_series.medianWidth = 4 |
| 106 | +box_series.stemWidth = 3 |
| 107 | +box_series.whiskerWidth = 3 |
| 108 | +box_series.whiskerLength = "50%" |
93 | 109 |
|
94 | | -chart.add_series(boxplot_series) |
| 110 | +chart.add_series(box_series) |
95 | 111 |
|
96 | | -# Outliers as scatter series |
97 | | -if outliers_data: |
| 112 | +# Outliers series (if any) |
| 113 | +if outlier_data: |
98 | 114 | from highcharts_core.options.series.scatter import ScatterSeries |
99 | 115 |
|
100 | 116 | outlier_series = ScatterSeries() |
101 | 117 | outlier_series.name = "Outliers" |
102 | | - outlier_series.data = outliers_data |
103 | | - outlier_series.color = "#DC2626" |
104 | | - outlier_series.marker = {"radius": 6, "symbol": "circle"} |
105 | | - |
| 118 | + outlier_series.data = outlier_data |
| 119 | + outlier_series.color = colors[2] |
| 120 | + outlier_series.marker = {"symbol": "circle", "radius": 8, "fillColor": colors[2]} |
106 | 121 | chart.add_series(outlier_series) |
107 | 122 |
|
108 | | -# Download Highcharts JS and highcharts-more.js (needed for boxplot) |
| 123 | +# Plot options |
| 124 | +chart.options.plot_options = {"boxplot": {"colorByPoint": True, "colors": colors[:4]}} |
| 125 | + |
| 126 | +# Download Highcharts JS files (required for headless Chrome) |
109 | 127 | highcharts_url = "https://code.highcharts.com/highcharts.js" |
110 | 128 | with urllib.request.urlopen(highcharts_url, timeout=30) as response: |
111 | 129 | highcharts_js = response.read().decode("utf-8") |
|
139 | 157 | chrome_options.add_argument("--no-sandbox") |
140 | 158 | chrome_options.add_argument("--disable-dev-shm-usage") |
141 | 159 | chrome_options.add_argument("--disable-gpu") |
142 | | -chrome_options.add_argument("--window-size=5000,3000") |
| 160 | +chrome_options.add_argument("--window-size=4800,2700") |
143 | 161 |
|
144 | 162 | driver = webdriver.Chrome(options=chrome_options) |
145 | 163 | driver.get(f"file://{temp_path}") |
146 | 164 | time.sleep(5) |
147 | | - |
148 | | -# Screenshot the container element for exact dimensions |
149 | | -container = driver.find_element("id", "container") |
150 | | -container.screenshot("plot.png") |
| 165 | +driver.save_screenshot("plot.png") |
151 | 166 | driver.quit() |
152 | 167 |
|
153 | 168 | Path(temp_path).unlink() |
0 commit comments