Skip to content

Commit c0f38e7

Browse files
feat(highcharts): implement shap-summary (#2991)
## Implementation: `shap-summary` - highcharts Implements the **highcharts** version of `shap-summary`. **File:** `plots/shap-summary/implementations/highcharts.py` **Parent Issue:** #2923 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20612964334)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent ef74f6a commit c0f38e7

2 files changed

Lines changed: 255 additions & 0 deletions

File tree

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
""" pyplots.ai
2+
shap-summary: SHAP Summary Plot
3+
Library: highcharts unknown | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-31
5+
"""
6+
7+
import tempfile
8+
import time
9+
import urllib.request
10+
from pathlib import Path
11+
12+
import numpy as np
13+
from highcharts_core.chart import Chart
14+
from highcharts_core.options import HighchartsOptions
15+
from highcharts_core.options.series.scatter import ScatterSeries
16+
from selenium import webdriver
17+
from selenium.webdriver.chrome.options import Options
18+
19+
20+
# Data - Simulated SHAP values for a regression model.
# Seed the global NumPy RNG so every run produces the same plot.
np.random.seed(42)
n_samples = 200

feature_names = [
    "House Size (sqft)",
    "Bedrooms",
    "Location Score",
    "Age (years)",
    "Bathrooms",
    "Garage Spaces",
    "Lot Size (acres)",
    "School Rating",
    "Crime Rate",
    "Distance to City (mi)",
    "Year Built",
    "HOA Fee ($)",
]
# Derived from the name list so the two can never drift apart.
n_features = len(feature_names)

# Generate feature values (uniform in [0, 1); used later for coloring)
feature_values = np.random.rand(n_samples, n_features)

# Generate SHAP values with varying importance per feature:
# weights fall linearly from 1.5 (first feature) to 0.2 (last feature).
importance_weights = np.linspace(1.5, 0.2, n_features)
shap_values = np.zeros((n_samples, n_features))

# Filled column by column (rather than in one vectorized draw) so the order
# in which random numbers are consumed stays fixed.
for i in range(n_features):
    # Linear dependence of the SHAP value on the centered feature value ...
    base_effect = (feature_values[:, i] - 0.5) * importance_weights[i] * 2
    # ... plus Gaussian noise scaled by the same importance weight.
    noise = np.random.randn(n_samples) * importance_weights[i] * 0.3
    shap_values[:, i] = base_effect + noise

# Rank features by mean absolute SHAP value (most important first).
# axis=0 averages over samples, giving one importance score per feature.
feature_importance = np.mean(np.abs(shap_values), axis=0)
sorted_indices = np.argsort(feature_importance)[::-1]

# Take top 10 features
top_n = 10
sorted_indices = sorted_indices[:top_n]
62+
63+
# Prepare series data - one scatter series per feature-value bucket, so that
# each bucket can be drawn in its own color (blue = low, red = high).
n_color_bins = 10
color_gradient = [
    "#3B4CC0",  # Blue (low)
    "#5A7DC7",
    "#7AAAD0",
    "#A0C4DE",
    "#C5D5E8",
    "#E8C5C5",
    "#DEA0A0",
    "#D07A7A",
    "#C75A5A",
    "#C03B3B",  # Red (high)
]

all_series = []

for bucket in range(n_color_bins):
    lower = bucket / n_color_bins
    upper = (bucket + 1) / n_color_bins
    is_last_bucket = bucket == n_color_bins - 1

    points = []
    for rank, column in enumerate(sorted_indices):
        # Rank 0 (most important) gets the highest y so it is drawn at the top.
        row_center = top_n - 1 - rank

        for row in range(n_samples):
            value = feature_values[row, column]

            # Buckets are half-open [lower, upper); the last one also takes 1.0.
            if not (lower <= value < upper) and not (is_last_bucket and value == 1.0):
                continue

            impact = shap_values[row, column]
            # Vertical jitter spreads points of one feature to reduce overlap.
            offset = np.random.uniform(-0.3, 0.3)
            points.append({"x": round(impact, 4), "y": row_center + offset})

    # Buckets without any point produce no series.
    if not points:
        continue

    bucket_series = ScatterSeries()
    bucket_series.data = points
    bucket_series.name = f"Feature Value: {lower:.1f}-{upper:.1f}"
    bucket_series.color = color_gradient[bucket]
    bucket_series.marker = {"radius": 8, "symbol": "circle"}
    # Only the low, mid and high buckets appear in the legend.
    bucket_series.show_in_legend = bucket in [0, 4, 9]
    all_series.append(bucket_series)
107+
108+
# Create the chart object, rendered into the element with id "container"
chart = Chart(container="container")
chart.options = HighchartsOptions()

# Chart configuration: 4800x2700 canvas with extra room on the left and
# bottom margins.
canvas_options = {
    "type": "scatter",
    "width": 4800,
    "height": 2700,
    "backgroundColor": "#ffffff",
    "marginLeft": 350,
    "marginBottom": 150,
}
chart.options.chart = canvas_options

# Title
title_style = {"fontSize": "48px", "fontWeight": "bold"}
chart.options.title = {
    "text": "shap-summary \u00b7 highcharts \u00b7 pyplots.ai",
    "style": title_style,
}

# Subtitle
subtitle_style = {"fontSize": "28px"}
chart.options.subtitle = {
    "text": "Feature Importance and Impact on Model Predictions",
    "style": subtitle_style,
}
130+
131+
# X-axis (SHAP value) with a dark reference line at zero impact
zero_line = {"value": 0, "color": "#333333", "width": 4, "zIndex": 5}
x_axis_title = {
    "text": "SHAP Value (Impact on Prediction)",
    "style": {"fontSize": "36px"},
    "margin": 20,
}
chart.options.x_axis = {
    "title": x_axis_title,
    "labels": {"style": {"fontSize": "28px"}},
    "gridLineWidth": 1,
    "gridLineColor": "#e0e0e0",
    "plotLines": [zero_line],
}

# Y-axis (features): category i is the label for y == i, so the ranked names
# are reversed to put the most important feature at the highest position.
ranked_names = [feature_names[i] for i in sorted_indices]
y_categories = list(reversed(ranked_names))
chart.options.y_axis = {
    "title": {"text": "", "style": {"fontSize": "28px"}},
    "categories": y_categories,
    "labels": {"style": {"fontSize": "30px"}},
    "gridLineWidth": 0,
    "reversed": False,
}
149+
150+
# Legend configuration for color scale (vertical list on the right edge)
legend_title = {
    "text": "Feature Value",
    "style": {"fontSize": "32px", "fontWeight": "bold"},
}
chart.options.legend = {
    "enabled": True,
    "align": "right",
    "verticalAlign": "middle",
    "layout": "vertical",
    "title": legend_title,
    "itemStyle": {"fontSize": "26px"},
    "symbolRadius": 8,
    "symbolHeight": 20,
    "symbolWidth": 20,
    "itemMarginBottom": 10,
}

# Plot options: the Highcharts-side jitter is set to zero (y jitter is
# already baked into the point data) and series animation is turned off.
hover_state = {"enabled": True, "lineColor": "#333333"}
scatter_defaults = {
    "marker": {"radius": 8, "states": {"hover": hover_state}},
    "jitter": {"x": 0, "y": 0},
}
chart.options.plot_options = {
    "scatter": scatter_defaults,
    "series": {"animation": False},
}

# Tooltip
chart.options.tooltip = {
    "headerFormat": "<b>{series.name}</b><br>",
    "pointFormat": "SHAP Value: {point.x:.3f}",
    "style": {"fontSize": "18px"},
}

# Add all series, in bucket order
for bucket_series in all_series:
    chart.add_series(bucket_series)
183+
184+
# Download Highcharts JS so it can be inlined into the page
highcharts_url = "https://code.highcharts.com/highcharts.js"
with urllib.request.urlopen(highcharts_url, timeout=30) as response:
    raw_library = response.read()
highcharts_js = raw_library.decode("utf-8")

# Generate HTML with inline scripts: the library first, then the chart's
# JavaScript literal, which renders into the "container" div.
chart_js = chart.to_js_literal()
page_lines = [
    "<!DOCTYPE html>",
    "<html>",
    "<head>",
    '<meta charset="utf-8">',
    f"<script>{highcharts_js}</script>",
    "</head>",
    '<body style="margin:0;">',
    '<div id="container" style="width: 4800px; height: 2700px;"></div>',
    f"<script>{chart_js}</script>",
    "</body>",
    "</html>",
]
html_content = "\n".join(page_lines)
202+
203+
# Write temp HTML and take screenshot.
# delete=False keeps the file on disk after the with-block closes it, so
# Chrome can load it; it is removed explicitly in the finally clause below.
with tempfile.NamedTemporaryFile(mode="w", suffix=".html", delete=False, encoding="utf-8") as f:
    f.write(html_content)
    temp_path = f.name

try:
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=4800,2700")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # as_uri() builds a correctly escaped file:// URL on every platform,
        # unlike concatenating "file://" with the raw path.
        driver.get(Path(temp_path).as_uri())
        # Fixed wait to give the inline chart script time to render.
        time.sleep(5)
        driver.save_screenshot("plot.png")
    finally:
        # Always shut the browser down, even if loading or capturing fails.
        driver.quit()
finally:
    # Always remove the temp file, even if Chrome could not be started.
    Path(temp_path).unlink(missing_ok=True)
222+
223+
# Also save HTML for interactive version (same page that was screenshotted)
Path("plot.html").write_text(html_content, encoding="utf-8")
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
library: highcharts
2+
specification_id: shap-summary
3+
created: '2025-12-31T06:10:37Z'
4+
updated: '2025-12-31T09:04:07Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20612964334
7+
issue: 2923
8+
python_version: 3.13.11
9+
library_version: unknown
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/shap-summary/highcharts/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/shap-summary/highcharts/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/shap-summary/highcharts/plot.html
13+
quality_score: 91
14+
review:
15+
strengths:
16+
- Excellent implementation of SHAP summary plot using scatter series with color-coded
17+
bins
18+
- Well-chosen realistic context (house price prediction) with meaningful feature
19+
names
20+
- Proper sorting of features by mean absolute SHAP value (most important at top)
21+
- Clear visual separation using vertical line at x=0
22+
- Good use of jittering to reduce point overlap
23+
- Correct title format and professional subtitle
24+
- Properly saves both PNG and interactive HTML versions
25+
weaknesses:
26+
- Blue-red color scheme, while conventional for SHAP, is not optimal for colorblind
27+
accessibility (consider viridis or plasma alternatives in future)
28+
- All features show similar linear correlation patterns - more diverse SHAP distributions
29+
(e.g., some features with non-linear effects, clustered values) would better demonstrate
30+
the plot type capabilities

0 commit comments

Comments
 (0)