Skip to content

Commit 25e908d

Browse files
feat(pygal): implement calibration-curve (#2354)
## Implementation: `calibration-curve` - pygal Implements the **pygal** version of `calibration-curve`. **File:** `plots/calibration-curve/implementations/pygal.py` --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20528205173)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 943b302 commit 25e908d

2 files changed

Lines changed: 173 additions & 0 deletions

File tree

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
""" pyplots.ai
2+
calibration-curve: Calibration Curve
3+
Library: pygal 3.1.0 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-26
5+
"""
6+
7+
import numpy as np
8+
import pygal
9+
from pygal.style import Style
10+
11+
12+
# Data: synthetic binary-classification outcomes with controlled calibration.
# The RNG is seeded so every run (and the rendered plot) is reproducible.
np.random.seed(42)
n_samples = 2000
n_bins = 10

# Latent per-sample probabilities spread across the whole [0, 1] interval;
# outcomes are Bernoulli draws against those probabilities.
true_prob = np.random.beta(2, 2, n_samples)
y_true = (np.random.random(n_samples) < true_prob).astype(int)

# Model 1: well-calibrated predictor (logistic-regression-like) --
# the true probability plus small Gaussian jitter, kept strictly inside (0, 1)
jitter = np.random.randn(n_samples) * 0.08
y_prob_model1 = np.clip(true_prob + jitter, 0.01, 0.99)

# Model 2: overconfident predictor (random-forest / neural-net-like) --
# a steep sigmoid pushes predictions toward the extremes, producing the
# characteristic miscalibration pattern
y_prob_model2 = 1 / (1 + np.exp(-12 * (true_prob - 0.5)))
y_prob_model2 = np.clip(y_prob_model2 + np.random.randn(n_samples) * 0.02, 0.02, 0.98)
29+
30+
# Per-bin calibration statistics (mean predicted probability vs. observed
# fraction of positives), analogous to sklearn's calibration_curve.
bin_edges = np.linspace(0, 1, n_bins + 1)


def _calibration_bins(y_prob, y_out, edges):
    """Return (mean_predicted, fraction_positive) lists over non-empty bins.

    Probabilities are assigned to bins via the interior edges, so digitize
    yields indices 0..len(edges)-2. Empty bins are skipped, which keeps the
    two returned lists aligned with each other (they may be shorter than
    the nominal bin count).
    """
    indices = np.digitize(y_prob, edges[1:-1])
    mean_pred = []
    frac_pos = []
    for b in range(len(edges) - 1):
        mask = indices == b
        if mask.sum() > 0:
            mean_pred.append(np.mean(y_prob[mask]))
            frac_pos.append(np.mean(y_out[mask]))
    return mean_pred, frac_pos


# Model 1 (well-calibrated) and Model 2 (overconfident) share the same
# binning logic; the duplicated inline loops were folded into the helper.
mean_pred1, frac_pos1 = _calibration_bins(y_prob_model1, y_true, bin_edges)
mean_pred2, frac_pos2 = _calibration_bins(y_prob_model2, y_true, bin_edges)
52+
53+
# Brier score: mean squared difference between predicted probability and the
# binary outcome (lower is better; rewards both calibration and sharpness).
brier1 = ((y_prob_model1 - y_true) ** 2).mean()
brier2 = ((y_prob_model2 - y_true) ** 2).mean()
56+
57+
# Shared look-and-feel for the 4800 x 2700 canvas: white background,
# high-contrast series colours, and oversized fonts so the rendered PNG
# stays legible at gallery-thumbnail scale.
_font_sizes = {
    "title_font_size": 72,
    "label_font_size": 48,
    "major_label_font_size": 42,
    "legend_font_size": 42,
    "value_font_size": 36,
    "tooltip_font_size": 36,
}
custom_style = Style(
    background="white",
    plot_background="white",
    foreground="#333333",
    foreground_strong="#333333",
    foreground_subtle="#CCCCCC",  # lighter tone keeps secondary marks unobtrusive
    colors=("#888888", "#2E7D32", "#C62828"),  # gray / dark green / dark red
    stroke_width=6,
    opacity=0.9,
    opacity_hover=1.0,
    guide_stroke_color="#E0E0E0",  # very light guide lines for subtlety
    guide_stroke_dasharray="3,3",  # subtle dashed guide pattern
    **_font_sizes,
)
77+
78+
# XY chart for the calibration curves; axes are pinned to [0, 1] on both
# sides and the legend sits beneath the plot area.
_chart_opts = {
    "width": 4800,
    "height": 2700,
    "title": "calibration-curve · pygal · pyplots.ai",
    "x_title": "Mean Predicted Probability",
    "y_title": "Fraction of Positives",
    "show_dots": True,
    "dots_size": 14,
    "stroke_style": {"width": 5},
    "show_x_guides": True,
    "show_y_guides": True,
    "x_value_formatter": lambda v: f"{v:.1f}",
    "range": (0, 1),
    "xrange": (0, 1),
    "legend_at_bottom": True,
    "legend_at_bottom_columns": 3,  # lay the three legend entries out horizontally
    "legend_box_size": 28,
    "truncate_legend": -1,  # never ellipsize legend labels
    "margin": 50,
    "margin_top": 80,
    "margin_bottom": 200,  # extra room for the bottom legend
}
chart = pygal.XY(style=custom_style, **_chart_opts)
102+
103+
# Both model curves are anchored at (0, 0) and (1, 1) so they span the same
# domain as the diagonal reference line, giving visual balance.
# Every point is a dict with a 'label' key, which pygal surfaces as an
# interactive tooltip in the HTML output.

# Diagonal reference: predicted probability equals observed frequency.
_diag_text = [
    "Perfect calibration reference",
    "Predicted = Observed",
    "Ideal: 50% predicted → 50% positive",
    "Predicted = Observed",
    "Perfect calibration reference",
]
perfect_calibration = [
    {"value": (t, t), "label": text}
    for t, text in zip((0, 0.25, 0.5, 0.75, 1.0), _diag_text)
]
chart.add("Perfect Calibration", perfect_calibration, stroke_dasharray="15,8", dots_size=0, stroke_style={"width": 4})

# Well-calibrated model (dark green): binned points hug the diagonal.
model1_points = (
    [{"value": (0.0, 0.0), "label": "Curve start (0,0)"}]
    + [
        {"value": (pred, obs), "label": f"Bin: {pred:.2f} pred → {obs:.2f} actual ({int(obs * 100)}% positive)"}
        for pred, obs in zip(mean_pred1, frac_pos1, strict=False)
    ]
    + [{"value": (1.0, 1.0), "label": "Curve end (1,1)"}]
)
chart.add(f"Logistic Regression (Brier: {brier1:.3f})", model1_points)

# Overconfident model (dark red): characteristic sigmoid away from the diagonal.
model2_points = (
    [{"value": (0.0, 0.0), "label": "Curve start (0,0)"}]
    + [
        {"value": (pred, obs), "label": f"Bin: {pred:.2f} pred → {obs:.2f} actual (overconfident: Δ={pred - obs:+.2f})"}
        for pred, obs in zip(mean_pred2, frac_pos2, strict=False)
    ]
    + [{"value": (1.0, 1.0), "label": "Curve end (1,1)"}]
)
chart.add(f"Overconfident Model (Brier: {brier2:.3f})", model2_points)
144+
145+
# Emit both artifacts: a static PNG preview for the gallery and an HTML
# file that keeps pygal's interactive tooltips (its distinctive feature).
for _path, _render in (("plot.png", chart.render_to_png), ("plot.html", chart.render_to_file)):
    _render(_path)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Implementation metadata for the pygal calibration-curve plot (pyplots.ai).
library: pygal
specification_id: calibration-curve
created: '2025-12-26T19:36:53Z'
updated: '2025-12-26T20:03:31Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 20528205173
issue: 0
python_version: 3.13.11
library_version: 3.1.0
# Rendered artifacts published to the public image bucket.
preview_url: https://storage.googleapis.com/pyplots-images/plots/calibration-curve/pygal/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/calibration-curve/pygal/plot_thumb.png
preview_html: https://storage.googleapis.com/pyplots-images/plots/calibration-curve/pygal/plot.html
quality_score: 91
# Automated review summary attached by the generation workflow.
review:
  strengths:
  - Excellent use of pygal interactive tooltip system with descriptive labels for
    each data point
  - High-contrast color scheme (dark green, dark red, gray) ensures accessibility
  - Brier scores displayed in legend provide quantitative calibration metrics
  - Correct title format following pyplots.ai convention
  - Both PNG and HTML outputs leverage pygal dual static/interactive capability
  - Clean KISS-style code structure with inline calibration computation
  weaknesses:
  - None significant - previous issues have been addressed

0 commit comments

Comments
 (0)