|
| 1 | +""" pyplots.ai |
| 2 | +residual-plot: Residual Plot |
| 3 | +Library: pygal 3.1.0 | Python 3.13.11 |
| 4 | +Quality: 88/100 | Created: 2025-12-26 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pygal |
| 9 | +from pygal.style import Style |
| 10 | + |
| 11 | + |
# Data - simulated regression residuals with mild heteroscedasticity and a few outliers
np.random.seed(42)
n_points = 100

# x-axis: evenly spaced fitted values (house price predictions in $1000s)
fitted_values = np.linspace(150, 500, n_points)

# Two noise draws, taken in this order so the seeded sequence is reproducible:
# first a constant-variance component, then one whose spread grows with the fitted value
constant_noise = np.random.normal(0, 20, n_points)
scaled_noise = np.random.normal(0, 15, n_points) * (fitted_values / 500)
residuals = constant_noise + scaled_noise

# Overwrite three positions with hand-picked extreme residuals
for position, extreme_value in ((15, 85), (45, -75), (78, 90)):
    residuals[position] = extreme_value

# Reference bands sit two standard deviations either side of zero
std_residuals = np.std(residuals)
upper_band = 2 * std_residuals
lower_band = -upper_band

# A point is an outlier when it falls outside the band on either side
outlier_mask = (residuals > upper_band) | (residuals < lower_band)

# Custom style for the 4800x2700 canvas, assembled from themed option groups
surface_options = {
    "background": "white",
    "plot_background": "white",
    "foreground": "#333333",
    "foreground_strong": "#333333",
    "foreground_subtle": "#999999",
}
# One colour per series, in the order the series are added to the chart
series_palette = ("#306998", "#E74C3C", "#2C3E50", "#AAAAAA", "#AAAAAA")
font_options = {
    "title_font_size": 32,
    "label_font_size": 22,
    "major_label_font_size": 20,
    "legend_font_size": 24,
    "value_font_size": 16,
    "tooltip_font_size": 16,
}
line_options = {
    "stroke_width": 3,
    "guide_stroke_color": "#DDDDDD",
    "guide_stroke_dasharray": "3, 3",
}
custom_style = Style(colors=series_palette, **surface_options, **font_options, **line_options)

# Build the XY chart used as a scatter plot for the residuals.
# Axis ranges are fixed explicitly so the plot has margin around the data.
chart_config = {
    "width": 4800,
    "height": 2700,
    "style": custom_style,
    "title": "residual-plot · pygal · pyplots.ai",
    "x_title": "Fitted Values - Predicted Price ($1000s)",
    "y_title": "Residuals - Actual minus Predicted ($1000s)",
    # Legend: shown below the plot, one column per series, labels never truncated
    "show_legend": True,
    "legend_at_bottom": True,
    "legend_at_bottom_columns": 5,
    "truncate_legend": -1,
    # Guides on both axes
    "show_x_guides": True,
    "show_y_guides": True,
    # Points only by default; individual series opt back in to strokes
    "stroke": False,
    "dots_size": 12,
    "x_label_rotation": 0,
    "xrange": (140, 510),
    "range": (-100, 110),
}
chart = pygal.XY(**chart_config)

# Explicit x-axis ticks every 50 units from 150 to 500, matching the fitted-value span
chart.x_labels = list(range(150, 501, 50))

# Split the (fitted, residual) pairs into regular points and flagged outliers in one pass
normal_points = []
outlier_points = []
for fitted, residual, is_outlier in zip(fitted_values, residuals, outlier_mask):
    target = outlier_points if is_outlier else normal_points
    # Convert numpy scalars to plain floats before handing them to pygal
    target.append((float(fitted), float(residual)))

# Register both scatter series; regular residuals first, outliers second
chart.add("Residuals", normal_points)
chart.add("Outliers (>2σ)", outlier_points)

# Horizontal reference lines share one x grid: 50 evenly spaced positions from 150 to 500
reference_x = np.linspace(150, 500, 50).tolist()


def _horizontal_line(level):
    """Return (x, level) pairs across the shared reference grid."""
    height = float(level)
    return [(x, height) for x in reference_x]


# Zero line (perfect fit), drawn as a thick solid stroke without dots
zero_line_points = _horizontal_line(0.0)
chart.add("Zero Reference (Perfect Fit)", zero_line_points, stroke=True, show_dots=False, stroke_style={"width": 5})

# +2σ and -2σ thresholds, drawn as thinner dashed strokes without dots
upper_band_points = _horizontal_line(upper_band)
lower_band_points = _horizontal_line(lower_band)
for threshold_label, threshold_points in (
    ("+2σ Threshold", upper_band_points),
    ("-2σ Threshold", lower_band_points),
):
    chart.add(
        threshold_label,
        threshold_points,
        stroke=True,
        show_dots=False,
        stroke_style={"width": 3, "dasharray": "10, 8"},
    )

# Export the finished chart: PNG image first, then the plot.html file
png_path = "plot.png"
html_path = "plot.html"
chart.render_to_png(png_path)
chart.render_to_file(html_path)
0 commit comments