|
| 1 | +# ruff: noqa: F405 |
| 2 | +"""pyplots.ai |
| 3 | +residual-plot: Residual Plot |
| 4 | +Library: lets-plot | Python 3.13 |
| 5 | +Quality: pending | Created: 2025-12-26 |
| 6 | +""" |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import pandas as pd |
| 10 | +from lets_plot import * # noqa: F403, F405 |
| 11 | + |
| 12 | + |
| 13 | +LetsPlot.setup_html() |
| 14 | + |
# Data: synthetic regression scenario whose residuals carry a deliberate pattern
np.random.seed(42)
n = 150

# Evenly spaced predictor values
X = np.linspace(10, 100, n)
noise = np.random.normal(0, 5, n)
# Slight heteroscedasticity: the noise is scaled by a factor that grows with X
heteroscedastic_noise = noise * (0.5 + 0.01 * X)
# Observed response: the 0.02 * X**2 term is the mild non-linearity that the
# straight-line fit below cannot capture, so it shows up in the residuals
y_true = 2.5 * X + 0.02 * X**2 + heteroscedastic_noise + 50

# Simple linear regression (closed-form least squares, no extra dependencies)
X_mean = np.mean(X)
y_mean = np.mean(y_true)
x_centered = X - X_mean  # computed once, used in both numerator and denominator
slope = np.sum(x_centered * (y_true - y_mean)) / np.sum(x_centered**2)
intercept = y_mean - slope * X_mean
y_pred = slope * X + intercept

# Residuals: observed minus fitted
residuals = y_true - y_pred

# Residual standard deviation for the outlier bands.
# ddof=2 accounts for the two estimated parameters (slope and intercept);
# the default ddof=0 would underestimate the spread.
residual_std = np.std(residuals, ddof=2)

# Identify outliers (beyond ±2 standard deviations)
outlier_threshold = 2 * residual_std
is_outlier = np.abs(residuals) > outlier_threshold

# DataFrame consumed by the plot: one row per observation
df = pd.DataFrame(
    {
        "Fitted Values": y_pred,
        "Residuals": residuals,
        "Outlier": np.where(is_outlier, "Outlier (>2σ)", "Normal"),
    }
)
| 47 | + |
# Colors used both for the point categories and for the reference lines
NORMAL_COLOR = "#306998"
OUTLIER_COLOR = "#DC2626"

# Create residual plot
plot = (
    ggplot(df, aes(x="Fitted Values", y="Residuals"))
    # Reference line at y=0
    + geom_hline(yintercept=0, color=NORMAL_COLOR, size=1.5, linetype="solid")
    # Outlier bands at ±2 standard deviations
    + geom_hline(yintercept=outlier_threshold, color=OUTLIER_COLOR, size=1, linetype="dashed", alpha=0.7)
    + geom_hline(yintercept=-outlier_threshold, color=OUTLIER_COLOR, size=1, linetype="dashed", alpha=0.7)
    # Points colored by outlier status
    + geom_point(aes(color="Outlier"), size=5, alpha=0.7)
    # LOESS smoothing line to detect patterns left in the residuals
    + geom_smooth(method="loess", color="#FFD43B", size=2, se=False, span=0.6)
    # Color scale for outliers. A dict binds each color to its category label,
    # so the mapping does not depend on the order in which the categories
    # occur in the data (a positional list would).
    + scale_color_manual(
        values={"Normal": NORMAL_COLOR, "Outlier (>2σ)": OUTLIER_COLOR},
        name="Point Type",
    )
    # Labels
    + labs(title="residual-plot · letsplot · pyplots.ai", x="Fitted Values", y="Residuals (Observed - Predicted)")
    # Theme
    + theme_minimal()
    + theme(
        plot_title=element_text(size=24, face="bold"),
        axis_title=element_text(size=20),
        axis_text=element_text(size=16),
        legend_title=element_text(size=18),
        legend_text=element_text(size=16),
        legend_position="right",
        panel_grid_major=element_line(color="#CCCCCC", size=0.5),
        panel_grid_minor=element_blank(),
    )
    # Size for export (1600 × 900 base, scaled 3x = 4800 × 2700)
    + ggsize(1600, 900)
)

# Save as PNG and HTML (path='.' ensures files are saved in current directory)
ggsave(plot, "plot.png", path=".", scale=3)
ggsave(plot, "plot.html", path=".")
0 commit comments