|
| 1 | +""" pyplots.ai |
| 2 | +residual-plot: Residual Plot |
| 3 | +Library: matplotlib 3.10.8 | Python 3.13.11 |
| 4 | +Quality: 93/100 | Created: 2025-12-26 |
| 5 | +""" |
| 6 | + |
| 7 | +import matplotlib.pyplot as plt |
| 8 | +import numpy as np |
| 9 | + |
| 10 | + |
# Data - synthetic regression scenario with a fixed seed for reproducibility
np.random.seed(42)

# Evenly spaced independent variable on [0, 10]
X = np.linspace(0, 10, 150)

# Observed response: linear + quadratic signal plus Gaussian noise.
# The quadratic term is what a straight-line fit cannot capture, so it
# shows up as a visible pattern in the residuals.
signal = 2.5 * X + 0.3 * X**2
y_true = signal + np.random.randn(150) * 3

# Ordinary least squares for y = intercept + slope * x, via the closed-form
# formulas (slope = sum of centered cross-products / sum of squared x deviations)
x_bar = X.mean()
y_bar = y_true.mean()
x_dev = X - x_bar
slope = np.sum(x_dev * (y_true - y_bar)) / np.sum(x_dev**2)
intercept = y_bar - slope * x_bar
y_pred = intercept + slope * X

# Residuals: observed minus fitted
residuals = y_true - y_pred

# Flag points whose residual magnitude exceeds two standard deviations
std_residuals = residuals.std()
outlier_threshold = 2 * std_residuals
outlier_mask = np.abs(residuals) > outlier_threshold
| 35 | + |
# Figure and axes for the residuals-vs-fitted plot
fig, ax = plt.subplots(figsize=(16, 9))

# Quantities reused by several artists below
inlier_mask = ~outlier_mask
two_sigma = 2 * std_residuals
fitted_min = y_pred.min()
fitted_max = y_pred.max()

# Regular residuals (within the outlier threshold)
inlier_style = {
    "s": 150,
    "alpha": 0.7,
    "color": "#306998",
    "edgecolors": "white",
    "linewidth": 0.5,
    "label": "Residuals",
}
ax.scatter(y_pred[inlier_mask], residuals[inlier_mask], **inlier_style)

# Flagged outliers, drawn larger and in a contrasting color
outlier_style = {
    "s": 180,
    "alpha": 0.9,
    "color": "#FFD43B",
    "edgecolors": "#306998",
    "linewidth": 1.5,
    "label": "Outliers (>2σ)",
}
ax.scatter(y_pred[outlier_mask], residuals[outlier_mask], **outlier_style)

# Zero-residual reference line
ax.axhline(y=0, color="#333333", linewidth=2, linestyle="-", label="Perfect fit (y=0)")

# Dashed guides at +2σ and -2σ (upper first, then lower)
for level in (two_sigma, -two_sigma):
    ax.axhline(y=level, color="#888888", linewidth=1.5, linestyle="--", alpha=0.7)

# Shade the ±2σ band across the full x-range; the same range is later
# applied as the axis limits so the band reaches both edges of the plot
x_range = (fitted_min - 2, fitted_max + 2)
ax.fill_between(x_range, -two_sigma, two_sigma, alpha=0.1, color="#306998", label="±2σ band")

# Cubic polynomial fitted to the residuals, drawn as a trend line to
# expose any systematic pattern
cubic_coeffs = np.polyfit(y_pred, residuals, 3)
trend = np.poly1d(cubic_coeffs)
fitted_grid = np.linspace(fitted_min, fitted_max, 100)
ax.plot(
    fitted_grid,
    trend(fitted_grid),
    color="#D62728",
    linewidth=2.5,
    linestyle="-",
    alpha=0.8,
    label="Trend line",
)

# Title, axis labels and tick styling
ax.set_title("residual-plot · matplotlib · pyplots.ai", fontsize=24)
ax.set_xlabel("Fitted Values", fontsize=20)
ax.set_ylabel("Residuals (Observed - Predicted)", fontsize=20)
ax.tick_params(axis="both", labelsize=16)

# Axis limits, grid and legend
ax.set_xlim(x_range)
ax.grid(True, alpha=0.3, linestyle="--")
ax.legend(fontsize=14, loc="upper left", framealpha=0.9)

# Write the figure to disk
plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
0 commit comments