""" pyplots.ai
learning-curve-basic: Model Learning Curve
Library: seaborn 0.13.2 | Python 3.13.11
Quality: 92/100 | Created: 2025-12-26
"""

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns


# Data - simulate cross-validated scores with the typical learning-curve shape.
# The global NumPy seed is fixed so the rendered figure is reproducible.
np.random.seed(42)

# Training set sizes (x-axis) and the number of simulated CV folds per size.
train_sizes = np.array([50, 100, 200, 400, 600, 800, 1000, 1200, 1500, 2000])
n_sizes = len(train_sizes)
n_folds = 5

# Shape being simulated:
# - the training score starts high and decreases slightly with more data
# - the validation score starts low and rises with more data
# - the gap between the two narrows as the training size increases

# Training scores: a linear drift from just under 0.98 down to 0.95 at the
# largest size, plus Gaussian noise (sigma = 0.01). One vectorized draw of
# shape (n_folds, n_sizes) replaces the per-fold loop; `train_base`
# broadcasts across the fold axis.
train_base = 0.98 - 0.03 * (train_sizes / train_sizes.max())
train_scores = train_base + np.random.normal(0, 0.01, (n_folds, n_sizes))
train_scores = np.clip(train_scores, 0.85, 1.0)

# Validation scores: a saturating exponential rising from 0.65 towards 0.90,
# plus Gaussian noise (sigma = 0.02), drawn after the training noise.
val_base = 0.65 + 0.25 * (1 - np.exp(-train_sizes / 500))
validation_scores = val_base + np.random.normal(0, 0.02, (n_folds, n_sizes))
validation_scores = np.clip(validation_scores, 0.55, 0.95)

# Per-size mean and (population, ddof=0) standard deviation across folds.
train_mean = train_scores.mean(axis=0)
train_std = train_scores.std(axis=0)
val_mean = validation_scores.mean(axis=0)
val_std = validation_scores.std(axis=0)

# Plot setup: seaborn "talk" context on a white grid, 16:9 canvas.
sns.set_context("talk", font_scale=1.1)
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(16, 9))

# Define colors - Python Blue for training, Python Yellow for validation
train_color = "#306998"
val_color = "#FFD43B"

# Each series is drawn as a shaded band of mean +/- 1 standard deviation
# across folds, followed by the mean line with markers. Training is drawn
# first, then validation, so the draw order of the artists is fixed.
series = (
    ("Training Score", train_mean, train_std, train_color, "o"),
    ("Validation Score", val_mean, val_std, val_color, "s"),
)
for label, mean, std, color, marker in series:
    ax.fill_between(train_sizes, mean - std, mean + std, alpha=0.2, color=color)
    sns.lineplot(
        x=train_sizes,
        y=mean,
        ax=ax,
        color=color,
        linewidth=3,
        marker=marker,
        markersize=10,
        label=label,
    )

# Labels and styling
ax.set_xlabel("Training Set Size", fontsize=20)
ax.set_ylabel("Accuracy Score", fontsize=20)
ax.set_title("learning-curve-basic · seaborn · pyplots.ai", fontsize=24)
ax.tick_params(axis="both", labelsize=16)

# Fixed y-range for the score axis, with headroom above 1.0.
ax.set_ylim(0.5, 1.02)

# Configure legend
ax.legend(fontsize=16, loc="lower right", framealpha=0.9)

# Subtle dashed grid
ax.grid(True, alpha=0.3, linestyle="--")

plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
# 0 commit comments