""" pyplots.ai
learning-curve-basic: Model Learning Curve
Library: pygal 3.1.0 | Python 3.13.11
Quality: 90/100 | Created: 2025-12-26
"""

import numpy as np
import pygal
from pygal.style import Style


# Data - synthetic scores laid out like sklearn's learning_curve output
np.random.seed(42)

# Training set sizes: fractions of the full dataset converted to sample counts.
# Round before casting so a product such as 28.999... cannot truncate to 28.
n_samples_total = 1000
train_sizes_pct = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
train_sizes = np.rint(train_sizes_pct * n_samples_total).astype(int)

# One simulated score per cross-validation fold at every training size
n_folds = 5
n_sizes = len(train_sizes)

# Training scores: mean climbs from ~0.92 towards 0.95 while the fold-to-fold
# spread shrinks as the training set grows.
train_scores_base = 0.95 - 0.05 * np.exp(-train_sizes / 200)
train_scores_std_vals = 0.02 * np.exp(-train_sizes / 300)
# Noise is drawn as (n_sizes, n_folds) so values are consumed size by size,
# then the result is transposed to shape (n_folds, n_sizes).
train_noise = np.random.randn(n_sizes, n_folds)
train_scores = (train_scores_base[:, None] + train_noise * train_scores_std_vals[:, None]).T

# Validation scores: start lower (~0.71) and rise towards the training curve;
# the spread decays with size but never drops below 0.01.
val_scores_base = 0.65 + 0.25 * (1 - np.exp(-train_sizes / 400))
val_scores_std_vals = 0.04 * np.exp(-train_sizes / 500) + 0.01
val_noise = np.random.randn(n_sizes, n_folds)
val_scores = (val_scores_base[:, None] + val_noise * val_scores_std_vals[:, None]).T

# Per-size mean and (population) standard deviation across folds
train_mean = train_scores.mean(axis=0)
train_std = train_scores.std(axis=0)
val_mean = val_scores.mean(axis=0)
val_std = val_scores.std(axis=0)

# X positions as builtin ints; reused for the points and the x-axis labels
x_values = train_sizes.tolist()

# Custom style for pyplots - scaled for 4800x2700 canvas.
# Colours follow the order the series are added below: the two mean curves
# first, then two training bounds, then two validation bounds.
custom_style = Style(
    background="white",
    plot_background="white",
    foreground="#333",
    foreground_strong="#333",
    foreground_subtle="#666",
    colors=("#306998", "#FFD43B", "#306998", "#306998", "#FFD43B", "#FFD43B"),
    title_font_size=56,
    label_font_size=36,
    major_label_font_size=32,
    legend_font_size=36,
    value_font_size=28,
    stroke_width=5,
    opacity=0.9,
    opacity_hover=1.0,
)

# Create XY chart for learning curve
chart = pygal.XY(
    width=4800,
    height=2700,
    style=custom_style,
    title="learning-curve-basic · pygal · pyplots.ai",
    x_title="Training Set Size (samples)",
    y_title="Accuracy Score",
    show_dots=True,
    dots_size=12,
    stroke_style={"width": 5},
    show_x_guides=False,
    show_y_guides=True,
    legend_at_bottom=True,
    legend_at_bottom_columns=2,
    truncate_legend=-1,
    range=(0.5, 1.02),
    xrange=(50, 1050),
    x_labels=x_values,
    margin=50,
)

# Mean curves as (x, y) pairs, y rounded to three decimals
train_points = list(zip(x_values, np.round(train_mean, 3).tolist()))
val_points = list(zip(x_values, np.round(val_mean, 3).tolist()))

# Upper/lower bounds of the ±1 std band around each mean curve
train_upper = list(zip(x_values, np.round(train_mean + train_std, 3).tolist()))
train_lower = list(zip(x_values, np.round(train_mean - train_std, 3).tolist()))
val_upper = list(zip(x_values, np.round(val_mean + val_std, 3).tolist()))
val_lower = list(zip(x_values, np.round(val_mean - val_std, 3).tolist()))

# Main learning curves: thick solid lines with markers
chart.add("Training Score (±1σ band)", train_points, stroke_style={"width": 6})
chart.add("Validation Score (±1σ band)", val_points, stroke_style={"width": 6})

# Band edges: thin dashed lines without markers, added with a None title.
# The order must stay train, train, val, val to match the style's colours.
band_stroke = {"width": 2, "dasharray": "8, 4"}
for band_edge in (train_upper, train_lower, val_upper, val_lower):
    chart.add(None, band_edge, show_dots=False, stroke_style=dict(band_stroke))

# Save as HTML (interactive) and PNG
chart.render_to_file("plot.html")
chart.render_to_png("plot.png")