feat(plotnine): implement learning-curve-basic (#2290)

github-actions[bot] · web-flow · commit d67f408f882b · 2025-12-26T17:47:00.000Z
## Implementation: `learning-curve-basic` - plotnine Implements the **plotnine** version of `learning-curve-basic`. **File:** `plots/learning-curve-basic/implementations/plotnine.py` --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20526602205)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
diff --git a/plots/learning-curve-basic/implementations/plotnine.py b/plots/learning-curve-basic/implementations/plotnine.py
@@ -0,0 +1,93 @@
+""" pyplots.ai
+learning-curve-basic: Model Learning Curve
+Library: plotnine 0.15.2 | Python 3.13.11
+Quality: 92/100 | Created: 2025-12-26
+"""
+
+import numpy as np
+import pandas as pd
+from plotnine import (
+    aes,
+    element_text,
+    geom_line,
+    geom_ribbon,
+    ggplot,
+    labs,
+    scale_color_manual,
+    scale_fill_manual,
+    theme,
+    theme_minimal,
+)
+
+
+# Data - synthetic learning curve computed from closed-form exponentials (no model is fitted)
+np.random.seed(42)  # no random draws are made anywhere below in this script
+
+# Training set sizes: 10 evenly spaced values from 50 to 800, truncated to int
+train_sizes = np.linspace(50, 800, 10).astype(int)
+
+# Nominal cross-validation fold count; not referenced by any calculation below
+n_folds = 5
+
+# Training scores: mean rises from ~0.88 at 50 samples toward a 0.99 ceiling; std shrinks with size
+train_mean = 0.99 - 0.15 * np.exp(-train_sizes / 150)
+train_std = 0.02 * np.exp(-train_sizes / 300) + 0.005
+
+# Validation scores: mean rises from ~0.70 at 50 samples toward a 0.90 ceiling; std shrinks with size
+val_mean = 0.65 + 0.25 * (1 - np.exp(-train_sizes / 250))
+val_std = 0.08 * np.exp(-train_sizes / 400) + 0.01
+
+# One frame per series: a row per training size with the mean and the mean -/+ std band bounds
+df_train = pd.DataFrame(
+    {
+        "Training Set Size": train_sizes,
+        "Score": train_mean,
+        "Score_low": train_mean - train_std,
+        "Score_high": train_mean + train_std,
+        "Type": "Training Score",
+    }
+)
+
+df_val = pd.DataFrame(
+    {
+        "Training Set Size": train_sizes,
+        "Score": val_mean,
+        "Score_low": val_mean - val_std,
+        "Score_high": val_mean + val_std,
+        "Type": "Validation Score",
+    }
+)
+
+df = pd.concat([df_train, df_val], ignore_index=True)  # stacked long format; "Type" labels the series
+
+# Colors keyed by the "Type" values: Python Blue for training, Python Yellow for validation
+colors = {"Training Score": "#306998", "Validation Score": "#FFD43B"}
+
+# Plot: a translucent mean -/+ std ribbon and a mean line per "Type", sharing one color mapping
+plot = (
+    ggplot(df, aes(x="Training Set Size", y="Score", color="Type", fill="Type"))
+    + geom_ribbon(aes(ymin="Score_low", ymax="Score_high"), alpha=0.25, color="none")
+    + geom_line(size=2)
+    + scale_color_manual(values=colors)
+    + scale_fill_manual(values=colors)
+    + labs(
+        x="Training Set Size",
+        y="Accuracy Score",
+        title="learning-curve-basic · plotnine · pyplots.ai",
+        color="",
+        fill="",
+    )
+    + theme_minimal()
+    + theme(
+        figure_size=(16, 9),
+        text=element_text(size=14),
+        axis_title=element_text(size=20),
+        axis_text=element_text(size=16),
+        plot_title=element_text(size=24),
+        legend_text=element_text(size=16),
+        legend_position=(0.85, 0.25),
+    )
+)
+
+# Save the figure as plot.png at 300 dpi
+plot.save("plot.png", dpi=300)
diff --git a/plots/learning-curve-basic/metadata/plotnine.yaml b/plots/learning-curve-basic/metadata/plotnine.yaml
@@ -0,0 +1,25 @@
+library: plotnine
+specification_id: learning-curve-basic
+created: '2025-12-26T17:37:26Z'
+updated: '2025-12-26T17:45:22Z'
+generated_by: claude-opus-4-5-20251101
+workflow_run: 20526602205
+issue: 0
+python_version: 3.13.11
+library_version: 0.15.2
+preview_url: https://storage.googleapis.com/pyplots-images/plots/learning-curve-basic/plotnine/plot.png
+preview_thumb: https://storage.googleapis.com/pyplots-images/plots/learning-curve-basic/plotnine/plot_thumb.png
+preview_html: null
+quality_score: 92
+review:
+  strengths:
+  - Excellent use of plotnine grammar of graphics with geom_ribbon for confidence
+    bands
+  - Clean, well-structured code following KISS principles
+  - Color scheme is visually appealing and colorblind-safe (blue/gold contrast)
+  - Legend positioning works well within the plot area
+  - Realistic ML learning curve behavior accurately depicted
+  - Proper title format following pyplots.ai conventions
+  weaknesses:
+  - Missing grid lines which would help read exact values from the plot
+  - Y-axis label could include units or range indicator