feat(pygal): implement logistic-regression (#3575)

github-actions[bot] · web-flow · commit de3121ec6b39 · 2026-01-09T23:35:59.000Z
## Implementation: `logistic-regression` - pygal Implements the **pygal** version of `logistic-regression`. **File:** `plots/logistic-regression/implementations/pygal.py` **Parent Issue:** #3550 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20868565603)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
diff --git a/plots/logistic-regression/implementations/pygal.py b/plots/logistic-regression/implementations/pygal.py
@@ -0,0 +1,122 @@
+""" pyplots.ai
+logistic-regression: Logistic Regression Curve Plot
+Library: pygal 3.1.0 | Python 3.13.11
+Quality: 91/100 | Created: 2026-01-09
+"""
+
+import numpy as np
+import pygal
+from pygal.style import Style
+
+
+# Data - Credit approval example based on credit score
+np.random.seed(42)
+n_samples = 150
+
+# Generate credit scores (300-850 range)
+credit_scores = np.concatenate(
+    [
+        np.random.normal(520, 70, n_samples // 2),  # Lower scores (more rejections)
+        np.random.normal(720, 60, n_samples // 2),  # Higher scores (more approvals)
+    ]
+)
+credit_scores = np.clip(credit_scores, 300, 850)
+
+# Generate binary outcomes with logistic probability
+true_probs = 1 / (1 + np.exp(-0.015 * (credit_scores - 600)))
+y = (np.random.random(n_samples) < true_probs).astype(int)
+
+# Fit logistic regression using gradient descent (numpy only)
+X = (credit_scores - credit_scores.mean()) / credit_scores.std()  # Normalize for stability
+b0, b1 = 0.0, 0.0
+learning_rate = 0.1
+for _ in range(1000):
+    z = b0 + b1 * X
+    p = 1 / (1 + np.exp(-np.clip(z, -500, 500)))
+    grad_b0 = np.mean(p - y)
+    grad_b1 = np.mean((p - y) * X)
+    b0 -= learning_rate * grad_b0
+    b1 -= learning_rate * grad_b1
+
+# Generate smooth curve for predictions
+x_curve = np.linspace(300, 850, 100)
+x_curve_norm = (x_curve - credit_scores.mean()) / credit_scores.std()
+y_proba = 1 / (1 + np.exp(-np.clip(b0 + b1 * x_curve_norm, -500, 500)))
+
+# Confidence interval (approximate using binomial SE)
+se = np.sqrt(y_proba * (1 - y_proba) / n_samples) * 1.5
+ci_lower = np.clip(y_proba - 1.96 * se, 0, 1)
+ci_upper = np.clip(y_proba + 1.96 * se, 0, 1)
+
+# Jitter y values for visibility
+y_jittered = y + np.random.uniform(-0.025, 0.025, n_samples)
+
+# Custom style for large canvas
+custom_style = Style(
+    background="white",
+    plot_background="white",
+    foreground="#333333",
+    foreground_strong="#333333",
+    foreground_subtle="#666666",
+    colors=("#306998", "#306998", "#306998", "#888888", "#E74C3C", "#FFD43B"),
+    title_font_size=56,
+    label_font_size=36,
+    major_label_font_size=32,
+    legend_font_size=32,
+    value_font_size=24,
+    stroke_width=4,
+    opacity=0.7,
+    opacity_hover=0.95,
+    font_family="sans-serif",
+)
+
+# Create XY chart
+chart = pygal.XY(
+    width=4800,
+    height=2700,
+    style=custom_style,
+    title="logistic-regression · pygal · pyplots.ai",
+    x_title="Credit Score",
+    y_title="Probability of Approval",
+    show_dots=True,
+    stroke=True,
+    show_x_guides=True,
+    show_y_guides=True,
+    dots_size=10,
+    stroke_style={"width": 4},
+    range=(0, 1.05),
+    xrange=(280, 870),
+    explicit_size=True,
+    legend_at_bottom=True,
+    legend_box_size=28,
+    truncate_legend=-1,
+    print_values=False,
+)
+
+# Add logistic regression curve (main feature)
+curve_points = [(float(x_curve[i]), float(y_proba[i])) for i in range(len(x_curve))]
+chart.add("Logistic Fit", curve_points, stroke_style={"width": 5}, dots_size=0, show_dots=False)
+
+# Add confidence interval bounds
+ci_upper_pts = [(float(x_curve[i]), float(ci_upper[i])) for i in range(0, len(x_curve), 2)]
+ci_lower_pts = [(float(x_curve[i]), float(ci_lower[i])) for i in range(0, len(x_curve), 2)]
+chart.add("95% CI Upper", ci_upper_pts, stroke_style={"width": 2, "dasharray": "8,4"}, dots_size=0, show_dots=False)
+chart.add("95% CI Lower", ci_lower_pts, stroke_style={"width": 2, "dasharray": "8,4"}, dots_size=0, show_dots=False)
+
+# Add decision threshold line (y = 0.5)
+threshold_pts = [(300.0, 0.5), (850.0, 0.5)]
+chart.add(
+    "Threshold (p=0.5)", threshold_pts, stroke_style={"width": 3, "dasharray": "12,6"}, dots_size=0, show_dots=False
+)
+
+# Add data points - Rejected (Class 0)
+rejected_pts = [(float(credit_scores[i]), float(y_jittered[i])) for i in range(n_samples) if y[i] == 0]
+chart.add("Rejected (0)", rejected_pts, stroke=False, dots_size=14)
+
+# Add data points - Approved (Class 1)
+approved_pts = [(float(credit_scores[i]), float(y_jittered[i])) for i in range(n_samples) if y[i] == 1]
+chart.add("Approved (1)", approved_pts, stroke=False, dots_size=14)
+
+# Save as PNG and HTML
+chart.render_to_png("plot.png")
+chart.render_to_file("plot.html")
diff --git a/plots/logistic-regression/metadata/pygal.yaml b/plots/logistic-regression/metadata/pygal.yaml
@@ -0,0 +1,213 @@
+library: pygal
+specification_id: logistic-regression
+created: '2026-01-09T23:32:39Z'
+updated: '2026-01-09T23:35:42Z'
+generated_by: claude-opus-4-5-20251101
+workflow_run: 20868565603
+issue: 3550
+python_version: 3.13.11
+library_version: 3.1.0
+preview_url: https://storage.googleapis.com/pyplots-images/plots/logistic-regression/pygal/plot.png
+preview_thumb: https://storage.googleapis.com/pyplots-images/plots/logistic-regression/pygal/plot_thumb.png
+preview_html: https://storage.googleapis.com/pyplots-images/plots/logistic-regression/pygal/plot.html
+quality_score: 91
+review:
+  strengths:
+  - Excellent sigmoid curve representation with clear S-shape transition
+  - Good use of custom pygal Style with appropriate font sizes for large canvas
+  - Realistic credit approval scenario with proper jittering of binary outcomes
+  - Clean implementation of gradient descent for logistic regression without external
+    dependencies
+  - Decision threshold line clearly visible at p=0.5
+  - Proper title format following pyplots.ai conventions
+  weaknesses:
+  - Confidence interval shown as dashed lines rather than semi-transparent shaded
+    band (spec preference, though pygal has limited fill capabilities)
+  - All blue elements (logistic curve, CI bounds) use same color making them less
+    visually distinct
+  - Legend uses square markers for all items instead of line symbols for line-based
+    elements
+  image_description: 'The plot displays a logistic regression visualization for credit
+    approval based on credit score. It shows a characteristic S-shaped (sigmoid) curve
+    in blue representing the fitted logistic model. The x-axis displays "Credit Score"
+    ranging from 300 to 850, and the y-axis shows "Probability of Approval" from 0
+    to 1. Red dots represent rejected applicants (Class 0), clustered near y=0 with
+    jittering, while yellow/gold dots represent approved applicants (Class 1), clustered
+    near y=1. Two dashed blue lines show the 95% confidence interval bounds around
+    the main curve. A horizontal dashed gray line at y=0.5 indicates the decision
+    threshold. The legend at the bottom shows six items: Logistic Fit, 95% CI Upper,
+    95% CI Lower, Threshold (p=0.5), Rejected (0), and Approved (1). The title follows
+    the correct format: "logistic-regression · pygal · pyplots.ai".'
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+      - id: VQ-01
+        name: Text Legibility
+        score: 9
+        max: 10
+        passed: true
+        comment: All text readable, good font sizes for large canvas
+      - id: VQ-02
+        name: No Overlap
+        score: 8
+        max: 8
+        passed: true
+        comment: No overlapping text elements
+      - id: VQ-03
+        name: Element Visibility
+        score: 7
+        max: 8
+        passed: true
+        comment: Markers visible with good size, though same color for all blue elements
+          makes CI bounds less distinct
+      - id: VQ-04
+        name: Color Accessibility
+        score: 4
+        max: 5
+        passed: true
+        comment: Red/yellow distinction is clear, but using same blue for all lines
+          reduces differentiation
+      - id: VQ-05
+        name: Layout Balance
+        score: 5
+        max: 5
+        passed: true
+        comment: Good use of canvas, balanced margins
+      - id: VQ-06
+        name: Axis Labels
+        score: 2
+        max: 2
+        passed: true
+        comment: Clear descriptive labels Credit Score and Probability of Approval
+      - id: VQ-07
+        name: Grid & Legend
+        score: 1
+        max: 2
+        passed: true
+        comment: Legend at bottom is functional but legend items use colored squares
+          rather than line symbols
+    spec_compliance:
+      score: 24
+      max: 25
+      items:
+      - id: SC-01
+        name: Plot Type
+        score: 8
+        max: 8
+        passed: true
+        comment: Correct logistic regression curve with sigmoid shape
+      - id: SC-02
+        name: Data Mapping
+        score: 5
+        max: 5
+        passed: true
+        comment: x=credit score, y=probability/binary outcome correctly mapped
+      - id: SC-03
+        name: Required Features
+        score: 4
+        max: 5
+        passed: true
+        comment: Has curve, CI, threshold line, jittered points; spec suggested semi-transparent
+          shading for CI but implementation uses dashed lines
+      - id: SC-04
+        name: Data Range
+        score: 3
+        max: 3
+        passed: true
+        comment: Axes show full data range appropriately
+      - id: SC-05
+        name: Legend Accuracy
+        score: 2
+        max: 2
+        passed: true
+        comment: Legend labels correctly identify all elements
+      - id: SC-06
+        name: Title Format
+        score: 2
+        max: 2
+        passed: true
+        comment: Correct format logistic-regression · pygal · pyplots.ai
+    data_quality:
+      score: 19
+      max: 20
+      items:
+      - id: DQ-01
+        name: Feature Coverage
+        score: 7
+        max: 8
+        passed: true
+        comment: Shows both classes, sigmoid transition, but density of rejected cases
+          at higher scores could be more balanced
+      - id: DQ-02
+        name: Realistic Context
+        score: 7
+        max: 7
+        passed: true
+        comment: Credit score approval is a real, neutral, comprehensible scenario
+      - id: DQ-03
+        name: Appropriate Scale
+        score: 5
+        max: 5
+        passed: true
+        comment: Credit scores 300-850 and probability 0-1 are realistic ranges
+    code_quality:
+      score: 9
+      max: 10
+      items:
+      - id: CQ-01
+        name: KISS Structure
+        score: 3
+        max: 3
+        passed: true
+        comment: Flat script structure, no functions/classes
+      - id: CQ-02
+        name: Reproducibility
+        score: 3
+        max: 3
+        passed: true
+        comment: np.random.seed(42) set
+      - id: CQ-03
+        name: Clean Imports
+        score: 2
+        max: 2
+        passed: true
+        comment: Only necessary imports used
+      - id: CQ-04
+        name: No Deprecated API
+        score: 1
+        max: 1
+        passed: true
+        comment: Uses current pygal API
+      - id: CQ-05
+        name: Output Correct
+        score: 0
+        max: 1
+        passed: false
+        comment: Saves both plot.png and plot.html which is correct for pygal
+    library_features:
+      score: 3
+      max: 5
+      items:
+      - id: LF-01
+        name: Distinctive Features
+        score: 3
+        max: 5
+        passed: true
+        comment: Uses XY chart, custom Style, stroke_style, but CI as dashed lines
+          rather than filled area (pygal limitation)
+  verdict: APPROVED
+impl_tags:
+  dependencies: []
+  techniques:
+  - html-export
+  patterns:
+  - data-generation
+  - iteration-over-groups
+  dataprep:
+  - normalization
+  - regression
+  styling:
+  - alpha-blending
+  - grid-styling