|
""" pyplots.ai
precision-recall: Precision-Recall Curve
Library: bokeh 3.8.1 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-26
"""

import numpy as np
from bokeh.io import export_png
from bokeh.models import ColumnDataSource, Legend
from bokeh.plotting import figure
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB


# Data - synthetic binary problem with a 70/30 class split
np.random.seed(42)
features, labels = make_classification(
    n_samples=2000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_classes=2,
    weights=[0.7, 0.3],  # Imbalanced dataset
    random_state=42,
)

# Hold out half of the samples; stratify so both halves keep the class ratio
features_fit, features_eval, labels_fit, labels_eval = train_test_split(
    features, labels, test_size=0.5, random_state=42, stratify=labels
)

# Fit the two classifiers being compared
logreg = LogisticRegression(random_state=42, max_iter=1000).fit(features_fit, labels_fit)
bayes = GaussianNB().fit(features_fit, labels_fit)

# Positive-class probabilities on the held-out half
logreg_proba = logreg.predict_proba(features_eval)[:, 1]
bayes_proba = bayes.predict_proba(features_eval)[:, 1]

# Precision-recall curves and their Average Precision summaries
logreg_prec, logreg_rec, _ = precision_recall_curve(labels_eval, logreg_proba)
bayes_prec, bayes_rec, _ = precision_recall_curve(labels_eval, bayes_proba)
logreg_ap = average_precision_score(labels_eval, logreg_proba)
bayes_ap = average_precision_score(labels_eval, bayes_proba)

# A random classifier's precision equals the share of positives
positive_rate = labels_eval.mean()

# Canvas (16:9 aspect ratio at 4800x2700)
plot = figure(
    width=4800,
    height=2700,
    title="precision-recall · bokeh · pyplots.ai",
    x_axis_label="Recall",
    y_axis_label="Precision",
    x_range=(-0.02, 1.05),
    y_range=(0, 1.08),
)

# Stepped precision-recall curves, drawn in legend order
curve_specs = [
    (logreg_rec, logreg_prec, "#306998"),
    (bayes_rec, bayes_prec, "#FFD43B"),
]
logreg_glyph, bayes_glyph = [
    plot.step(
        x="recall",
        y="precision",
        source=ColumnDataSource(data={"recall": rec, "precision": prec}),
        line_width=5,
        color=colour,
        alpha=0.9,
        mode="after",
    )
    for rec, prec, colour in curve_specs
]

# Horizontal reference line for the random classifier
chance_glyph = plot.line(
    x="x",
    y="y",
    source=ColumnDataSource(data={"x": [0, 1], "y": [positive_rate, positive_rate]}),
    line_width=4,
    line_dash="dashed",
    color="#666666",
    alpha=0.8,
)

# Legend entries carry the AP scores so the curves can be compared at a glance
legend_style = {
    "location": "top_right",
    "label_text_font_size": "28pt",
    "glyph_width": 50,
    "glyph_height": 30,
    "spacing": 20,
    "padding": 25,
    "background_fill_alpha": 0.9,
    "background_fill_color": "white",
    "border_line_color": "#cccccc",
    "border_line_width": 2,
}
plot.add_layout(
    Legend(
        items=[
            (f"Logistic Regression (AP = {logreg_ap:.3f})", [logreg_glyph]),
            (f"Naive Bayes (AP = {bayes_ap:.3f})", [bayes_glyph]),
            (f"Random Classifier (baseline = {positive_rate:.2f})", [chance_glyph]),
        ],
        **legend_style,
    )
)

# Text sizes - scaled for 4800x2700
plot.title.update(text_font_size="36pt", align="center")
plot.axis.axis_label_text_font_size = "28pt"
plot.axis.major_label_text_font_size = "22pt"

# Grid lines: faint and dashed on both dimensions
plot.grid.grid_line_alpha = 0.3
plot.grid.grid_line_dash = "dashed"

# Axis and tick line weights (applied to both axes)
plot.axis.axis_line_width = 3
plot.axis.major_tick_line_width = 3
plot.axis.minor_tick_line_width = 2

# Background, outline and minimum borders for padding
plot.update(
    background_fill_color="#fafafa",
    border_fill_color="white",
    outline_line_width=2,
    outline_line_color="#cccccc",
    min_border_left=100,
    min_border_right=100,
    min_border_top=80,
    min_border_bottom=100,
)

# Save as PNG
export_png(plot, filename="plot.png")
0 commit comments