|
| 1 | +""" pyplots.ai |
| 2 | +roc-curve: ROC Curve with AUC |
| 3 | +Library: letsplot 4.8.2 | Python 3.13.11 |
| 4 | +Quality: 92/100 | Created: 2025-12-26 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from lets_plot import * # noqa: F403 |
| 10 | +from lets_plot import ggsave |
| 11 | + |
| 12 | +LetsPlot.setup_html() # noqa: F405 |
| 13 | + |
# Data - synthetic classifier scores for a balanced binary problem.
# The seed is fixed so the generated scores (and the plot) are reproducible.
np.random.seed(42)

# Total sample count; the two classes are the same size, so every array
# below is sized from this single value.
n_samples = 1000
n_per_class = n_samples // 2

# Ground-truth labels: all negatives (0) first, then all positives (1).
# The score arrays below are concatenated in the same order.
y_true = np.concatenate([np.zeros(n_per_class), np.ones(n_per_class)])

# Model A - stronger classifier: the two Beta distributions are skewed in
# opposite directions, so negative and positive scores overlap little.
scores_a = np.concatenate(
    [
        np.random.beta(2, 5, n_per_class),  # Negative class
        np.random.beta(5, 2, n_per_class),  # Positive class
    ]
)

# Model B - weaker classifier: the Beta parameters are closer together,
# so the two score distributions overlap more than for Model A.
scores_b = np.concatenate(
    [
        np.random.beta(2, 3, n_per_class),  # Negative class
        np.random.beta(3, 2, n_per_class),  # Positive class
    ]
)
| 36 | + |
| 37 | + |
| 38 | +# Calculate ROC curve points |
def compute_roc(y_true, scores, n_thresholds=200):
    """Compute ROC curve points on an evenly spaced threshold grid.

    A sample is predicted positive when its score is ``>=`` the threshold.
    Thresholds are ``n_thresholds`` evenly spaced values from 0 to 1 in
    increasing order, so the returned rates start at the most permissive
    threshold and both arrays are non-increasing.

    Args:
        y_true: 1-D array-like of labels. Entries equal to 1 count as
            positives and entries equal to 0 as negatives; any other
            value is ignored by both rates.
        scores: 1-D array-like of classifier scores, aligned with
            ``y_true``. Only the fixed 0..1 threshold range is swept.
        n_thresholds: Number of thresholds to evaluate (default 200).

    Returns:
        A tuple ``(fpr, tpr)`` of float arrays of length ``n_thresholds``.
        A rate is all zeros when its class has no samples.
    """
    y_true = np.asarray(y_true)
    scores = np.asarray(scores)
    thresholds = np.linspace(0, 1, n_thresholds)

    # Class membership and class sizes do not depend on the threshold,
    # so they are computed once instead of once per threshold.
    positives = y_true == 1
    negatives = y_true == 0
    n_pos = positives.sum()
    n_neg = negatives.sum()

    # Row i holds the predictions for thresholds[i]:
    # shape (n_thresholds, n_samples) via broadcasting.
    predicted_positive = scores[np.newaxis, :] >= thresholds[:, np.newaxis]
    tp = (predicted_positive & positives).sum(axis=1)
    fp = (predicted_positive & negatives).sum(axis=1)

    # Guard against an absent class to avoid dividing by zero.
    tpr = tp / n_pos if n_pos > 0 else np.zeros(n_thresholds)
    fpr = fp / n_neg if n_neg > 0 else np.zeros(n_thresholds)
    return fpr, tpr
| 54 | + |
| 55 | + |
# ROC points for each model, evaluated against the shared labels
fpr_a, tpr_a = compute_roc(y_true, scores_a)
fpr_b, tpr_b = compute_roc(y_true, scores_b)

# Area under each curve by the trapezoidal rule. compute_roc sweeps its
# thresholds upwards, so FPR runs from 1 down to 0 and the raw integral
# comes out negative; the leading minus flips it back.
auc_a = -np.trapezoid(y=tpr_a, x=fpr_a)
auc_b = -np.trapezoid(y=tpr_b, x=fpr_b)

# Legend labels carry the AUC rounded to two decimals
label_a = f"Model A (AUC = {auc_a:.2f})"
label_b = f"Model B (AUC = {auc_b:.2f})"

# One long-format frame per curve: fpr, tpr, and the legend label
df_model_a = pd.DataFrame({"fpr": fpr_a, "tpr": tpr_a}).assign(model=label_a)
df_model_b = pd.DataFrame({"fpr": fpr_b, "tpr": tpr_b}).assign(model=label_b)

# Diagonal reference line for a random classifier
df_random = pd.DataFrame({"fpr": [0, 1], "tpr": [0, 1]}).assign(model="Random (AUC = 0.50)")

# Stack the three curves into the single frame used by the plot
curve_frames = [df_model_a, df_model_b, df_random]
df = pd.concat(curve_frames, ignore_index=True)
| 74 | + |
# Plot - start from the base spec (data + aesthetics), then add each
# component in order: one line per classifier, colours, axes, labels, theme.
plot = ggplot(df, aes(x="fpr", y="tpr", color="model"))

plot_components = (
    geom_line(size=2),
    scale_color_manual(values=["#306998", "#FFD43B", "#888888"]),
    # Both rates are drawn on the 0..1 range with a 1:1 aspect ratio
    scale_x_continuous(limits=[0, 1]),
    scale_y_continuous(limits=[0, 1]),
    coord_fixed(ratio=1),
    labs(
        x="False Positive Rate",
        y="True Positive Rate",
        title="roc-curve · letsplot · pyplots.ai",
        color="Classifier",
    ),
    theme_minimal(),
    # Text sizes and legend placement, applied on top of the minimal theme
    theme(
        plot_title=element_text(size=24),
        axis_title=element_text(size=20),
        axis_text=element_text(size=16),
        legend_text=element_text(size=16),
        legend_title=element_text(size=18),
        legend_position="bottom",
    ),
    ggsize(1600, 900),
)

for component in plot_components:
    plot = plot + component
| 97 | + |
# Write the figure to the current directory in both formats:
# PNG at 3x scale (1600 x 900 -> 4800 x 2700 px) and HTML.
export_targets = (
    ("plot.png", {"scale": 3}),
    ("plot.html", {}),
)
for filename, extra_options in export_targets:
    ggsave(plot, filename, path=".", **extra_options)
0 commit comments