""" pyplots.ai
volcano-basic: Volcano Plot for Statistical Significance
Library: matplotlib 3.10.8 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-31
"""

import matplotlib.pyplot as plt
import numpy as np


# Data - simulated differential expression results
np.random.seed(42)  # fixed seed so the figure is reproducible
n_genes = 2000

# Generate log2 fold changes (centered around 0)
log2_fc = np.random.normal(0, 1.5, n_genes)

# Generate p-values (most non-significant, some significant)
# Use exponential distribution for realistic p-value spread
base_pvalues = np.random.exponential(0.3, n_genes)
# Clip into [1e-50, 1] so the values are valid p-values and log10 never sees 0
base_pvalues = np.clip(base_pvalues, 1e-50, 1.0)

# Make genes with large fold changes more likely to be significant
significance_boost = np.abs(log2_fc) / 3
pvalues = base_pvalues * np.exp(-significance_boost * 5)
pvalues = np.clip(pvalues, 1e-50, 1.0)

# Convert to -log10(p-value)
neg_log10_pval = -np.log10(pvalues)

# Significance thresholds
# Computed rather than hard-coded as 1.3: the rounded constant would classify
# p-values slightly above 0.05 as significant.
pval_threshold = -np.log10(0.05)
fc_threshold = 1.0  # log2(2) = 1

# Classify points
passes_pval = neg_log10_pval > pval_threshold
sig_up = passes_pval & (log2_fc > fc_threshold)
sig_down = passes_pval & (log2_fc < -fc_threshold)
non_sig = ~sig_up & ~sig_down

# Create figure
fig, ax = plt.subplots(figsize=(16, 9))

# Plot non-significant points first (gray) so the highlighted groups draw on top
ax.scatter(
    log2_fc[non_sig],
    neg_log10_pval[non_sig],
    c="#888888",
    s=50,
    alpha=0.5,
    label="Not significant",
    edgecolors="none",
)

# Plot significant down-regulated (blue - Python Blue)
ax.scatter(
    log2_fc[sig_down],
    neg_log10_pval[sig_down],
    c="#306998",
    s=80,
    alpha=0.7,
    label="Down-regulated",
    edgecolors="none",
)

# Plot significant up-regulated (gold - Python Yellow)
ax.scatter(
    log2_fc[sig_up],
    neg_log10_pval[sig_up],
    c="#FFD43B",
    s=80,
    alpha=0.7,
    label="Up-regulated",
    edgecolors="white",
    linewidths=0.5,
)

# Add threshold lines
threshold_style = {"color": "#333333", "linestyle": "--", "linewidth": 2, "alpha": 0.7}
ax.axhline(y=pval_threshold, **threshold_style)
ax.axvline(x=fc_threshold, **threshold_style)
ax.axvline(x=-fc_threshold, **threshold_style)

# Label top significant genes: the five highest-scoring points on each side,
# where score = -log10(p) + |log2 FC|. Labels are pushed away from the centre
# (right of up-regulated points, left of down-regulated points).
for mask, h_align, x_offset in ((sig_up, "left", 5), (sig_down, "right", -5)):
    candidates = np.where(mask)[0]
    scores = neg_log10_pval[candidates] + np.abs(log2_fc[candidates])
    # An empty candidate set yields an empty selection, so no guard is needed.
    for idx in candidates[np.argsort(scores)[-5:]]:
        ax.annotate(
            f"Gene_{idx}",
            (log2_fc[idx], neg_log10_pval[idx]),
            fontsize=12,
            ha=h_align,
            va="bottom",
            xytext=(x_offset, 5),
            textcoords="offset points",
        )

# Styling
ax.set_xlabel("Log₂ Fold Change", fontsize=20)
ax.set_ylabel("-Log₁₀ (p-value)", fontsize=20)
ax.set_title("volcano-basic · matplotlib · pyplots.ai", fontsize=24)
ax.tick_params(axis="both", labelsize=16)
ax.legend(fontsize=16, loc="upper right", framealpha=0.9)
ax.grid(True, alpha=0.3, linestyle="--")

# Set axis limits with padding (x-axis symmetric about zero)
x_max = max(abs(log2_fc.min()), abs(log2_fc.max())) * 1.1
ax.set_xlim(-x_max, x_max)
ax.set_ylim(0, neg_log10_pval.max() * 1.1)

plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")