|
| 1 | +"""pyplots.ai |
| 2 | +volcano-basic: Volcano Plot for Statistical Significance |
| 3 | +Library: pygal 3.1.0 | Python 3.13.11 |
| 4 | +Quality: 75/100 | Created: 2025-12-31 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pygal |
| 9 | +from pygal.style import Style |
| 10 | + |
| 11 | + |
# Data - simulated differential gene expression results.
# NOTE: with the fixed seed, the generated values depend on the order of the
# np.random calls below, so that order must not be rearranged.
np.random.seed(42)
n_genes = 500

# Gene names, used as tooltip labels
gene_names = [f"GENE{idx:03d}" for idx in range(n_genes)]

# Log2 fold changes: a background centred on zero plus two shifted groups
background_fc = np.random.normal(0, 0.5, 400)  # Non-significant genes
up_fc = np.random.normal(2.5, 0.5, 50)  # Up-regulated
down_fc = np.random.normal(-2.5, 0.5, 50)  # Down-regulated
log2_fc = np.concatenate([background_fc, up_fc, down_fc])

# P-values: uniform background, then the last 100 genes (the two shifted
# groups) are overwritten with small p-values so large effects are significant
base_pval = np.random.uniform(0.001, 0.9, n_genes)
base_pval[400:] = np.random.uniform(0.0001, 0.01, 100)
neg_log10_pval = -np.log10(base_pval)

# Classification thresholds
fc_threshold = 1.0  # log2 fold change threshold (2-fold)
pval_threshold = 1.3  # -log10(0.05) ≈ 1.3

# Classify genes: a gene is up/down-regulated only if it clears the p-value
# cutoff AND its fold change lies beyond the threshold on that side
passes_pval = neg_log10_pval > pval_threshold
up_regulated = passes_pval & (log2_fc > fc_threshold)
down_regulated = passes_pval & (log2_fc < -fc_threshold)
not_significant = ~up_regulated & ~down_regulated

# Colorblind-safe palette: gray, orange, blue, then dark gray (x3) for the
# threshold lines
series_colors = (
    "#888888",  # Gray
    "#E69F00",  # Orange
    "#0072B2",  # Blue
    "#555555",  # Dark gray
    "#555555",  # Dark gray
    "#555555",  # Dark gray
)

# Style options collected in one mapping and handed to pygal's Style at once
style_options = {
    # Colors
    "background": "white",
    "plot_background": "white",
    "foreground": "#333333",
    "foreground_strong": "#333333",
    "foreground_subtle": "#666666",
    "colors": series_colors,
    # Font sizes
    "title_font_size": 56,
    "label_font_size": 36,
    "major_label_font_size": 32,
    "legend_font_size": 40,
    "value_font_size": 24,
    # Strokes, opacity, font
    "stroke_width": 2,
    "opacity": 0.7,
    "opacity_hover": 1.0,
    "font_family": "DejaVu Sans",
}
custom_style = Style(**style_options)

# Axis limits - data extremes padded by 0.3 and snapped outward to whole
# numbers. The y-axis starts at 0 (see `range=` below).
y_max = float(np.ceil(neg_log10_pval.max() + 0.3))
x_min = float(np.floor(log2_fc.min() - 0.3))
x_max = float(np.ceil(log2_fc.max() + 0.3))

# Y-axis labels every 0.5 from 0 up to and including y_max. Explicit labels
# widen the axis to include them, so no label may lie beyond y_max.
y_label_step = 0.5
y_labels = [i * y_label_step for i in range(int(y_max / y_label_step) + 1)]

# Create XY chart (scatter plot)
chart = pygal.XY(
    width=4800,
    height=2700,
    style=custom_style,
    title="volcano-basic · pygal · pyplots.ai",
    x_title="Log₂ Fold Change",
    y_title="-Log₁₀(p-value)",
    show_legend=True,
    legend_at_bottom=True,
    legend_box_size=32,
    dots_size=10,
    stroke=False,
    show_x_guides=True,
    show_y_guides=True,
    x_label_rotation=0,
    # In pygal, `xrange` sets the x-axis limits and `range` sets the value
    # (y) axis limits. Passing the x limits as `range` would stretch the
    # y-axis down to x_min (a negative value).
    xrange=(x_min, x_max),
    range=(0, y_max),
    include_x_axis=True,
    explicit_size=True,
    truncate_legend=-1,
    spacing=40,
    margin=30,
    margin_bottom=120,
)

# Use the explicit y-axis labels computed above (0 .. y_max)
chart.y_labels = y_labels

def _tooltip_points(mask):
    """Return pygal point dicts for the genes selected by the boolean *mask*.

    Each point carries its (log2 fold change, -log10 p-value) coordinates as
    plain Python floats under "value", and the gene name under "label" so it
    is shown in the tooltip.
    """
    return [
        {"value": (float(fold_change), float(neg_log_p)), "label": gene}
        for fold_change, neg_log_p, gene, selected in zip(log2_fc, neg_log10_pval, gene_names, mask)
        if selected
    ]


# Prepare data points for each category with gene labels for tooltips
not_sig_points = _tooltip_points(not_significant)
up_points = _tooltip_points(up_regulated)
down_points = _tooltip_points(down_regulated)

# Add data series (the order fixes which palette color each series gets)
chart.add("Not Significant", not_sig_points)
chart.add("Up-regulated", up_points)
chart.add("Down-regulated", down_points)

# Threshold lines are added as extra XY series drawn with a dashed stroke and
# no dots.
# Horizontal line at the p-value threshold (y = 1.3), spanning the x range
h_line_points = [(x_min, pval_threshold), (x_max, pval_threshold)]

# Vertical lines at the fold change thresholds (x = ±1, i.e. a 2-fold change).
# Each vertical line is its own series to avoid a diagonal connecting them.
v_line_neg = [(float(-fc_threshold), 0.0), (float(-fc_threshold), float(y_max))]
v_line_pos = [(float(fc_threshold), 0.0), (float(fc_threshold), float(y_max))]

threshold_series = (
    ("p=0.05", h_line_points),
    ("FC=-2", v_line_neg),
    ("FC=+2", v_line_pos),
)
for line_title, line_points in threshold_series:
    chart.add(
        line_title,
        line_points,
        stroke=True,
        show_dots=False,
        stroke_style={"width": 4, "dasharray": "12, 6"},  # fresh dict per series
    )

# Save as PNG and HTML
chart.render_to_png("plot.png")
chart.render_to_file("plot.html")
0 commit comments