|
| 1 | +""" pyplots.ai |
| 2 | +ma-differential-expression: MA Plot for Differential Expression |
| 3 | +Library: plotly 6.6.0 | Python 3.14.3 |
| 4 | +Quality: 90/100 | Created: 2026-03-20 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import plotly.graph_objects as go |
| 9 | +from scipy.signal import savgol_filter |
| 10 | + |
| 11 | + |
# Data - Simulated RNA-seq differential expression results
np.random.seed(42)
n_genes = 15000

# Mean expression (A values) - log2 scale, bimodal distribution.
# The two mixture components are sized from n_genes (one third low-expressed,
# the remainder high-expressed) so that every per-gene array below has exactly
# n_genes entries; independent hard-coded sizes would break the element-wise
# noise addition as soon as n_genes is changed.
n_low_expr = n_genes // 3
n_high_expr = n_genes - n_low_expr
mean_expression = np.concatenate(
    [np.random.normal(4, 1.5, n_low_expr), np.random.normal(9, 2.5, n_high_expr)]
)
mean_expression = np.clip(mean_expression, 0.5, 16)

# Log fold change (M values) - most genes near zero, some DE genes
log_fold_change = np.random.normal(0, 0.3, n_genes)

# Add truly differentially expressed genes (8% of all genes): random sign,
# magnitude of at least 1.0 plus an exponential tail
n_de = n_genes * 8 // 100
de_indices = np.random.choice(n_genes, n_de, replace=False)
log_fold_change[de_indices] = np.random.choice([-1, 1], n_de) * (np.random.exponential(0.8, n_de) + 1.0)

# Expression-dependent variance (higher variance at low expression);
# noise_scale is a per-gene array, so one noise value is drawn per gene
noise_scale = 0.5 / (1 + mean_expression * 0.3)
log_fold_change += np.random.normal(0, noise_scale)

# Significance call (stand-in for "adjusted p-value < 0.05"; no p-values are
# simulated). Background rule: |M| > 1.0 and mean expression > 2.0.
# The planted DE genes are then re-called with the looser |M| > 0.8 rule,
# which replaces (not combines with) the background rule for those genes.
significant = np.abs(log_fold_change) > 1.0
significant &= mean_expression > 2.0
significant[de_indices] = np.abs(log_fold_change[de_indices]) > 0.8
| 36 | + |
# Gene names: generic "Gene<i>" placeholders for every gene, then recognisable
# symbols for the (up to) ten significant genes with the largest |M|.
# top_de holds positions within the significant subset, in ascending |M|
# order; sig_indices maps those positions back to indices into the full arrays.
top_gene_names = ["BRCA1", "TP53", "MYC", "EGFR", "VEGFA", "IL6", "TNF", "STAT3", "KRAS", "CDK2"]
gene_names = [f"Gene{i}" for i in range(n_genes)]
sig_indices = np.flatnonzero(significant)
top_de = np.argsort(np.abs(log_fold_change[sig_indices]))[-10:]
# zip stops at the shorter input, so fewer than ten significant genes is fine
for symbol, gene_idx in zip(top_gene_names, sig_indices[top_de], strict=False):
    gene_names[gene_idx] = symbol
| 44 | + |
# Separate data: boolean masks selecting the two point layers
sig_mask = significant
non_sig_mask = ~sig_mask

# LOESS-like smoothing curve: genes are ordered by mean expression and the
# fold changes are passed through a cubic Savitzky-Golay filter. The window is
# always odd and capped at 501 points (or roughly half the data if smaller).
sort_idx = np.argsort(mean_expression)
sorted_expr, sorted_lfc = mean_expression[sort_idx], log_fold_change[sort_idx]
window = min(501, 2 * (len(sorted_lfc) // 4) + 1)
smoothed = savgol_filter(sorted_lfc, window_length=window, polyorder=3)
| 55 | + |
# Color palette - refined, distinctive (grouped by role)
# Point layers: non-significant vs. significant genes
COLOR_NONSIG, COLOR_SIG = "#B0B8C4", "#0E8A7A"
# Lines: smoothed trend, y=0 reference, fold-change thresholds
COLOR_LOESS, COLOR_REFLINE, COLOR_THRESHOLD = "#306998", "#5A5A6E", "#9E9EAE"
# Gene-name text labels
COLOR_LABEL = "#2B2B3D"
| 63 | + |
# Plot: each trace is built as a named object, then added to the figure in
# order - non-significant points, significant points, smoothed trend line.

# Non-significant genes: small, faint markers without outline
nonsig_points = go.Scatter(
    x=mean_expression[non_sig_mask],
    y=log_fold_change[non_sig_mask],
    mode="markers",
    marker={"size": 8, "color": COLOR_NONSIG, "opacity": 0.25, "line": {"width": 0}},
    name="Not significant",
    hovertemplate="A: %{x:.1f}<br>M: %{y:.2f}<extra>Not significant</extra>",
)

# Significant genes: larger, more opaque markers with a thin white outline
sig_points = go.Scatter(
    x=mean_expression[sig_mask],
    y=log_fold_change[sig_mask],
    mode="markers",
    marker={"size": 10, "color": COLOR_SIG, "opacity": 0.6, "line": {"width": 0.5, "color": "white"}},
    name="Significant (padj < 0.05)",
    hovertemplate="A: %{x:.1f}<br>M: %{y:.2f}<extra>Significant</extra>",
)

# Smoothing curve: excluded from hover so it never masks the gene points
trend_line = go.Scatter(
    x=sorted_expr,
    y=smoothed,
    mode="lines",
    line={"color": COLOR_LOESS, "width": 3.5},
    name="LOESS trend",
    hoverinfo="skip",
)

fig = go.Figure()
for layer in (nonsig_points, sig_points, trend_line):
    fig.add_trace(layer)
| 102 | + |
# Reference line at y=0 (distinct from LOESS): dotted, in its own colour
fig.add_hline(y=0, line={"color": COLOR_REFLINE, "width": 1.5, "dash": "dot"})

# Fold-change threshold lines with annotations.
# M = +1 / -1 on the log2 scale corresponds to a 2-fold change up / down.
fold_thresholds = ((1, "2-fold up"), (-1, "2-fold down"))

# Both dashed lines are added before any annotation
for level, _caption in fold_thresholds:
    fig.add_hline(y=level, line={"color": COLOR_THRESHOLD, "width": 1.5, "dash": "dash"})

# Captions are right-anchored at x=15.5 and nudged 14 px away from their line
# (upwards for the +1 line, downwards for the -1 line)
for level, caption in fold_thresholds:
    fig.add_annotation(
        x=15.5,
        y=level,
        text=caption,
        showarrow=False,
        font={"size": 14, "color": COLOR_THRESHOLD},
        xanchor="right",
        yshift=14 * level,
    )
| 128 | + |
# Label top DE genes: map positions within the significant subset back to
# indices into the full arrays, then draw the gene names as a text-only trace
label_indices = sig_indices[top_de].tolist()
label_text = [gene_names[gene_idx] for gene_idx in label_indices]

gene_labels = go.Scatter(
    x=mean_expression[label_indices],
    y=log_fold_change[label_indices],
    mode="text",
    text=label_text,
    textposition="top center",
    textfont={"size": 16, "color": COLOR_LABEL, "family": "Arial Black, sans-serif"},
    # Labels are decoration only: keep them out of the legend and hover
    showlegend=False,
    hoverinfo="skip",
)
fig.add_trace(gene_labels)
| 143 | + |
# Layout: both axes share the same title/tick typography; only the y axis
# shows (very faint) grid lines
axis_title_font = dict(size=22, color="#3A3A4A")
axis_tick_font = dict(size=18, color="#4A4A5A")

fig.update_layout(
    template="plotly_white",
    plot_bgcolor="white",
    margin=dict(l=100, r=80, t=100, b=100),
    # Centered title
    title=dict(
        text="ma-differential-expression · plotly · pyplots.ai",
        font=dict(size=28, color="#2B2B3D", family="Arial, sans-serif"),
        x=0.5,
        xanchor="center",
    ),
    xaxis=dict(
        title=dict(text="Mean Expression (A)", font=axis_title_font),
        tickfont=axis_tick_font,
        showgrid=False,
        zeroline=False,
    ),
    yaxis=dict(
        title=dict(text="Log₂ Fold Change (M)", font=axis_title_font),
        tickfont=axis_tick_font,
        gridcolor="rgba(0,0,0,0.06)",
        showgrid=True,
        # The y=0 reference is drawn explicitly as a dotted hline instead
        zeroline=False,
    ),
    # Legend placed inside the plot area near the top-right corner, on a
    # semi-transparent white box with a light border
    legend=dict(
        font=dict(size=16, color="#3A3A4A"),
        x=0.98,
        y=0.98,
        xanchor="right",
        bgcolor="rgba(255,255,255,0.85)",
        bordercolor="rgba(0,0,0,0.1)",
        borderwidth=1,
    ),
)
| 178 | + |
# Save: static PNG (1600x900 logical pixels rendered at 3x scale) and an
# interactive HTML page that loads plotly.js from the CDN
png_export = {"width": 1600, "height": 900, "scale": 3}
fig.write_image("plot.png", **png_export)
fig.write_html("plot.html", include_plotlyjs="cdn")
0 commit comments