Skip to content

Commit b82108e

Browse files
feat(pygal): implement ma-differential-expression (#5081)
## Implementation: `ma-differential-expression` - pygal Implements the **pygal** version of `ma-differential-expression`. **File:** `plots/ma-differential-expression/implementations/pygal.py` **Parent Issue:** #4420 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/23339077886)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 397bf36 commit b82108e

2 files changed

Lines changed: 448 additions & 0 deletions

File tree

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
""" pyplots.ai
2+
ma-differential-expression: MA Plot for Differential Expression
3+
Library: pygal 3.1.0 | Python 3.14.3
4+
Quality: 84/100 | Created: 2026-03-20
5+
"""
6+
7+
import numpy as np
8+
import pygal
9+
from pygal.style import Style
10+
11+
12+
# Data - Simulated RNA-seq differential expression results
13+
np.random.seed(42)
14+
n_genes = 15000
15+
16+
# Mean expression (A values) - log2 scale, typical RNA-seq range
17+
mean_expression = np.random.exponential(scale=3, size=n_genes) + 1
18+
19+
# Log fold change (M values) - most genes near zero, some truly differentially expressed
20+
log_fold_change = np.random.normal(0, 0.3, n_genes)
21+
22+
# Add truly differentially expressed genes (~8% upregulated, ~7% downregulated)
23+
n_up = 1200
24+
n_down = 1050
25+
up_idx = np.random.choice(n_genes, n_up, replace=False)
26+
remaining = np.setdiff1d(np.arange(n_genes), up_idx)
27+
down_idx = np.random.choice(remaining, n_down, replace=False)
28+
29+
log_fold_change[up_idx] = np.random.normal(2.5, 0.8, n_up)
30+
log_fold_change[down_idx] = np.random.normal(-2.2, 0.7, n_down)
31+
32+
# Simulate p-values (significant for DE genes, noise for others)
33+
p_values = np.ones(n_genes)
34+
p_values[up_idx] = 10 ** (-np.random.uniform(2, 10, n_up))
35+
p_values[down_idx] = 10 ** (-np.random.uniform(2, 10, n_down))
36+
noise_idx = np.setdiff1d(np.arange(n_genes), np.concatenate([up_idx, down_idx]))
37+
p_values[noise_idx] = np.random.uniform(0.01, 1.0, len(noise_idx))
38+
39+
# Significance threshold
40+
significant = p_values < 0.05
41+
42+
# Gene names for top DE genes
43+
gene_names = [f"Gene{i}" for i in range(n_genes)]
44+
top_genes = ["BRCA1", "TP53", "MYC", "EGFR", "KRAS", "PTEN", "CDK2", "RB1", "AKT1", "VEGFA"]
45+
top_idx = np.argsort(p_values)[: len(top_genes)]
46+
for i, name in zip(top_idx, top_genes, strict=False):
47+
gene_names[i] = name
48+
49+
# LOESS-like smoothing curve (binned moving average with additional smoothing)
50+
sort_order = np.argsort(mean_expression)
51+
sorted_a = mean_expression[sort_order]
52+
sorted_m = log_fold_change[sort_order]
53+
54+
n_bins = 30
55+
bin_edges = np.percentile(sorted_a, np.linspace(0, 100, n_bins + 1))
56+
raw_x = []
57+
raw_y = []
58+
for b in range(n_bins):
59+
mask = (sorted_a >= bin_edges[b]) & (sorted_a < bin_edges[b + 1])
60+
if mask.sum() > 20:
61+
raw_x.append(float(np.median(sorted_a[mask])))
62+
raw_y.append(float(np.mean(sorted_m[mask])))
63+
64+
# Apply moving average smoothing
65+
smooth_y = np.array(raw_y)
66+
for _ in range(4):
67+
smoothed = np.copy(smooth_y)
68+
for j in range(1, len(smoothed) - 1):
69+
smoothed[j] = (smooth_y[j - 1] + smooth_y[j] + smooth_y[j + 1]) / 3
70+
smooth_y = smoothed
71+
smooth_x = raw_x
72+
73+
# Refined style - publication-quality aesthetics
74+
custom_style = Style(
75+
background="#FFFFFF",
76+
plot_background="#FAFBFC",
77+
foreground="#34495E",
78+
foreground_strong="#2C3E50",
79+
foreground_subtle="#E8ECF0",
80+
colors=(
81+
"#B0C4DE", # non-significant: light steel blue (muted background)
82+
"#E74C3C", # upregulated: vibrant red
83+
"#2980B9", # downregulated: ocean blue
84+
"#2C3E50", # M=0 line: charcoal
85+
"#7F8C8D", # +1 threshold: warm gray
86+
"#7F8C8D", # -1 threshold: warm gray
87+
"#E67E22", # LOESS curve: bold orange
88+
"#1A1A2E", # top DE genes: near-black
89+
),
90+
title_font_size=52,
91+
label_font_size=40,
92+
major_label_font_size=38,
93+
legend_font_size=28,
94+
value_font_size=22,
95+
tooltip_font_size=26,
96+
stroke_width=2,
97+
opacity=0.6,
98+
opacity_hover=0.95,
99+
)
100+
101+
# Subsample for performance (pygal renders each point as SVG element)
102+
np.random.seed(42)
103+
sig_mask = significant
104+
nonsig_mask = ~significant
105+
106+
sig_indices = np.where(sig_mask)[0]
107+
nonsig_indices = np.where(nonsig_mask)[0]
108+
n_nonsig_display = 1800
109+
nonsig_sample = np.random.choice(nonsig_indices, min(n_nonsig_display, len(nonsig_indices)), replace=False)
110+
111+
# Split significant into up/down for visual storytelling
112+
sig_up_points = []
113+
sig_down_points = []
114+
nonsig_points = []
115+
116+
for i in nonsig_sample:
117+
nonsig_points.append(
118+
{
119+
"value": (round(float(mean_expression[i]), 2), round(float(log_fold_change[i]), 2)),
120+
"label": f"{gene_names[i]} | A={mean_expression[i]:.1f}, M={log_fold_change[i]:.2f}, p={p_values[i]:.2e}",
121+
}
122+
)
123+
124+
top_idx_set = set(top_idx.tolist())
125+
for i in sig_indices:
126+
if i in top_idx_set:
127+
continue
128+
point = {
129+
"value": (round(float(mean_expression[i]), 2), round(float(log_fold_change[i]), 2)),
130+
"label": f"{gene_names[i]} | A={mean_expression[i]:.1f}, M={log_fold_change[i]:.2f}, p={p_values[i]:.2e}",
131+
}
132+
if log_fold_change[i] > 0:
133+
sig_up_points.append(point)
134+
else:
135+
sig_down_points.append(point)
136+
137+
# Top 10 most significant genes with star markers in tooltips
138+
labeled_points = []
139+
for i in top_idx:
140+
labeled_points.append(
141+
{
142+
"value": (round(float(mean_expression[i]), 2), round(float(log_fold_change[i]), 2)),
143+
"label": f"\u2605 {gene_names[i]} | A={mean_expression[i]:.1f}, M={log_fold_change[i]:.2f}, p={p_values[i]:.2e}",
144+
}
145+
)
146+
147+
# Reference line endpoints
148+
x_min = 0
149+
x_max = float(np.percentile(mean_expression, 99.5))
150+
151+
# Create XY chart
152+
chart = pygal.XY(
153+
width=4800,
154+
height=2700,
155+
style=custom_style,
156+
title="ma-differential-expression \u00b7 pygal \u00b7 pyplots.ai",
157+
x_title="Mean Expression (A)",
158+
y_title="Log\u2082 Fold Change (M)",
159+
show_legend=True,
160+
legend_at_bottom=True,
161+
legend_at_bottom_columns=8,
162+
legend_box_size=22,
163+
dots_size=5,
164+
stroke=False,
165+
show_x_guides=False,
166+
show_y_guides=False,
167+
truncate_legend=-1,
168+
print_values=False,
169+
dynamic_print_values=True,
170+
js=[],
171+
x_label_rotation=0,
172+
margin_bottom=80,
173+
)
174+
175+
# Non-significant genes - small muted dots for background density
176+
chart.add("Not Significant", nonsig_points, dots_size=4)
177+
178+
# Upregulated significant - vibrant red
179+
chart.add("Upregulated (p<0.05)", sig_up_points, dots_size=7)
180+
181+
# Downregulated significant - ocean blue
182+
chart.add("Downregulated (p<0.05)", sig_down_points, dots_size=7)
183+
184+
# M = 0 line (no change)
185+
chart.add("M = 0", [(x_min, 0), (x_max, 0)], stroke=True, show_dots=False, stroke_style={"width": 5})
186+
187+
# M = +1 threshold (2-fold up)
188+
chart.add(
189+
"+2-fold", [(x_min, 1), (x_max, 1)], stroke=True, show_dots=False, stroke_style={"width": 3, "dasharray": "14, 8"}
190+
)
191+
192+
# M = -1 threshold (2-fold down)
193+
chart.add(
194+
"\u22122-fold",
195+
[(x_min, -1), (x_max, -1)],
196+
stroke=True,
197+
show_dots=False,
198+
stroke_style={"width": 3, "dasharray": "14, 8"},
199+
)
200+
201+
# LOESS smoothing curve - bold orange
202+
chart.add(
203+
"LOESS trend",
204+
[(round(x, 2), round(y, 3)) for x, y in zip(smooth_x, smooth_y, strict=False)],
205+
stroke=True,
206+
show_dots=True,
207+
dots_size=5,
208+
stroke_style={"width": 7},
209+
)
210+
211+
# Top DE genes - large prominent dots
212+
chart.add("Top DE genes", labeled_points, dots_size=16, stroke=False)
213+
214+
# Save
215+
chart.render_to_file("plot.html")
216+
chart.render_to_png("plot.png")

0 commit comments

Comments
 (0)