Skip to content

Commit 63bf6f4

Browse files
feat(seaborn): implement ma-differential-expression (#5085)
## Implementation: `ma-differential-expression` - seaborn Implements the **seaborn** version of `ma-differential-expression`. **File:** `plots/ma-differential-expression/implementations/seaborn.py` **Parent Issue:** #4420 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/23339077516)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 6f652c0 commit 63bf6f4

2 files changed

Lines changed: 414 additions & 0 deletions

File tree

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
""" pyplots.ai
2+
ma-differential-expression: MA Plot for Differential Expression
3+
Library: seaborn 0.13.2 | Python 3.14.3
4+
Quality: 94/100 | Created: 2026-03-20
5+
"""
6+
7+
import matplotlib.pyplot as plt
8+
import numpy as np
9+
import pandas as pd
10+
import seaborn as sns
11+
12+
13+
# Seaborn theme and style
14+
sns.set_theme(style="ticks", context="talk", font_scale=1.1)
15+
palette = sns.color_palette("colorblind")
16+
TEAL = palette[0] # colorblind-safe blue/teal
17+
GRAY = "#C0C0C0"
18+
ACCENT = palette[2] # green for LOESS
19+
DARK = "#2C3E50"
20+
THRESHOLD_COLOR = palette[4] # muted purple for threshold lines
21+
22+
# Data - Simulated RNA-seq differential expression results (~15,000 genes)
23+
np.random.seed(42)
24+
n_genes = 15000
25+
26+
mean_expression = np.random.exponential(scale=3, size=n_genes) + 1
27+
log_fold_change = np.random.normal(0, 0.5, n_genes)
28+
29+
# Add truly differentially expressed genes (~8% of genes)
30+
n_de = int(n_genes * 0.08)
31+
de_indices = np.random.choice(n_genes, n_de, replace=False)
32+
log_fold_change[de_indices] += np.random.choice([-1, 1], n_de) * np.random.uniform(1.5, 4, n_de)
33+
34+
# Simulate p-values: DE genes get small p-values, others get uniform
35+
p_values = np.ones(n_genes)
36+
p_values[de_indices] = 10 ** (-np.random.uniform(2, 10, n_de))
37+
p_values[~np.isin(np.arange(n_genes), de_indices)] = np.random.uniform(0.01, 1.0, n_genes - n_de)
38+
39+
significant = p_values < 0.05
40+
41+
# Categorize genes for seaborn hue mapping
42+
status = np.where(
43+
~significant,
44+
"Not significant",
45+
np.where(log_fold_change > 1, "Up-regulated", np.where(log_fold_change < -1, "Down-regulated", "Significant")),
46+
)
47+
48+
# Gene names for top hits - spread across expression range for better storytelling
49+
gene_names = [None] * n_genes
50+
top_gene_labels = ["BRCA1", "TP53", "MYC", "EGFR", "VEGFA", "IL6"]
51+
sig_de_mask = significant & (np.abs(log_fold_change) > 1)
52+
sig_de_indices = np.where(sig_de_mask)[0]
53+
sig_de_expr = mean_expression[sig_de_indices]
54+
sig_de_abs_lfc = np.abs(log_fold_change[sig_de_indices])
55+
56+
# Use evenly-spaced expression bins across full range to spread labels
57+
expr_min, expr_max = sig_de_expr.min(), sig_de_expr.max()
58+
n_labels = len(top_gene_labels)
59+
expr_edges = np.linspace(expr_min, expr_max + 0.01, n_labels + 1)
60+
top_sig = []
61+
for b in range(n_labels):
62+
in_bin = (sig_de_expr >= expr_edges[b]) & (sig_de_expr < expr_edges[b + 1])
63+
if not np.any(in_bin):
64+
continue
65+
bin_idx = np.where(in_bin)[0]
66+
best = bin_idx[np.argmax(sig_de_abs_lfc[bin_idx])]
67+
top_sig.append(sig_de_indices[best])
68+
69+
for i, idx in enumerate(top_sig[:n_labels]):
70+
gene_names[idx] = top_gene_labels[i]
71+
72+
df = pd.DataFrame(
73+
{
74+
"Mean Expression (A)": mean_expression,
75+
"Log₂ Fold Change (M)": log_fold_change,
76+
"Status": pd.Categorical(
77+
status, categories=["Not significant", "Significant", "Up-regulated", "Down-regulated"]
78+
),
79+
"gene_name": gene_names,
80+
}
81+
)
82+
83+
# Color palette for categories - colorblind-safe
84+
status_palette = {
85+
"Not significant": GRAY,
86+
"Significant": sns.desaturate(TEAL, 0.6),
87+
"Up-regulated": TEAL,
88+
"Down-regulated": palette[3], # red from colorblind palette
89+
}
90+
91+
# Plot using seaborn's hue-based scatter
92+
fig, ax = plt.subplots(figsize=(16, 9))
93+
94+
sns.scatterplot(
95+
data=df,
96+
x="Mean Expression (A)",
97+
y="Log₂ Fold Change (M)",
98+
hue="Status",
99+
hue_order=["Not significant", "Significant", "Up-regulated", "Down-regulated"],
100+
palette=status_palette,
101+
size="Status",
102+
sizes={"Not significant": 20, "Significant": 40, "Up-regulated": 55, "Down-regulated": 55},
103+
alpha=0.3,
104+
edgecolor="none",
105+
legend="full",
106+
ax=ax,
107+
)
108+
109+
# Emphasize up/down-regulated with white edges via a second scatter layer
110+
de_data = df[df["Status"].isin(["Up-regulated", "Down-regulated"])]
111+
sns.scatterplot(
112+
data=de_data,
113+
x="Mean Expression (A)",
114+
y="Log₂ Fold Change (M)",
115+
hue="Status",
116+
hue_order=["Up-regulated", "Down-regulated"],
117+
palette={"Up-regulated": TEAL, "Down-regulated": palette[3]},
118+
s=55,
119+
alpha=0.6,
120+
edgecolor="white",
121+
linewidth=0.4,
122+
legend=False,
123+
ax=ax,
124+
)
125+
126+
# Reference lines
127+
ax.axhline(y=0, color=DARK, linewidth=2, alpha=0.5)
128+
ax.axhline(y=1, color=THRESHOLD_COLOR, linewidth=1.5, linestyle="--", alpha=0.7)
129+
ax.axhline(y=-1, color=THRESHOLD_COLOR, linewidth=1.5, linestyle="--", alpha=0.7)
130+
ax.text(ax.get_xlim()[1] * 0.97, 1.12, "2-fold ↑", fontsize=12, color=THRESHOLD_COLOR, ha="right", fontstyle="italic")
131+
ax.text(ax.get_xlim()[1] * 0.97, -1.35, "2-fold ↓", fontsize=12, color=THRESHOLD_COLOR, ha="right", fontstyle="italic")
132+
133+
# LOESS smoothing curve using seaborn regplot with lowess
134+
sns.regplot(
135+
data=df,
136+
x="Mean Expression (A)",
137+
y="Log₂ Fold Change (M)",
138+
lowess=True,
139+
scatter=False,
140+
line_kws={"color": ACCENT, "linewidth": 3, "alpha": 0.85, "label": "LOESS trend"},
141+
ax=ax,
142+
)
143+
144+
# Label top differentially expressed genes - spread across expression range
145+
labeled = df[df["gene_name"].notna()].copy()
146+
label_positions = []
147+
for _, row in labeled.iterrows():
148+
x_val = row["Mean Expression (A)"]
149+
y_val = row["Log₂ Fold Change (M)"]
150+
# Place labels outward from the dense center: above for up-regulated, below for down
151+
y_off = -30 if y_val > 0 else 30
152+
x_off = 25 if x_val < df["Mean Expression (A)"].median() else -25
153+
# Avoid overlapping previously placed labels
154+
for px, py in label_positions:
155+
if abs(x_val - px) < 2 and abs(y_val - py) < 1:
156+
y_off = y_off + (35 if y_off > 0 else -35)
157+
break
158+
label_positions.append((x_val, y_val))
159+
ax.annotate(
160+
row["gene_name"],
161+
xy=(x_val, y_val),
162+
xytext=(x_off, y_off),
163+
textcoords="offset points",
164+
fontsize=13,
165+
fontweight="bold",
166+
color=DARK,
167+
arrowprops={"arrowstyle": "->", "color": DARK, "lw": 1.2, "connectionstyle": "arc3,rad=0.2"},
168+
bbox={"boxstyle": "round,pad=0.25", "facecolor": "white", "edgecolor": DARK, "alpha": 0.85, "linewidth": 0.8},
169+
)
170+
171+
# Style - use seaborn's despine
172+
sns.despine(ax=ax)
173+
ax.set_xlabel("Mean Expression (A)", fontsize=20)
174+
ax.set_ylabel("Log₂ Fold Change (M)", fontsize=20)
175+
ax.set_title("ma-differential-expression · seaborn · pyplots.ai", fontsize=24, fontweight="medium", pad=15)
176+
ax.tick_params(axis="both", labelsize=16)
177+
ax.yaxis.grid(True, alpha=0.12, linewidth=0.8, color="#888888")
178+
179+
# Refine legend with seaborn
180+
sns.move_legend(ax, "upper right", fontsize=13, framealpha=0.92, title="Gene Status", title_fontsize=14)
181+
182+
plt.tight_layout()
183+
plt.savefig("plot.png", dpi=300, bbox_inches="tight")

0 commit comments

Comments
 (0)