Skip to content

Commit a3fbc9b

Browse files
feat(letsplot): implement ma-differential-expression (#5083)
## Implementation: `ma-differential-expression` - letsplot Implements the **letsplot** version of `ma-differential-expression`. **File:** `plots/ma-differential-expression/implementations/letsplot.py` **Parent Issue:** #4420 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/23339077546)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent e257b58 commit a3fbc9b

2 files changed

Lines changed: 390 additions & 0 deletions

File tree

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
""" pyplots.ai
2+
ma-differential-expression: MA Plot for Differential Expression
3+
Library: letsplot 4.9.0 | Python 3.14.3
4+
Quality: 90/100 | Created: 2026-03-20
5+
"""
6+
7+
import numpy as np
8+
import pandas as pd
9+
from lets_plot import * # noqa: F403
10+
from lets_plot.export import ggsave as export_ggsave
11+
12+
13+
# Configure Lets-Plot's HTML output once, before any plot object is built.
# NOTE(review): presumably only needed for notebook/HTML display; confirm whether the file exports below require it.
LetsPlot.setup_html()  # noqa: F405
14+
15+
# Data
# Synthetic differential-expression results; the fixed seed keeps the figure reproducible.
np.random.seed(42)
n_genes = 15000

# Mean expression (A), drawn uniformly over the plotted range
mean_expression = np.random.uniform(0.5, 15, n_genes)

# Log fold change (M): narrow baseline noise plus an extra noise term whose
# standard deviation decays exponentially with A, so low-expression genes scatter more
log_fold_change = np.random.normal(0, 0.3, n_genes)
low_expr_bias = 0.4 * np.exp(-0.3 * mean_expression)
log_fold_change += np.random.normal(0, low_expr_bias)

# Differentially expressed genes are sampled only from genes with A > 2.
# Down-regulated candidates exclude up_idx, so the two groups never overlap.
n_up = 420
n_down = 380
expressed_idx = np.where(mean_expression > 2)[0]
up_idx = np.random.choice(expressed_idx, n_up, replace=False)
down_idx = np.random.choice(np.setdiff1d(expressed_idx, up_idx), n_down, replace=False)
log_fold_change[up_idx] = np.random.uniform(1.2, 5.5, n_up)
log_fold_change[down_idx] = np.random.uniform(-5.5, -1.2, n_down)

# Only the sampled genes get a small p-value; every other gene keeps p = 1
p_values = np.ones(n_genes)
p_values[up_idx] = np.random.uniform(1e-20, 0.01, n_up)
p_values[down_idx] = np.random.uniform(1e-20, 0.01, n_down)

significant = p_values < 0.05
status = np.where(~significant, "Not significant", np.where(log_fold_change > 0, "Up-regulated", "Down-regulated"))

# Realistic gene symbols for top genes
real_gene_names_up = ["FOXM1", "CDK1", "MYC", "EGFR"]
real_gene_names_down = ["CDKN1A", "RB1", "BRCA2", "TP53"]

# Four most extreme genes per direction. argsort is ascending, so top_up runs from
# the 4th-largest M up to the largest and top_down from the most negative M upwards.
top_up = up_idx[np.argsort(log_fold_change[up_idx])[-4:]]
top_down = down_idx[np.argsort(log_fold_change[down_idx])[:4]]

# Unified DataFrame
df = pd.DataFrame(
    {
        "A": mean_expression,
        "M": log_fold_change,
        "status": pd.Categorical(status, categories=["Down-regulated", "Not significant", "Up-regulated"]),
        # Clip before the log so a zero p-value could never produce inf
        "neg_log10p": -np.log10(np.clip(p_values, 1e-300, 1)),
    }
)

# Labels DataFrame with staggered nudge to avoid overlap
top_genes_idx = np.concatenate([top_up, top_down])
gene_labels = real_gene_names_up + real_gene_names_down
# Hand-tuned (dx, dy) label offsets, one per labelled gene, in the same order as
# top_genes_idx: up-regulated labels are pushed upwards, down-regulated ones downwards.
up_nudges = [(-1.2, 0.6), (0.8, 0.9), (-0.6, 1.2), (1.0, 0.5)]  # spread apart close genes
down_nudges = [(0.8, -0.6), (-1.0, -0.9), (-0.5, -1.2), (0.8, -0.5)]
all_nudges = up_nudges + down_nudges
nudge_x = [dx for dx, _ in all_nudges]
nudge_y = [dy for _, dy in all_nudges]

df_labels = pd.DataFrame(
    {
        "A": mean_expression[top_genes_idx],
        "M": log_fold_change[top_genes_idx],
        "gene": gene_labels,
        "nudge_x": nudge_x,
        "nudge_y": nudge_y,
        # Label anchor = data point + offset; also used as the leader-line end point
        "label_y": log_fold_change[top_genes_idx] + np.array(nudge_y),
        "label_x": mean_expression[top_genes_idx] + np.array(nudge_x),
    }
)

# Separate for layered plotting with unified data
df_nonsig = df[df["status"] == "Not significant"]
df_sig = df[df["status"] != "Not significant"]
83+
84+
# Plot
# Layers are added back-to-front: reference lines, muted non-significant points,
# coloured significant points, LOESS trend, then leader lines and gene labels.
plot = (
    ggplot()  # noqa: F405
    # Solid line at M = 0, dashed lines at M = +/-1
    + geom_hline(yintercept=0, color="#3C3C3C", size=0.8)  # noqa: F405
    + geom_hline(yintercept=1, color="#999999", size=0.5, linetype="dashed")  # noqa: F405
    + geom_hline(yintercept=-1, color="#999999", size=0.5, linetype="dashed")  # noqa: F405
    # Non-significant genes: small, pale and transparent; fixed colour keeps them out of the legend
    + geom_point(  # noqa: F405
        aes(x="A", y="M"),  # noqa: F405
        data=df_nonsig,
        color="#D5D5D5",
        size=1.2,
        alpha=0.25,
    )
    # Significant genes: larger, coloured by status, with hover tooltips
    + geom_point(  # noqa: F405
        aes(x="A", y="M", color="status"),  # noqa: F405
        data=df_sig,
        size=2.5,
        alpha=0.65,
        tooltips=layer_tooltips()  # noqa: F405
        .line("@status")
        .line("Mean expr: @A")
        .line("Log₂FC: @M")
        .format("A", ".1f")
        .format("M", ".2f"),
    )
    # LOESS trend fitted on all genes (significant and not)
    + geom_smooth(  # noqa: F405
        aes(x="A", y="M"),  # noqa: F405
        data=df,
        color="#D4881C",
        size=1.8,
        se=False,
        method="loess",
        span=0.3,
    )
    # Dotted leader lines from each labelled point to its nudged label position
    + geom_segment(  # noqa: F405
        aes(x="A", y="M", xend="label_x", yend="label_y"),  # noqa: F405
        data=df_labels,
        color="#555555",
        size=0.4,
        linetype="dotted",
    )
    + geom_label(  # noqa: F405
        aes(x="label_x", y="label_y", label="gene", color="regulation"),  # noqa: F405
        # Direction comes from the sign of M rather than from row position, so the
        # label colour stays correct whatever the order or count of rows in df_labels
        data=df_labels.assign(regulation=np.where(df_labels["M"] > 0, "Up-regulated", "Down-regulated")),
        size=8,
        fill="white",
        alpha=0.85,
        label_padding=0.3,
        label_r=0.2,
        label_size=0.5,
        show_legend=False,
    )
    # One manual colour scale shared by the significant points and the label text
    + scale_color_manual(  # noqa: F405
        values={"Up-regulated": "#C23B22", "Down-regulated": "#306998"}, name="Regulation"
    )
    + labs(  # noqa: F405
        x="Mean Expression (A)",
        y="Log\u2082 Fold Change (M)",
        title="ma-differential-expression \u00b7 letsplot \u00b7 pyplots.ai",
    )
    + coord_cartesian(xlim=[0, 16])  # noqa: F405
    + ggsize(1600, 900)  # noqa: F405
    + theme_minimal()  # noqa: F405
    + theme(  # noqa: F405
        plot_title=element_text(size=24, face="bold"),  # noqa: F405
        axis_title=element_text(size=20),  # noqa: F405
        axis_text=element_text(size=16),  # noqa: F405
        legend_title=element_text(size=16, face="bold"),  # noqa: F405
        legend_text=element_text(size=15),  # noqa: F405
        legend_position="bottom",
        panel_grid_major_x=element_blank(),  # noqa: F405
        panel_grid_minor=element_blank(),  # noqa: F405
        plot_margin=[30, 20, 10, 20],
    )
)
159+
160+
# Save
# Static PNG rendered with scale=3, plus an HTML export of the same plot object.
export_ggsave(plot, filename="plot.png", path=".", scale=3)
export_ggsave(plot, filename="plot.html", path=".")
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
library: letsplot
2+
specification_id: ma-differential-expression
3+
created: '2026-03-20T10:40:13Z'
4+
updated: '2026-03-20T10:53:21Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 23339077546
7+
issue: 4420
8+
python_version: 3.14.3
9+
library_version: 4.9.0
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/ma-differential-expression/letsplot/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/ma-differential-expression/letsplot/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/ma-differential-expression/letsplot/plot.html
13+
quality_score: 90
14+
review:
15+
strengths:
16+
- Excellent data storytelling with strong visual hierarchy (muted non-sig, colored
17+
significant, labeled top genes)
18+
- Realistic cancer biology context with real gene names and appropriate data scales
19+
- All spec features fully implemented including LOESS curve and fold-change threshold
20+
lines
21+
- Good use of lets-plot specific features (layer_tooltips, HTML export)
22+
- Clean, well-structured code with proper reproducibility
23+
weaknesses:
24+
- Gene label placement could be slightly improved (FOXM1/MYC proximity)
25+
- Could leverage lets-plot ggplot(df) single-data approach more idiomatically instead
26+
of splitting into separate DataFrames
27+
image_description: The plot displays an MA plot for differential gene expression
28+
with Mean Expression (A) on the x-axis (0-16) and Log2 Fold Change (M) on the
29+
y-axis (-7 to +7). Non-significant genes appear as small, light gray dots with
30+
low opacity forming a characteristic funnel shape (wider variance at low expression).
31+
Up-regulated significant genes are shown in coral/red and down-regulated in steel
32+
blue, both with higher opacity and larger markers. A solid black horizontal line
33+
marks M=0, with gray dashed lines at M=+/-1 (2-fold change thresholds). An amber/gold
34+
LOESS smoothing curve runs nearly flat along M=0. Eight labeled genes (FOXM1,
35+
MYC, CDK1, EGFR up-regulated; BRCA2, TP53, RB1, CDKN1A down-regulated) have white-background
36+
label boxes connected to their data points by dotted leader lines. The legend
37+
at the bottom shows Regulation with Up-regulated and Down-regulated categories.
38+
The title reads ma-differential-expression · letsplot · pyplots.ai in bold.
39+
criteria_checklist:
40+
visual_quality:
41+
score: 28
42+
max: 30
43+
items:
44+
- id: VQ-01
45+
name: Text Legibility
46+
score: 8
47+
max: 8
48+
passed: true
49+
comment: 'All font sizes explicitly set: title 24pt, axis titles 20pt, tick
50+
text 16pt, legend 15-16pt'
51+
- id: VQ-02
52+
name: No Overlap
53+
score: 5
54+
max: 6
55+
passed: true
56+
comment: Custom per-label nudge with leader lines; FOXM1/MYC slightly close
57+
but readable
58+
- id: VQ-03
59+
name: Element Visibility
60+
score: 5
61+
max: 6
62+
passed: true
63+
comment: Non-sig at size 1.2/alpha 0.25 handles 15K density well; sig points
64+
clearly visible at size 2.5/alpha 0.65
65+
- id: VQ-04
66+
name: Color Accessibility
67+
score: 4
68+
max: 4
69+
passed: true
70+
comment: Red-blue diverging scheme is colorblind-safe with good contrast
71+
- id: VQ-05
72+
name: Layout & Canvas
73+
score: 4
74+
max: 4
75+
passed: true
76+
comment: ggsize(1600,900) scaled 3x fills canvas well with balanced margins
77+
- id: VQ-06
78+
name: Axis Labels & Title
79+
score: 2
80+
max: 2
81+
passed: true
82+
comment: 'Descriptive labels with notation: Mean Expression (A), Log2 Fold
83+
Change (M)'
84+
design_excellence:
85+
score: 15
86+
max: 20
87+
items:
88+
- id: DE-01
89+
name: Aesthetic Sophistication
90+
score: 6
91+
max: 8
92+
passed: true
93+
comment: Intentional color hierarchy, white-background gene labels with colored
94+
text, amber LOESS curve
95+
- id: DE-02
96+
name: Visual Refinement
97+
score: 4
98+
max: 6
99+
passed: true
100+
comment: theme_minimal, removed x-grid and minor grid, bottom legend, good
101+
whitespace
102+
- id: DE-03
103+
name: Data Storytelling
104+
score: 5
105+
max: 6
106+
passed: true
107+
comment: 'Strong visual hierarchy: muted non-sig, colored significant, labeled
108+
top cancer biology genes'
109+
spec_compliance:
110+
score: 15
111+
max: 15
112+
items:
113+
- id: SC-01
114+
name: Plot Type
115+
score: 5
116+
max: 5
117+
passed: true
118+
comment: Correct MA plot with M vs A
119+
- id: SC-02
120+
name: Required Features
121+
score: 4
122+
max: 4
123+
passed: true
124+
comment: 'All features: significant highlighting, reference lines, LOESS,
125+
alpha, gene labels'
126+
- id: SC-03
127+
name: Data Mapping
128+
score: 3
129+
max: 3
130+
passed: true
131+
comment: Mean expression on x, log fold change on y, full data range shown
132+
- id: SC-04
133+
name: Title & Legend
134+
score: 3
135+
max: 3
136+
passed: true
137+
comment: Correct title format and legend labels
138+
data_quality:
139+
score: 15
140+
max: 15
141+
items:
142+
- id: DQ-01
143+
name: Feature Coverage
144+
score: 6
145+
max: 6
146+
passed: true
147+
comment: Shows up/down/non-sig genes, expression-dependent variance, range
148+
of fold changes
149+
- id: DQ-02
150+
name: Realistic Context
151+
score: 5
152+
max: 5
153+
passed: true
154+
comment: Real cancer biology gene names, realistic RNA-seq scenario with 15K
155+
genes
156+
- id: DQ-03
157+
name: Appropriate Scale
158+
score: 4
159+
max: 4
160+
passed: true
161+
comment: Expression 0.5-15, fold changes up to +/-5.5, ~800 significant out
162+
of 15K
163+
code_quality:
164+
score: 10
165+
max: 10
166+
items:
167+
- id: CQ-01
168+
name: KISS Structure
169+
score: 3
170+
max: 3
171+
passed: true
172+
comment: Clean Imports-Data-Plot-Save structure, no functions/classes
173+
- id: CQ-02
174+
name: Reproducibility
175+
score: 2
176+
max: 2
177+
passed: true
178+
comment: np.random.seed(42) set at top
179+
- id: CQ-03
180+
name: Clean Imports
181+
score: 2
182+
max: 2
183+
passed: true
184+
comment: All imports used
185+
- id: CQ-04
186+
name: Code Elegance
187+
score: 2
188+
max: 2
189+
passed: true
190+
comment: Clean and well-organized, custom nudge necessary for label placement
191+
- id: CQ-05
192+
name: Output & API
193+
score: 1
194+
max: 1
195+
passed: true
196+
comment: Saves via export_ggsave with scale=3, also exports HTML
197+
library_mastery:
198+
score: 7
199+
max: 10
200+
items:
201+
- id: LM-01
202+
name: Idiomatic Usage
203+
score: 4
204+
max: 5
205+
passed: true
206+
comment: 'Good ggplot grammar usage; minor: separate DataFrames instead of
207+
single ggplot(df)'
208+
- id: LM-02
209+
name: Distinctive Features
210+
score: 3
211+
max: 5
212+
passed: true
213+
comment: Uses layer_tooltips() for interactive hover, HTML export alongside
214+
PNG
215+
verdict: APPROVED
216+
impl_tags:
217+
dependencies: []
218+
techniques:
219+
- annotations
220+
- layer-composition
221+
- hover-tooltips
222+
- html-export
223+
patterns:
224+
- data-generation
225+
dataprep: []
226+
styling:
227+
- alpha-blending
228+
- grid-styling

0 commit comments

Comments
 (0)