Skip to content

Commit faea52a

Browse files
feat(altair): implement manhattan-gwas (#2953)
## Implementation: `manhattan-gwas` - altair Implements the **altair** version of `manhattan-gwas`. **File:** `plots/manhattan-gwas/implementations/altair.py` **Parent Issue:** #2925 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20612790460)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 0164622 commit faea52a

2 files changed

Lines changed: 196 additions & 0 deletions

File tree

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
""" pyplots.ai
2+
manhattan-gwas: Manhattan Plot for GWAS
3+
Library: altair 6.0.0 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-31
5+
"""
6+
7+
import altair as alt
8+
import numpy as np
9+
import pandas as pd
10+
11+
12+
# Data - Simulated GWAS data with random p-values and significant peaks
13+
np.random.seed(42)
14+
15+
# Chromosome lengths (approximate human chromosome sizes in Mb)
16+
chrom_lengths = {
17+
"1": 249,
18+
"2": 243,
19+
"3": 198,
20+
"4": 191,
21+
"5": 182,
22+
"6": 171,
23+
"7": 159,
24+
"8": 145,
25+
"9": 138,
26+
"10": 134,
27+
"11": 135,
28+
"12": 133,
29+
"13": 114,
30+
"14": 107,
31+
"15": 102,
32+
"16": 90,
33+
"17": 83,
34+
"18": 80,
35+
"19": 59,
36+
"20": 64,
37+
"21": 47,
38+
"22": 51,
39+
}
40+
41+
# Generate SNPs for each chromosome
42+
data = []
43+
cumulative_pos = 0
44+
chrom_centers = {}
45+
46+
for chrom, length in chrom_lengths.items():
47+
# Number of SNPs proportional to chromosome length
48+
n_snps = int(length * 20) # ~20 SNPs per Mb
49+
positions = np.sort(np.random.randint(1, length * 1_000_000, n_snps))
50+
51+
# Base p-values (mostly non-significant)
52+
pvalues = np.random.uniform(0.001, 1.0, n_snps)
53+
54+
# Add some significant peaks on specific chromosomes
55+
if chrom in ["2", "6", "8", "15"]:
56+
n_sig = np.random.randint(5, 15)
57+
sig_indices = np.random.choice(n_snps, n_sig, replace=False)
58+
pvalues[sig_indices] = 10 ** np.random.uniform(-10, -7, n_sig)
59+
60+
# Add suggestive hits on other chromosomes
61+
if chrom in ["3", "11", "19"]:
62+
n_sug = np.random.randint(3, 8)
63+
sug_indices = np.random.choice(n_snps, n_sug, replace=False)
64+
pvalues[sug_indices] = 10 ** np.random.uniform(-6, -5, n_sug)
65+
66+
# Calculate cumulative positions
67+
cum_positions = positions + cumulative_pos
68+
chrom_centers[chrom] = cumulative_pos + (length * 1_000_000) / 2
69+
70+
for pos, cum_pos, pval in zip(positions, cum_positions, pvalues, strict=True):
71+
data.append(
72+
{
73+
"chromosome": chrom,
74+
"position": pos,
75+
"cumulative_position": cum_pos,
76+
"p_value": pval,
77+
"neg_log_p": -np.log10(pval),
78+
}
79+
)
80+
81+
cumulative_pos += length * 1_000_000
82+
83+
df = pd.DataFrame(data)
84+
85+
# Threshold values
86+
genome_wide_threshold = -np.log10(5e-8) # ~7.3
87+
suggestive_threshold = -np.log10(1e-5) # 5.0
88+
89+
# Create chromosome label data
90+
chrom_label_df = pd.DataFrame(
91+
[{"chrom_label": chrom, "center": center, "y_pos": -0.5} for chrom, center in chrom_centers.items()]
92+
)
93+
94+
# Define color scheme - alternating for chromosomes
95+
color_scale = alt.Scale(
96+
domain=[str(i) for i in range(1, 23)],
97+
range=["#306998", "#7F7F7F"] * 11, # Alternating Python blue and gray
98+
)
99+
100+
# Main scatter plot
101+
points = (
102+
alt.Chart(df)
103+
.mark_circle(opacity=0.7)
104+
.encode(
105+
x=alt.X(
106+
"cumulative_position:Q",
107+
axis=alt.Axis(title="Chromosome", labels=False, ticks=False, titleFontSize=22),
108+
scale=alt.Scale(domain=[0, cumulative_pos]),
109+
),
110+
y=alt.Y(
111+
"neg_log_p:Q",
112+
title="-log₁₀(p-value)",
113+
axis=alt.Axis(titleFontSize=22, labelFontSize=16),
114+
scale=alt.Scale(domain=[0, max(df["neg_log_p"]) + 1]),
115+
),
116+
color=alt.Color("chromosome:N", scale=color_scale, legend=None),
117+
size=alt.condition(
118+
alt.datum.neg_log_p > genome_wide_threshold,
119+
alt.value(100), # Larger for significant hits
120+
alt.value(30), # Smaller for others
121+
),
122+
tooltip=["chromosome:N", "position:Q", "p_value:Q", "neg_log_p:Q"],
123+
)
124+
)
125+
126+
# Genome-wide significance threshold line
127+
gw_line = (
128+
alt.Chart(pd.DataFrame({"y": [genome_wide_threshold]}))
129+
.mark_rule(strokeDash=[8, 4], color="#E74C3C", strokeWidth=2)
130+
.encode(y="y:Q")
131+
)
132+
133+
# Suggestive threshold line
134+
sug_line = (
135+
alt.Chart(pd.DataFrame({"y": [suggestive_threshold]}))
136+
.mark_rule(strokeDash=[4, 4], color="#F39C12", strokeWidth=2)
137+
.encode(y="y:Q")
138+
)
139+
140+
# Chromosome labels as text marks at bottom
141+
chrom_text = (
142+
alt.Chart(chrom_label_df)
143+
.mark_text(fontSize=14, fontWeight="bold", baseline="top", dy=5)
144+
.encode(x=alt.X("center:Q", scale=alt.Scale(domain=[0, cumulative_pos])), text="chrom_label:N")
145+
)
146+
147+
# Combine layers - points with threshold lines
148+
main_chart = alt.layer(points, gw_line, sug_line).properties(width=1500, height=750)
149+
150+
# Chromosome labels at bottom
151+
labels_chart = (
152+
alt.Chart(chrom_label_df)
153+
.mark_text(fontSize=16, baseline="middle")
154+
.encode(x=alt.X("center:Q", scale=alt.Scale(domain=[0, cumulative_pos]), axis=None), text="chrom_label:N")
155+
.properties(width=1500, height=30)
156+
)
157+
158+
# Vertical concat with shared x-axis
159+
chart = (
160+
alt.vconcat(main_chart, labels_chart, spacing=0)
161+
.properties(title=alt.Title("manhattan-gwas · altair · pyplots.ai", fontSize=28, anchor="middle"))
162+
.configure_axis(labelFontSize=16, titleFontSize=22, grid=True, gridOpacity=0.3)
163+
.configure_view(strokeWidth=0)
164+
)
165+
166+
# Save as PNG and HTML
167+
chart.save("plot.png", scale_factor=3.0)
168+
chart.save("plot.html")
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
library: altair
2+
specification_id: manhattan-gwas
3+
created: '2025-12-31T05:34:14Z'
4+
updated: '2025-12-31T05:45:37Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20612790460
7+
issue: 2925
8+
python_version: 3.13.11
9+
library_version: 6.0.0
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/altair/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/altair/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/altair/plot.html
13+
quality_score: 91
14+
review:
15+
strengths:
16+
- Excellent Manhattan plot with clear alternating chromosome colors (blue/gray)
17+
for visual distinction
18+
- Both genome-wide (red dashed) and suggestive (orange dashed) threshold lines correctly
19+
positioned at -log10(5e-8) and -log10(1e-5)
20+
- Smart use of conditional sizing - significant hits above threshold are larger
21+
(100px) vs non-significant (30px)
22+
- Chromosome labels clearly positioned below the plot with proper centering
23+
- 'Good use of Altair features: layered charts, declarative encoding, tooltips for
24+
interactivity'
25+
- 'Title follows exact format specification: manhattan-gwas · altair · pyplots.ai'
26+
weaknesses:
27+
- Y-axis label uses subscript notation (-log₁₀) which displays well, but some tick
28+
labels on the right side of smaller chromosomes (19-22) appear slightly cramped

0 commit comments

Comments
 (0)