Skip to content

Commit edf2412

Browse files
feat(bokeh): implement manhattan-gwas (#2946)
## Implementation: `manhattan-gwas` - bokeh Implements the **bokeh** version of `manhattan-gwas`. **File:** `plots/manhattan-gwas/implementations/bokeh.py` **Parent Issue:** #2925 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20612790523)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent bfe3681 commit edf2412

2 files changed

Lines changed: 208 additions & 0 deletions

File tree

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
""" pyplots.ai
2+
manhattan-gwas: Manhattan Plot for GWAS
3+
Library: bokeh 3.8.1 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-31
5+
"""
6+
7+
import numpy as np
8+
import pandas as pd
9+
from bokeh.io import export_png, output_file, save
10+
from bokeh.models import ColumnDataSource, Label, Span
11+
from bokeh.plotting import figure
12+
13+
14+
# Data - Simulated GWAS data with significant peaks
15+
np.random.seed(42)
16+
17+
# Chromosome sizes (simplified, in millions of base pairs)
18+
chrom_sizes = {
19+
"1": 248,
20+
"2": 242,
21+
"3": 198,
22+
"4": 190,
23+
"5": 181,
24+
"6": 170,
25+
"7": 159,
26+
"8": 145,
27+
"9": 138,
28+
"10": 133,
29+
"11": 135,
30+
"12": 133,
31+
"13": 114,
32+
"14": 107,
33+
"15": 101,
34+
"16": 90,
35+
"17": 83,
36+
"18": 80,
37+
"19": 58,
38+
"20": 64,
39+
"21": 46,
40+
"22": 50,
41+
}
42+
43+
# Generate SNP data
44+
n_snps_per_chrom = 2000
45+
data = []
46+
cumulative_pos = 0
47+
chrom_centers = {}
48+
49+
for chrom, size in chrom_sizes.items():
50+
# Random positions within chromosome
51+
positions = np.sort(np.random.randint(1, size * 1_000_000, n_snps_per_chrom))
52+
53+
# Base p-values (mostly non-significant)
54+
p_values = np.random.uniform(0.001, 1.0, n_snps_per_chrom)
55+
56+
# Add some significant peaks for certain chromosomes
57+
if chrom in ["2", "6", "11", "17"]:
58+
# Add 5-15 significant SNPs per peak chromosome
59+
n_significant = np.random.randint(5, 15)
60+
peak_indices = np.random.choice(n_snps_per_chrom, n_significant, replace=False)
61+
p_values[peak_indices] = 10 ** np.random.uniform(-12, -8, n_significant)
62+
63+
# Calculate cumulative position
64+
cumulative_positions = positions + cumulative_pos
65+
chrom_centers[chrom] = cumulative_pos + (size * 1_000_000) / 2
66+
67+
for i in range(n_snps_per_chrom):
68+
data.append(
69+
{
70+
"chromosome": chrom,
71+
"position": positions[i],
72+
"cumulative_pos": cumulative_positions[i],
73+
"p_value": p_values[i],
74+
"neg_log_p": -np.log10(p_values[i]),
75+
}
76+
)
77+
78+
cumulative_pos += size * 1_000_000
79+
80+
df = pd.DataFrame(data)
81+
82+
# Assign colors based on chromosome (alternating)
83+
colors = ["#306998", "#7BA3C9"] # Python Blue and lighter shade
84+
df["color"] = df["chromosome"].apply(lambda x: colors[int(x) % 2])
85+
86+
# Highlight significant SNPs (above genome-wide threshold)
87+
significance_threshold = -np.log10(5e-8) # ~7.3
88+
significant_mask = df["neg_log_p"] >= significance_threshold
89+
df.loc[significant_mask, "color"] = "#FFD43B" # Python Yellow for significant
90+
91+
# Adjust point sizes - smaller for non-significant, larger for significant
92+
df["size"] = 6
93+
df.loc[significant_mask, "size"] = 12
94+
95+
# Create plot
96+
source = ColumnDataSource(df)
97+
98+
p = figure(
99+
width=4800,
100+
height=2700,
101+
title="manhattan-gwas · bokeh · pyplots.ai",
102+
x_axis_label="Genomic Position",
103+
y_axis_label="-log₁₀(p-value)",
104+
tools="pan,wheel_zoom,box_zoom,reset,save",
105+
)
106+
107+
# Scatter plot
108+
p.scatter(x="cumulative_pos", y="neg_log_p", source=source, size="size", color="color", alpha=0.7, line_color=None)
109+
110+
# Genome-wide significance threshold line (p < 5e-8)
111+
significance_line = Span(
112+
location=significance_threshold, dimension="width", line_color="#E63946", line_dash="dashed", line_width=3
113+
)
114+
p.add_layout(significance_line)
115+
116+
# Suggestive threshold line (p < 1e-5)
117+
suggestive_threshold = -np.log10(1e-5) # = 5
118+
suggestive_line = Span(
119+
location=suggestive_threshold, dimension="width", line_color="#2A9D8F", line_dash="dotted", line_width=2
120+
)
121+
p.add_layout(suggestive_line)
122+
123+
# Add threshold labels
124+
sig_label = Label(
125+
x=cumulative_pos * 0.98,
126+
y=significance_threshold + 0.3,
127+
text="p = 5×10⁻⁸",
128+
text_font_size="16pt",
129+
text_color="#E63946",
130+
)
131+
p.add_layout(sig_label)
132+
133+
sug_label = Label(
134+
x=cumulative_pos * 0.98,
135+
y=suggestive_threshold + 0.3,
136+
text="p = 1×10⁻⁵",
137+
text_font_size="16pt",
138+
text_color="#2A9D8F",
139+
)
140+
p.add_layout(sug_label)
141+
142+
# Style the plot
143+
p.title.text_font_size = "28pt"
144+
p.xaxis.axis_label_text_font_size = "22pt"
145+
p.yaxis.axis_label_text_font_size = "22pt"
146+
p.xaxis.major_label_text_font_size = "16pt"
147+
p.yaxis.major_label_text_font_size = "18pt"
148+
149+
# Hide x-axis tick labels (we'll add chromosome labels instead)
150+
p.xaxis.major_label_text_font_size = "0pt"
151+
p.xaxis.major_tick_line_color = None
152+
p.xaxis.minor_tick_line_color = None
153+
154+
# Add chromosome labels below the plot
155+
for chrom, center in chrom_centers.items():
156+
chrom_label = Label(x=center, y=-0.8, text=chrom, text_font_size="14pt", text_align="center", text_color="#333333")
157+
p.add_layout(chrom_label)
158+
159+
# Grid styling
160+
p.xgrid.grid_line_alpha = 0.3
161+
p.ygrid.grid_line_alpha = 0.3
162+
163+
# Background
164+
p.background_fill_color = "#FAFAFA"
165+
p.border_fill_color = "#FFFFFF"
166+
167+
# Axis styling
168+
p.yaxis.axis_line_width = 2
169+
p.xaxis.axis_line_width = 2
170+
171+
# Set y-axis range to show chromosome labels
172+
p.y_range.start = -1.5
173+
p.y_range.end = df["neg_log_p"].max() + 1
174+
175+
# Save PNG
176+
export_png(p, filename="plot.png")
177+
178+
# Save HTML for interactivity
179+
output_file("plot.html")
180+
save(p)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
library: bokeh
2+
specification_id: manhattan-gwas
3+
created: '2025-12-31T05:32:51Z'
4+
updated: '2025-12-31T05:42:50Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20612790523
7+
issue: 2925
8+
python_version: 3.13.11
9+
library_version: 3.8.1
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/bokeh/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/bokeh/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/bokeh/plot.html
13+
quality_score: 91
14+
review:
15+
strengths:
16+
- Excellent implementation of Manhattan plot with proper chromosome coloring and
17+
cumulative positioning
18+
- Good use of Bokeh interactive tools (pan, zoom, save) for exploration of GWAS
19+
data
20+
- Clear threshold lines with labels for genome-wide and suggestive significance
21+
levels
22+
- Appropriate point sizing differentiation between significant and non-significant
23+
SNPs
24+
- Clean, KISS-compliant code structure with proper reproducibility
25+
weaknesses:
26+
- Missing legend to explain color coding (blue/light blue for chromosomes, yellow
27+
for significant)
28+
- Could add HoverTool to show SNP details on mouseover, which is a key Bokeh strength

0 commit comments

Comments
 (0)