Skip to content

Commit 7eb4237

Browse files
feat(pygal): implement manhattan-gwas (#2972)
## Implementation: `manhattan-gwas` - pygal Implements the **pygal** version of `manhattan-gwas`. **File:** `plots/manhattan-gwas/implementations/pygal.py` **Parent Issue:** #2925 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20612794558)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 1d24d1a commit 7eb4237

2 files changed

Lines changed: 247 additions & 0 deletions

File tree

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
""" pyplots.ai
2+
manhattan-gwas: Manhattan Plot for GWAS
3+
Library: pygal 3.1.0 | Python 3.13.11
4+
Quality: 82/100 | Created: 2025-12-31
5+
"""
6+
7+
import numpy as np
8+
import pygal
9+
from pygal.style import Style
10+
11+
12+
# Seed for reproducibility
13+
np.random.seed(42)
14+
15+
# Chromosome lengths (simplified, in Mb scale)
16+
chrom_lengths = {
17+
"1": 249,
18+
"2": 243,
19+
"3": 198,
20+
"4": 191,
21+
"5": 182,
22+
"6": 171,
23+
"7": 159,
24+
"8": 146,
25+
"9": 141,
26+
"10": 136,
27+
"11": 135,
28+
"12": 134,
29+
"13": 115,
30+
"14": 107,
31+
"15": 102,
32+
"16": 90,
33+
"17": 83,
34+
"18": 80,
35+
"19": 59,
36+
"20": 64,
37+
"21": 47,
38+
"22": 51,
39+
}
40+
41+
chromosomes = list(chrom_lengths.keys())
42+
n_snps_per_chrom = 500
43+
44+
# Storage for data
45+
all_chroms = []
46+
all_cumulative_pos = []
47+
all_pvalues = []
48+
49+
# Track chromosome boundaries for x-axis labels
50+
chrom_midpoints = []
51+
cumulative_offset = 0
52+
53+
for chrom in chromosomes:
54+
length = chrom_lengths[chrom]
55+
positions = np.sort(np.random.uniform(0, length, n_snps_per_chrom))
56+
57+
# Store chromosome midpoint for x-axis label
58+
chrom_midpoints.append(cumulative_offset + length / 2)
59+
60+
# Generate p-values: mostly uniform with some significant hits
61+
pvalues = np.random.uniform(0, 1, n_snps_per_chrom)
62+
63+
# Add significant peaks on selected chromosomes
64+
if chrom in ["6", "11", "16"]:
65+
n_sig = np.random.randint(3, 8)
66+
sig_indices = np.random.choice(n_snps_per_chrom, n_sig, replace=False)
67+
pvalues[sig_indices] = 10 ** (-np.random.uniform(8, 15, n_sig))
68+
69+
# Add suggestive signals
70+
n_sugg = np.random.randint(5, 15)
71+
sugg_indices = np.random.choice(n_snps_per_chrom, n_sugg, replace=False)
72+
pvalues[sugg_indices] = 10 ** (-np.random.uniform(5, 8, n_sugg))
73+
74+
cumulative_positions = positions + cumulative_offset
75+
all_chroms.extend([chrom] * n_snps_per_chrom)
76+
all_cumulative_pos.extend(cumulative_positions)
77+
all_pvalues.extend(pvalues)
78+
79+
cumulative_offset += length
80+
81+
# Convert to -log10 p-values
82+
neg_log_pvalues = -np.log10(np.array(all_pvalues))
83+
84+
# Thresholds
85+
genome_wide_sig = -np.log10(5e-8) # ~7.3
86+
suggestive_sig = 5.0 # -log10(1e-5)
87+
88+
# Custom style with explicit colors for threshold lines
89+
custom_style = Style(
90+
background="white",
91+
plot_background="white",
92+
foreground="#333333",
93+
foreground_strong="#333333",
94+
foreground_subtle="#666666",
95+
colors=(
96+
"#306998", # Blue - odd chromosomes
97+
"#888888", # Gray - even chromosomes
98+
"#D62728", # Red - significant points above threshold
99+
"#D62728", # Red - genome-wide threshold line
100+
"#FF7F0E", # Orange - suggestive threshold line
101+
),
102+
title_font_size=56,
103+
label_font_size=36,
104+
major_label_font_size=32,
105+
legend_font_size=32,
106+
value_font_size=20,
107+
font_family="sans-serif",
108+
)
109+
110+
# Create XY scatter chart
111+
chart = pygal.XY(
112+
width=4800,
113+
height=2700,
114+
style=custom_style,
115+
title="manhattan-gwas · pygal · pyplots.ai",
116+
x_title="Chromosome",
117+
y_title="-log₁₀(p-value)",
118+
show_legend=True,
119+
legend_at_bottom=True,
120+
legend_at_bottom_columns=5,
121+
legend_box_size=24,
122+
stroke=False,
123+
dots_size=5,
124+
show_x_guides=False,
125+
show_y_guides=True,
126+
truncate_label=-1,
127+
print_values=False,
128+
x_label_rotation=0,
129+
range=(0, 16),
130+
xrange=(0, cumulative_offset),
131+
tooltip_border_radius=10,
132+
explicit_size=True,
133+
spacing=30,
134+
margin=60,
135+
margin_bottom=200,
136+
margin_top=100,
137+
)
138+
139+
# Create x_labels with chromosome numbers at midpoints
140+
# Use string labels for chromosomes 1-22
141+
chart.x_labels = [str(i + 1) for i in range(len(chromosomes))]
142+
chart.x_labels_major_count = len(chromosomes)
143+
144+
# Map positions to display label indices
145+
# We need to normalize x values to show chromosome labels
146+
# Pygal XY chart needs numeric x_labels for scatter, so we'll use custom formatter
147+
label_positions = chrom_midpoints.copy()
148+
149+
150+
def format_x_label(x_val):
151+
"""Find closest chromosome midpoint and return chromosome number."""
152+
if not label_positions:
153+
return ""
154+
min_dist = float("inf")
155+
closest_idx = 0
156+
for i, pos in enumerate(label_positions):
157+
dist = abs(x_val - pos)
158+
if dist < min_dist:
159+
min_dist = dist
160+
closest_idx = i
161+
# Only return label if close to midpoint (within half chromosome width)
162+
if min_dist < 50:
163+
return str(closest_idx + 1)
164+
return ""
165+
166+
167+
# Set numeric x_labels at chromosome midpoints
168+
chart.x_labels = chrom_midpoints
169+
chart.x_value_formatter = lambda x: format_x_label(x) if x in chrom_midpoints else ""
170+
171+
# Prepare data by chromosome with alternating colors
172+
odd_chrom_points = []
173+
even_chrom_points = []
174+
significant_points = [] # Points above genome-wide significance
175+
176+
for idx, chrom in enumerate(chromosomes):
177+
chrom_mask = [c == chrom for c in all_chroms]
178+
chrom_x = [all_cumulative_pos[j] for j in range(len(all_chroms)) if chrom_mask[j]]
179+
chrom_y = [neg_log_pvalues[j] for j in range(len(all_chroms)) if chrom_mask[j]]
180+
181+
for x, y in zip(chrom_x, chrom_y, strict=True):
182+
point = {"value": (x, y), "label": f"Chr {chrom}: {x:.1f} Mb, -log₁₀(p)={y:.2f}"}
183+
if y >= genome_wide_sig:
184+
significant_points.append(point)
185+
elif idx % 2 == 0:
186+
odd_chrom_points.append(point)
187+
else:
188+
even_chrom_points.append(point)
189+
190+
# Add chromosome data series (blue for odd, gray for even)
191+
chart.add("Odd chromosomes", odd_chrom_points, stroke=False, show_dots=True)
192+
chart.add("Even chromosomes", even_chrom_points, stroke=False, show_dots=True)
193+
194+
# Add significant points as separate series (highlighted)
195+
chart.add("Significant (p<5×10⁻⁸)", significant_points, stroke=False, show_dots=True)
196+
197+
# Add threshold lines as dense scatter points (works better in PNG than stroke)
198+
# Using many small dots to create visible line effect
199+
n_line_points = 200
200+
threshold_x = np.linspace(10, cumulative_offset - 10, n_line_points)
201+
202+
# Genome-wide significance threshold line (y ≈ 7.3)
203+
gw_line_points = [
204+
{"value": (x, genome_wide_sig), "label": f"Genome-wide threshold: -log₁₀(5×10⁻⁸) = {genome_wide_sig:.1f}"}
205+
for x in threshold_x
206+
]
207+
chart.add("p = 5×10⁻⁸ threshold", gw_line_points, stroke=True, show_dots=True, dots_size=2)
208+
209+
# Suggestive threshold line (y = 5)
210+
sugg_line_points = [
211+
{"value": (x, suggestive_sig), "label": f"Suggestive threshold: -log₁₀(1×10⁻⁵) = {suggestive_sig:.1f}"}
212+
for x in threshold_x
213+
]
214+
chart.add("p = 1×10⁻⁵ threshold", sugg_line_points, stroke=True, show_dots=True, dots_size=2)
215+
216+
# Save outputs
217+
chart.render_to_file("plot.html")
218+
chart.render_to_png("plot.png")
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
library: pygal
2+
specification_id: manhattan-gwas
3+
created: '2025-12-31T05:37:41Z'
4+
updated: '2025-12-31T15:21:52Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20612794558
7+
issue: 2925
8+
python_version: 3.13.11
9+
library_version: 3.1.0
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/pygal/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/pygal/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/pygal/plot.html
13+
quality_score: 82
14+
review:
15+
strengths:
16+
- Excellent alternating blue/gray color scheme for chromosome distinction that is
17+
colorblind-friendly
18+
- Both genome-wide significance (p=5×10⁻⁸) and suggestive (p=1×10⁻⁵) threshold lines
19+
are clearly visible
20+
- Good use of pygal's tooltip system with informative labels showing chromosome,
21+
position, and p-value
22+
- Significant SNPs are properly highlighted in red as a separate series
23+
- Clean chromosome labeling at midpoints with all 22 chromosomes visible
24+
weaknesses:
25+
- Contains a helper function (format_x_label) which violates the KISS principle
26+
requiring flat script structure
27+
- Data point markers are somewhat small for the 11,000+ point dataset; larger dots_size
28+
would improve visibility
29+
- Legend at bottom takes substantial vertical space; could be more compact

0 commit comments

Comments
 (0)