Skip to content

Commit 71f7f29

Browse files
feat(matplotlib): implement manhattan-gwas (#2938)
## Implementation: `manhattan-gwas` - matplotlib Implements the **matplotlib** version of `manhattan-gwas`. **File:** `plots/manhattan-gwas/implementations/matplotlib.py` **Parent Issue:** #2925 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20612787947)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 722d472 commit 71f7f29

2 files changed

Lines changed: 199 additions & 0 deletions

File tree

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
""" pyplots.ai
2+
manhattan-gwas: Manhattan Plot for GWAS
3+
Library: matplotlib 3.10.8 | Python 3.13.11
4+
Quality: 92/100 | Created: 2025-12-31
5+
"""
6+
7+
import matplotlib.pyplot as plt
8+
import numpy as np
9+
import pandas as pd
10+
11+
12+
# Data - Simulate GWAS results for 22 chromosomes.
# The global seed is kept so every np.random draw below is reproducible; the
# order of the random calls inside the loop is therefore significant.
np.random.seed(42)

# Approximate chromosome sizes in Mb (SNP density is scaled down for simulation)
chrom_sizes = {
    1: 249,
    2: 243,
    3: 198,
    4: 191,
    5: 182,
    6: 171,
    7: 159,
    8: 146,
    9: 141,
    10: 136,
    11: 135,
    12: 134,
    13: 115,
    14: 107,
    15: 103,
    16: 90,
    17: 81,
    18: 78,
    19: 59,
    20: 63,
    21: 48,
    22: 51,
}

# Upper bound (on the -log10 scale) for simulated suggestive hits. It equals the
# genome-wide significance line (p = 5e-8, ~7.3) so that a "suggestive" SNP can
# never be classified as genome-wide significant.
suggestive_ceiling = -np.log10(5e-8)

# Generate SNPs for each chromosome; one array per chromosome, joined afterwards
chrom_label_parts = []
position_parts = []
p_value_parts = []

for chrom, size in chrom_sizes.items():
    n_snps = int(size * 40)  # ~40 SNPs per Mb
    chrom_positions = np.sort(np.random.randint(1, size * 1_000_000, n_snps))

    # Background p-values: uniform on [0, 1), i.e. mostly non-significant
    chrom_pvals = np.random.uniform(0, 1, n_snps)

    # Genome-wide significant SNPs on selected chromosomes (simulated signals)
    if chrom in (2, 6, 11, 16):
        peak_idx = np.random.choice(n_snps, size=np.random.randint(3, 8), replace=False)
        chrom_pvals[peak_idx] = 10 ** (-np.random.uniform(8, 15, len(peak_idx)))

    # Suggestive hits: between p = 1e-5 and the genome-wide line, never above it
    if chrom in (1, 3, 8, 12, 19):
        suggestive_idx = np.random.choice(n_snps, size=np.random.randint(2, 5), replace=False)
        chrom_pvals[suggestive_idx] = 10 ** (-np.random.uniform(5, suggestive_ceiling, len(suggestive_idx)))

    chrom_label_parts.append(np.full(n_snps, chrom))
    position_parts.append(chrom_positions)
    p_value_parts.append(chrom_pvals)

chromosomes = np.concatenate(chrom_label_parts)
positions = np.concatenate(position_parts)
p_values = np.concatenate(p_value_parts)

# Create DataFrame
df = pd.DataFrame({"chromosome": chromosomes, "position": positions, "p_value": p_values})

# Calculate -log10(p-value). uniform(0, 1) may return exactly 0.0; clipping to
# the smallest positive normal float keeps the result finite so the y-limits
# computed from its maximum stay valid.
df["-log10p"] = -np.log10(df["p_value"].clip(lower=np.finfo(float).tiny))

# Sort by chromosome, then position, so x positions can be laid out end to end
df["chrom_num"] = df["chromosome"]
df = df.sort_values(["chrom_num", "position"])

# Add cumulative position offset: each chromosome starts after the previous one
cumulative_offset = 0
chrom_centers = {}
for chrom in sorted(df["chrom_num"].unique()):
    chrom_mask = df["chrom_num"] == chrom
    chrom_pos = df.loc[chrom_mask, "position"]
    df.loc[chrom_mask, "cumulative_pos"] = chrom_pos + cumulative_offset
    # The median SNP position is used as the x location of the chromosome label
    chrom_centers[chrom] = cumulative_offset + chrom_pos.median()
    cumulative_offset += chrom_pos.max() + 10_000_000  # Gap between chromosomes
85+
86+
# Significance cut-offs, expressed on the -log10(p) scale used by the y-axis
genome_wide_threshold = -np.log10(5e-8)  # ~7.3
suggestive_threshold = -np.log10(1e-5)  # 5

# Two blues, alternated so that neighbouring chromosomes can be told apart
colors = ["#306998", "#6699CC"]

# Figure and axes
fig, ax = plt.subplots(figsize=(16, 9))

# One pass per chromosome, in ascending chromosome order
ordered_chroms = sorted(df["chrom_num"].unique())
for idx, chrom in enumerate(ordered_chroms):
    chrom_data = df[df["chrom_num"] == chrom]

    # Split SNPs at the genome-wide line: hits get their own emphasised layer
    is_hit = chrom_data["-log10p"] >= genome_wide_threshold
    background = chrom_data[~is_hit]
    hits = chrom_data[is_hit]

    # Background SNPs: small, semi-transparent, in the chromosome's blue
    ax.scatter(
        background["cumulative_pos"],
        background["-log10p"],
        c=colors[idx % 2],
        s=15,
        alpha=0.6,
        edgecolors="none",
        rasterized=True,
    )

    # Genome-wide significant SNPs: larger red markers drawn above the rest
    if not hits.empty:
        ax.scatter(
            hits["cumulative_pos"],
            hits["-log10p"],
            c="#E74C3C",
            s=50,
            alpha=0.9,
            edgecolors="white",
            linewidths=0.5,
            zorder=5,
            rasterized=True,
        )

# Horizontal threshold lines; the labels feed the legend, in this order
threshold_lines = (
    (genome_wide_threshold, "#E74C3C", "Genome-wide significance (p < 5×10⁻⁸)"),
    (suggestive_threshold, "#FFD43B", "Suggestive threshold (p < 1×10⁻⁵)"),
)
for level, line_color, legend_text in threshold_lines:
    ax.axhline(
        y=level,
        color=line_color,
        linestyle="--",
        linewidth=2,
        label=legend_text,
        alpha=0.8,
    )

# X-axis: one tick per chromosome, placed at its stored center position
tick_chroms = sorted(chrom_centers)
ax.set_xticks([chrom_centers[c] for c in tick_chroms])
ax.set_xticklabels([str(c) for c in tick_chroms], fontsize=14)
ax.set_xlim(0, df["cumulative_pos"].max() * 1.01)

# Y-axis: start at zero and leave 10% headroom above the strongest signal
ax.set_ylim(0, df["-log10p"].max() * 1.1)

# Axis labels, title and tick label sizes
ax.set_xlabel("Chromosome", fontsize=20)
ax.set_ylabel("-log₁₀(p-value)", fontsize=20)
ax.set_title("manhattan-gwas · matplotlib · pyplots.ai", fontsize=24)
ax.tick_params(axis="y", labelsize=16)
ax.tick_params(axis="x", labelsize=14)

# Legend for the two threshold lines
ax.legend(fontsize=14, loc="upper right", framealpha=0.9)

# Hide the top and right spines for a cleaner frame
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
library: matplotlib
2+
specification_id: manhattan-gwas
3+
created: '2025-12-31T05:31:23Z'
4+
updated: '2025-12-31T05:37:51Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20612787947
7+
issue: 2925
8+
python_version: 3.13.11
9+
library_version: 3.10.8
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/matplotlib/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/matplotlib/plot_thumb.png
12+
preview_html: null
13+
quality_score: 92
14+
review:
15+
strengths:
16+
- Excellent chromosome visualization with alternating blue colors providing clear
17+
visual distinction between chromosomes
18+
- Proper highlighting of significant SNPs with larger red markers and white edges
19+
that stand out clearly
20+
- Realistic GWAS data simulation with appropriate chromosome sizes, SNP densities,
21+
and significant peaks on specific chromosomes
22+
- Clean code structure with good use of rasterized=True for performance optimization
23+
with 110k+ data points
24+
- Both genome-wide and suggestive threshold lines included with clear legend
25+
weaknesses:
26+
- No grid lines present (minor - acceptable for Manhattan plots to reduce clutter)
27+
- Y-axis label could include clearer notation

0 commit comments

Comments
 (0)