Skip to content

Commit 3bef521

Browse files
feat(seaborn): implement manhattan-gwas (#2941)
## Implementation: `manhattan-gwas` - seaborn Implements the **seaborn** version of `manhattan-gwas`. **File:** `plots/manhattan-gwas/implementations/seaborn.py` **Parent Issue:** #2925 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20612790719)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 13ba55d commit 3bef521

2 files changed

Lines changed: 165 additions & 0 deletions

File tree

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
""" pyplots.ai
2+
manhattan-gwas: Manhattan Plot for GWAS
3+
Library: seaborn 0.13.2 | Python 3.13.11
4+
Quality: 92/100 | Created: 2025-12-31
5+
"""
6+
7+
import matplotlib.pyplot as plt
8+
import numpy as np
9+
import pandas as pd
10+
import seaborn as sns
11+
12+
13+
# Data - Simulate GWAS data with realistic structure
14+
np.random.seed(42)
15+
16+
# Define chromosomes with approximate sizes (in Mb)
17+
chromosomes = [str(i) for i in range(1, 23)]
18+
chrom_sizes = [250, 243, 198, 190, 182, 171, 159, 145, 138, 133, 135, 133, 114, 107, 102, 90, 83, 80, 59, 64, 47, 51]
19+
20+
# Generate SNPs for each chromosome
21+
data = []
22+
cumulative_pos = 0
23+
chrom_centers = {}
24+
chrom_boundaries = [0]
25+
26+
for chrom, size in zip(chromosomes, chrom_sizes, strict=True):
27+
# Number of SNPs proportional to chromosome size
28+
n_snps = int(size * 40) # ~40 SNPs per Mb, total ~10k SNPs
29+
30+
# Random positions along chromosome
31+
positions = np.sort(np.random.randint(0, size * 1_000_000, n_snps))
32+
33+
# Generate p-values - mostly non-significant with some peaks
34+
# Use beta distribution to get realistic p-value distribution
35+
p_values = np.random.beta(1, 1, n_snps)
36+
37+
# Add significant peaks on specific chromosomes
38+
if chrom == "6": # Major peak on chr6 (like MHC region)
39+
peak_region = (positions > 25_000_000) & (positions < 35_000_000)
40+
p_values[peak_region] = 10 ** (-np.random.uniform(7, 12, peak_region.sum()))
41+
elif chrom == "11": # Moderate peak
42+
peak_region = (positions > 60_000_000) & (positions < 70_000_000)
43+
p_values[peak_region] = 10 ** (-np.random.uniform(6, 9, peak_region.sum()))
44+
elif chrom == "2": # Smaller peak
45+
peak_region = (positions > 100_000_000) & (positions < 110_000_000)
46+
p_values[peak_region] = 10 ** (-np.random.uniform(5.5, 8, peak_region.sum()))
47+
48+
# Calculate cumulative position
49+
cumulative_positions = positions + cumulative_pos
50+
51+
# Store center for axis label
52+
chrom_centers[chrom] = cumulative_pos + (size * 1_000_000) / 2
53+
54+
for pos, cum_pos, pval in zip(positions, cumulative_positions, p_values, strict=True):
55+
data.append(
56+
{
57+
"chromosome": chrom,
58+
"position": pos,
59+
"cumulative_position": cum_pos,
60+
"p_value": pval,
61+
"neg_log_p": -np.log10(pval),
62+
}
63+
)
64+
65+
cumulative_pos += size * 1_000_000
66+
chrom_boundaries.append(cumulative_pos)
67+
68+
df = pd.DataFrame(data)
69+
70+
# Create alternating color groups for chromosomes
71+
df["color_group"] = df["chromosome"].apply(lambda x: int(x) % 2)
72+
73+
# Plot
74+
fig, ax = plt.subplots(figsize=(16, 9))
75+
76+
# Create custom palette - Python Blue and a muted gray
77+
palette = {0: "#306998", 1: "#8B9DC3"}
78+
79+
# Plot points using seaborn scatterplot with hue for alternating colors
80+
sns.scatterplot(
81+
data=df,
82+
x="cumulative_position",
83+
y="neg_log_p",
84+
hue="color_group",
85+
palette=palette,
86+
s=15,
87+
alpha=0.7,
88+
edgecolor="none",
89+
legend=False,
90+
ax=ax,
91+
)
92+
93+
# Highlight significant SNPs (above genome-wide threshold)
94+
significant = df[df["neg_log_p"] > 7.3]
95+
if len(significant) > 0:
96+
ax.scatter(
97+
significant["cumulative_position"],
98+
significant["neg_log_p"],
99+
c="#FFD43B", # Python Yellow for significant hits
100+
s=40,
101+
edgecolor="black",
102+
linewidth=0.5,
103+
zorder=5,
104+
)
105+
106+
# Genome-wide significance threshold
107+
ax.axhline(y=7.3, color="#D62728", linestyle="--", linewidth=2, label="Genome-wide significance (p < 5×10⁻⁸)")
108+
109+
# Suggestive threshold
110+
ax.axhline(y=5, color="#7F7F7F", linestyle=":", linewidth=1.5, label="Suggestive (p < 1×10⁻⁵)")
111+
112+
# Set x-axis ticks at chromosome centers
113+
ax.set_xticks([chrom_centers[c] for c in chromosomes])
114+
ax.set_xticklabels(chromosomes)
115+
116+
# Styling
117+
ax.set_xlabel("Chromosome", fontsize=20)
118+
ax.set_ylabel("-log₁₀(p-value)", fontsize=20)
119+
ax.set_title("manhattan-gwas · seaborn · pyplots.ai", fontsize=24)
120+
ax.tick_params(axis="both", labelsize=14)
121+
ax.tick_params(axis="x", labelsize=12)
122+
123+
# Set axis limits
124+
ax.set_xlim(0, cumulative_pos)
125+
ax.set_ylim(0, max(df["neg_log_p"]) * 1.05)
126+
127+
# Remove top and right spines for cleaner look
128+
ax.spines["top"].set_visible(False)
129+
ax.spines["right"].set_visible(False)
130+
131+
# Add legend
132+
ax.legend(loc="upper right", fontsize=14, framealpha=0.9)
133+
134+
# Subtle grid on y-axis only
135+
ax.yaxis.grid(True, alpha=0.3, linestyle="-")
136+
ax.xaxis.grid(False)
137+
138+
plt.tight_layout()
139+
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
library: seaborn
2+
specification_id: manhattan-gwas
3+
created: '2025-12-31T05:32:05Z'
4+
updated: '2025-12-31T05:38:41Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20612790719
7+
issue: 2925
8+
python_version: 3.13.11
9+
library_version: 0.13.2
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/seaborn/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/manhattan-gwas/seaborn/plot_thumb.png
12+
preview_html: null
13+
quality_score: 92
14+
review:
15+
strengths:
16+
- Excellent biological realism with chromosome-specific significant peaks mimicking
17+
real GWAS patterns (MHC region on chr6)
18+
- Clean visual design with alternating chromosome colors and clear significance
19+
thresholds
20+
- Proper handling of dense data with appropriate marker size and transparency
21+
- Correct title format following pyplots.ai conventions
22+
- Yellow highlighting of significant SNPs provides excellent visual emphasis
23+
weaknesses:
24+
- Y-axis grid could be styled more subtly (current alpha=0.3 is slightly prominent)
25+
- Significant SNPs highlighting uses raw matplotlib instead of seaborn; could use
26+
a second scatterplot call for consistency

0 commit comments

Comments
 (0)