|
| 1 | +""" pyplots.ai |
| 2 | +manhattan-gwas: Manhattan Plot for GWAS |
| 3 | +Library: letsplot 4.8.2 | Python 3.13.11 |
| 4 | +Quality: 92/100 | Created: 2025-12-31 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from lets_plot import * |
| 10 | + |
| 11 | + |
| 12 | +LetsPlot.setup_html() |
| 13 | + |
| 14 | +# Set seed for reproducibility |
| 15 | +np.random.seed(42) |
| 16 | + |
| 17 | +# Generate simulated GWAS data |
| 18 | +n_snps_per_chrom = 2000 |
| 19 | +chromosomes = [str(i) for i in range(1, 23)] |
| 20 | + |
| 21 | +data = [] |
| 22 | +cumulative_pos = 0 |
| 23 | +chrom_centers = {} |
| 24 | + |
| 25 | +for i, chrom in enumerate(chromosomes): |
| 26 | + # Random positions within chromosome (scaled by chromosome "size") |
| 27 | + chrom_size = 250_000_000 - i * 5_000_000 # Varying chromosome sizes |
| 28 | + positions = np.sort(np.random.randint(1, chrom_size, n_snps_per_chrom)) |
| 29 | + |
| 30 | + # Generate p-values - mostly non-significant with some peaks |
| 31 | + p_values = np.random.uniform(0, 1, n_snps_per_chrom) |
| 32 | + |
| 33 | + # Add some significant peaks (simulate real GWAS signals) |
| 34 | + if chrom in ["2", "6", "11", "17"]: |
| 35 | + peak_idx = np.random.choice(n_snps_per_chrom, size=15, replace=False) |
| 36 | + p_values[peak_idx] = 10 ** (-np.random.uniform(6, 12, 15)) |
| 37 | + |
| 38 | + # Add suggestive signals to more chromosomes |
| 39 | + if chrom in ["1", "5", "8", "14", "19"]: |
| 40 | + suggestive_idx = np.random.choice(n_snps_per_chrom, size=10, replace=False) |
| 41 | + p_values[suggestive_idx] = 10 ** (-np.random.uniform(4.5, 7, 10)) |
| 42 | + |
| 43 | + # Calculate cumulative position for x-axis |
| 44 | + cumulative_positions = positions + cumulative_pos |
| 45 | + |
| 46 | + # Store chromosome center for labeling |
| 47 | + chrom_centers[chrom] = cumulative_pos + chrom_size / 2 |
| 48 | + |
| 49 | + for pos, cum_pos, pval in zip(positions, cumulative_positions, p_values): |
| 50 | + data.append( |
| 51 | + { |
| 52 | + "chromosome": chrom, |
| 53 | + "position": pos, |
| 54 | + "cumulative_pos": cum_pos, |
| 55 | + "p_value": pval, |
| 56 | + "neg_log10_p": -np.log10(pval), |
| 57 | + } |
| 58 | + ) |
| 59 | + |
| 60 | + cumulative_pos += chrom_size |
| 61 | + |
| 62 | +df = pd.DataFrame(data) |
| 63 | + |
| 64 | +# Create chromosome index for coloring (alternating pattern) |
| 65 | +df["chrom_idx"] = df["chromosome"].astype(int) % 2 |
| 66 | + |
| 67 | +# Assign colors based on alternating pattern |
| 68 | +df["color_group"] = df["chrom_idx"].map({0: "even", 1: "odd"}) |
| 69 | + |
| 70 | +# Significance thresholds |
| 71 | +genome_wide_threshold = -np.log10(5e-8) # ~7.3 |
| 72 | +suggestive_threshold = -np.log10(1e-5) # 5 |
| 73 | + |
| 74 | +# Mark significant SNPs |
| 75 | +df["significant"] = df["neg_log10_p"] >= genome_wide_threshold |
| 76 | + |
| 77 | +# Create x-axis tick positions and labels |
| 78 | +tick_positions = [chrom_centers[c] for c in chromosomes] |
| 79 | + |
| 80 | +# Create the Manhattan plot |
| 81 | +plot = ( |
| 82 | + ggplot(df, aes(x="cumulative_pos", y="neg_log10_p", color="color_group")) |
| 83 | + + geom_point(size=1.5, alpha=0.7) |
| 84 | + + scale_color_manual(values=["#306998", "#7A9BBD"], guide="none") |
| 85 | + # Highlight significant points |
| 86 | + + geom_point( |
| 87 | + data=df[df["significant"]], mapping=aes(x="cumulative_pos", y="neg_log10_p"), color="#DC2626", size=3, alpha=0.9 |
| 88 | + ) |
| 89 | + # Genome-wide significance threshold line |
| 90 | + + geom_hline(yintercept=genome_wide_threshold, linetype="dashed", color="#DC2626", size=0.8) |
| 91 | + # Suggestive threshold line |
| 92 | + + geom_hline(yintercept=suggestive_threshold, linetype="dotted", color="#666666", size=0.6) |
| 93 | + + labs(title="manhattan-gwas · letsplot · pyplots.ai", x="Chromosome", y="-log₁₀(p-value)") |
| 94 | + + scale_x_continuous(breaks=tick_positions, labels=chromosomes) |
| 95 | + + theme_minimal() |
| 96 | + + theme( |
| 97 | + plot_title=element_text(size=28, face="bold"), |
| 98 | + axis_title_x=element_text(size=22), |
| 99 | + axis_title_y=element_text(size=22), |
| 100 | + axis_text_x=element_text(size=14), |
| 101 | + axis_text_y=element_text(size=16), |
| 102 | + panel_grid_major_x=element_blank(), |
| 103 | + panel_grid_minor=element_blank(), |
| 104 | + ) |
| 105 | + + ggsize(1600, 900) |
| 106 | +) |
| 107 | + |
| 108 | +# Save as PNG (scale 3x for 4800x2700) |
| 109 | +ggsave(plot, "plot.png", path=".", scale=3) |
| 110 | + |
| 111 | +# Save interactive HTML version |
| 112 | +ggsave(plot, "plot.html", path=".") |
0 commit comments