|
| 1 | +""" pyplots.ai |
| 2 | +manhattan-gwas: Manhattan Plot for GWAS |
| 3 | +Library: altair 6.0.0 | Python 3.13.11 |
| 4 | +Quality: 91/100 | Created: 2025-12-31 |
| 5 | +""" |
| 6 | + |
| 7 | +import altair as alt |
| 8 | +import numpy as np |
| 9 | +import pandas as pd |
| 10 | + |
| 11 | + |
| 12 | +# Data - Simulated GWAS data with random p-values and significant peaks |
| 13 | +np.random.seed(42) |
| 14 | + |
| 15 | +# Chromosome lengths (approximate human chromosome sizes in Mb) |
| 16 | +chrom_lengths = { |
| 17 | + "1": 249, |
| 18 | + "2": 243, |
| 19 | + "3": 198, |
| 20 | + "4": 191, |
| 21 | + "5": 182, |
| 22 | + "6": 171, |
| 23 | + "7": 159, |
| 24 | + "8": 145, |
| 25 | + "9": 138, |
| 26 | + "10": 134, |
| 27 | + "11": 135, |
| 28 | + "12": 133, |
| 29 | + "13": 114, |
| 30 | + "14": 107, |
| 31 | + "15": 102, |
| 32 | + "16": 90, |
| 33 | + "17": 83, |
| 34 | + "18": 80, |
| 35 | + "19": 59, |
| 36 | + "20": 64, |
| 37 | + "21": 47, |
| 38 | + "22": 51, |
| 39 | +} |
| 40 | + |
| 41 | +# Generate SNPs for each chromosome |
| 42 | +data = [] |
| 43 | +cumulative_pos = 0 |
| 44 | +chrom_centers = {} |
| 45 | + |
| 46 | +for chrom, length in chrom_lengths.items(): |
| 47 | + # Number of SNPs proportional to chromosome length |
| 48 | + n_snps = int(length * 20) # ~20 SNPs per Mb |
| 49 | + positions = np.sort(np.random.randint(1, length * 1_000_000, n_snps)) |
| 50 | + |
| 51 | + # Base p-values (mostly non-significant) |
| 52 | + pvalues = np.random.uniform(0.001, 1.0, n_snps) |
| 53 | + |
| 54 | + # Add some significant peaks on specific chromosomes |
| 55 | + if chrom in ["2", "6", "8", "15"]: |
| 56 | + n_sig = np.random.randint(5, 15) |
| 57 | + sig_indices = np.random.choice(n_snps, n_sig, replace=False) |
| 58 | + pvalues[sig_indices] = 10 ** np.random.uniform(-10, -7, n_sig) |
| 59 | + |
| 60 | + # Add suggestive hits on other chromosomes |
| 61 | + if chrom in ["3", "11", "19"]: |
| 62 | + n_sug = np.random.randint(3, 8) |
| 63 | + sug_indices = np.random.choice(n_snps, n_sug, replace=False) |
| 64 | + pvalues[sug_indices] = 10 ** np.random.uniform(-6, -5, n_sug) |
| 65 | + |
| 66 | + # Calculate cumulative positions |
| 67 | + cum_positions = positions + cumulative_pos |
| 68 | + chrom_centers[chrom] = cumulative_pos + (length * 1_000_000) / 2 |
| 69 | + |
| 70 | + for pos, cum_pos, pval in zip(positions, cum_positions, pvalues, strict=True): |
| 71 | + data.append( |
| 72 | + { |
| 73 | + "chromosome": chrom, |
| 74 | + "position": pos, |
| 75 | + "cumulative_position": cum_pos, |
| 76 | + "p_value": pval, |
| 77 | + "neg_log_p": -np.log10(pval), |
| 78 | + } |
| 79 | + ) |
| 80 | + |
| 81 | + cumulative_pos += length * 1_000_000 |
| 82 | + |
| 83 | +df = pd.DataFrame(data) |
| 84 | + |
| 85 | +# Threshold values |
| 86 | +genome_wide_threshold = -np.log10(5e-8) # ~7.3 |
| 87 | +suggestive_threshold = -np.log10(1e-5) # 5.0 |
| 88 | + |
| 89 | +# Create chromosome label data |
| 90 | +chrom_label_df = pd.DataFrame( |
| 91 | + [{"chrom_label": chrom, "center": center, "y_pos": -0.5} for chrom, center in chrom_centers.items()] |
| 92 | +) |
| 93 | + |
| 94 | +# Define color scheme - alternating for chromosomes |
| 95 | +color_scale = alt.Scale( |
| 96 | + domain=[str(i) for i in range(1, 23)], |
| 97 | + range=["#306998", "#7F7F7F"] * 11, # Alternating Python blue and gray |
| 98 | +) |
| 99 | + |
| 100 | +# Main scatter plot |
| 101 | +points = ( |
| 102 | + alt.Chart(df) |
| 103 | + .mark_circle(opacity=0.7) |
| 104 | + .encode( |
| 105 | + x=alt.X( |
| 106 | + "cumulative_position:Q", |
| 107 | + axis=alt.Axis(title="Chromosome", labels=False, ticks=False, titleFontSize=22), |
| 108 | + scale=alt.Scale(domain=[0, cumulative_pos]), |
| 109 | + ), |
| 110 | + y=alt.Y( |
| 111 | + "neg_log_p:Q", |
| 112 | + title="-log₁₀(p-value)", |
| 113 | + axis=alt.Axis(titleFontSize=22, labelFontSize=16), |
| 114 | + scale=alt.Scale(domain=[0, max(df["neg_log_p"]) + 1]), |
| 115 | + ), |
| 116 | + color=alt.Color("chromosome:N", scale=color_scale, legend=None), |
| 117 | + size=alt.condition( |
| 118 | + alt.datum.neg_log_p > genome_wide_threshold, |
| 119 | + alt.value(100), # Larger for significant hits |
| 120 | + alt.value(30), # Smaller for others |
| 121 | + ), |
| 122 | + tooltip=["chromosome:N", "position:Q", "p_value:Q", "neg_log_p:Q"], |
| 123 | + ) |
| 124 | +) |
| 125 | + |
| 126 | +# Genome-wide significance threshold line |
| 127 | +gw_line = ( |
| 128 | + alt.Chart(pd.DataFrame({"y": [genome_wide_threshold]})) |
| 129 | + .mark_rule(strokeDash=[8, 4], color="#E74C3C", strokeWidth=2) |
| 130 | + .encode(y="y:Q") |
| 131 | +) |
| 132 | + |
| 133 | +# Suggestive threshold line |
| 134 | +sug_line = ( |
| 135 | + alt.Chart(pd.DataFrame({"y": [suggestive_threshold]})) |
| 136 | + .mark_rule(strokeDash=[4, 4], color="#F39C12", strokeWidth=2) |
| 137 | + .encode(y="y:Q") |
| 138 | +) |
| 139 | + |
| 140 | +# Chromosome labels as text marks at bottom |
| 141 | +chrom_text = ( |
| 142 | + alt.Chart(chrom_label_df) |
| 143 | + .mark_text(fontSize=14, fontWeight="bold", baseline="top", dy=5) |
| 144 | + .encode(x=alt.X("center:Q", scale=alt.Scale(domain=[0, cumulative_pos])), text="chrom_label:N") |
| 145 | +) |
| 146 | + |
| 147 | +# Combine layers - points with threshold lines |
| 148 | +main_chart = alt.layer(points, gw_line, sug_line).properties(width=1500, height=750) |
| 149 | + |
| 150 | +# Chromosome labels at bottom |
| 151 | +labels_chart = ( |
| 152 | + alt.Chart(chrom_label_df) |
| 153 | + .mark_text(fontSize=16, baseline="middle") |
| 154 | + .encode(x=alt.X("center:Q", scale=alt.Scale(domain=[0, cumulative_pos]), axis=None), text="chrom_label:N") |
| 155 | + .properties(width=1500, height=30) |
| 156 | +) |
| 157 | + |
| 158 | +# Vertical concat with shared x-axis |
| 159 | +chart = ( |
| 160 | + alt.vconcat(main_chart, labels_chart, spacing=0) |
| 161 | + .properties(title=alt.Title("manhattan-gwas · altair · pyplots.ai", fontSize=28, anchor="middle")) |
| 162 | + .configure_axis(labelFontSize=16, titleFontSize=22, grid=True, gridOpacity=0.3) |
| 163 | + .configure_view(strokeWidth=0) |
| 164 | +) |
| 165 | + |
| 166 | +# Save as PNG and HTML |
| 167 | +chart.save("plot.png", scale_factor=3.0) |
| 168 | +chart.save("plot.html") |
0 commit comments