|
| 1 | +""" pyplots.ai |
| 2 | +manhattan-gwas: Manhattan Plot for GWAS |
| 3 | +Library: bokeh 3.8.1 | Python 3.13.11 |
| 4 | +Quality: 91/100 | Created: 2025-12-31 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from bokeh.io import export_png, output_file, save |
| 10 | +from bokeh.models import ColumnDataSource, Label, Span |
| 11 | +from bokeh.plotting import figure |
| 12 | + |
| 13 | + |
| 14 | +# Data - Simulated GWAS data with significant peaks |
| 15 | +np.random.seed(42) |
| 16 | + |
| 17 | +# Chromosome sizes (simplified, in millions of base pairs) |
| 18 | +chrom_sizes = { |
| 19 | + "1": 248, |
| 20 | + "2": 242, |
| 21 | + "3": 198, |
| 22 | + "4": 190, |
| 23 | + "5": 181, |
| 24 | + "6": 170, |
| 25 | + "7": 159, |
| 26 | + "8": 145, |
| 27 | + "9": 138, |
| 28 | + "10": 133, |
| 29 | + "11": 135, |
| 30 | + "12": 133, |
| 31 | + "13": 114, |
| 32 | + "14": 107, |
| 33 | + "15": 101, |
| 34 | + "16": 90, |
| 35 | + "17": 83, |
| 36 | + "18": 80, |
| 37 | + "19": 58, |
| 38 | + "20": 64, |
| 39 | + "21": 46, |
| 40 | + "22": 50, |
| 41 | +} |
| 42 | + |
| 43 | +# Generate SNP data |
| 44 | +n_snps_per_chrom = 2000 |
| 45 | +data = [] |
| 46 | +cumulative_pos = 0 |
| 47 | +chrom_centers = {} |
| 48 | + |
| 49 | +for chrom, size in chrom_sizes.items(): |
| 50 | + # Random positions within chromosome |
| 51 | + positions = np.sort(np.random.randint(1, size * 1_000_000, n_snps_per_chrom)) |
| 52 | + |
| 53 | + # Base p-values (mostly non-significant) |
| 54 | + p_values = np.random.uniform(0.001, 1.0, n_snps_per_chrom) |
| 55 | + |
| 56 | + # Add some significant peaks for certain chromosomes |
| 57 | + if chrom in ["2", "6", "11", "17"]: |
| 58 | + # Add 5-15 significant SNPs per peak chromosome |
| 59 | + n_significant = np.random.randint(5, 15) |
| 60 | + peak_indices = np.random.choice(n_snps_per_chrom, n_significant, replace=False) |
| 61 | + p_values[peak_indices] = 10 ** np.random.uniform(-12, -8, n_significant) |
| 62 | + |
| 63 | + # Calculate cumulative position |
| 64 | + cumulative_positions = positions + cumulative_pos |
| 65 | + chrom_centers[chrom] = cumulative_pos + (size * 1_000_000) / 2 |
| 66 | + |
| 67 | + for i in range(n_snps_per_chrom): |
| 68 | + data.append( |
| 69 | + { |
| 70 | + "chromosome": chrom, |
| 71 | + "position": positions[i], |
| 72 | + "cumulative_pos": cumulative_positions[i], |
| 73 | + "p_value": p_values[i], |
| 74 | + "neg_log_p": -np.log10(p_values[i]), |
| 75 | + } |
| 76 | + ) |
| 77 | + |
| 78 | + cumulative_pos += size * 1_000_000 |
| 79 | + |
| 80 | +df = pd.DataFrame(data) |
| 81 | + |
| 82 | +# Assign colors based on chromosome (alternating) |
| 83 | +colors = ["#306998", "#7BA3C9"] # Python Blue and lighter shade |
| 84 | +df["color"] = df["chromosome"].apply(lambda x: colors[int(x) % 2]) |
| 85 | + |
| 86 | +# Highlight significant SNPs (above genome-wide threshold) |
| 87 | +significance_threshold = -np.log10(5e-8) # ~7.3 |
| 88 | +significant_mask = df["neg_log_p"] >= significance_threshold |
| 89 | +df.loc[significant_mask, "color"] = "#FFD43B" # Python Yellow for significant |
| 90 | + |
| 91 | +# Adjust point sizes - smaller for non-significant, larger for significant |
| 92 | +df["size"] = 6 |
| 93 | +df.loc[significant_mask, "size"] = 12 |
| 94 | + |
| 95 | +# Create plot |
| 96 | +source = ColumnDataSource(df) |
| 97 | + |
| 98 | +p = figure( |
| 99 | + width=4800, |
| 100 | + height=2700, |
| 101 | + title="manhattan-gwas · bokeh · pyplots.ai", |
| 102 | + x_axis_label="Genomic Position", |
| 103 | + y_axis_label="-log₁₀(p-value)", |
| 104 | + tools="pan,wheel_zoom,box_zoom,reset,save", |
| 105 | +) |
| 106 | + |
| 107 | +# Scatter plot |
| 108 | +p.scatter(x="cumulative_pos", y="neg_log_p", source=source, size="size", color="color", alpha=0.7, line_color=None) |
| 109 | + |
| 110 | +# Genome-wide significance threshold line (p < 5e-8) |
| 111 | +significance_line = Span( |
| 112 | + location=significance_threshold, dimension="width", line_color="#E63946", line_dash="dashed", line_width=3 |
| 113 | +) |
| 114 | +p.add_layout(significance_line) |
| 115 | + |
| 116 | +# Suggestive threshold line (p < 1e-5) |
| 117 | +suggestive_threshold = -np.log10(1e-5) # = 5 |
| 118 | +suggestive_line = Span( |
| 119 | + location=suggestive_threshold, dimension="width", line_color="#2A9D8F", line_dash="dotted", line_width=2 |
| 120 | +) |
| 121 | +p.add_layout(suggestive_line) |
| 122 | + |
| 123 | +# Add threshold labels |
| 124 | +sig_label = Label( |
| 125 | + x=cumulative_pos * 0.98, |
| 126 | + y=significance_threshold + 0.3, |
| 127 | + text="p = 5×10⁻⁸", |
| 128 | + text_font_size="16pt", |
| 129 | + text_color="#E63946", |
| 130 | +) |
| 131 | +p.add_layout(sig_label) |
| 132 | + |
| 133 | +sug_label = Label( |
| 134 | + x=cumulative_pos * 0.98, |
| 135 | + y=suggestive_threshold + 0.3, |
| 136 | + text="p = 1×10⁻⁵", |
| 137 | + text_font_size="16pt", |
| 138 | + text_color="#2A9D8F", |
| 139 | +) |
| 140 | +p.add_layout(sug_label) |
| 141 | + |
| 142 | +# Style the plot |
| 143 | +p.title.text_font_size = "28pt" |
| 144 | +p.xaxis.axis_label_text_font_size = "22pt" |
| 145 | +p.yaxis.axis_label_text_font_size = "22pt" |
| 146 | +p.xaxis.major_label_text_font_size = "16pt" |
| 147 | +p.yaxis.major_label_text_font_size = "18pt" |
| 148 | + |
| 149 | +# Hide x-axis tick labels (we'll add chromosome labels instead) |
| 150 | +p.xaxis.major_label_text_font_size = "0pt" |
| 151 | +p.xaxis.major_tick_line_color = None |
| 152 | +p.xaxis.minor_tick_line_color = None |
| 153 | + |
| 154 | +# Add chromosome labels below the plot |
| 155 | +for chrom, center in chrom_centers.items(): |
| 156 | + chrom_label = Label(x=center, y=-0.8, text=chrom, text_font_size="14pt", text_align="center", text_color="#333333") |
| 157 | + p.add_layout(chrom_label) |
| 158 | + |
| 159 | +# Grid styling |
| 160 | +p.xgrid.grid_line_alpha = 0.3 |
| 161 | +p.ygrid.grid_line_alpha = 0.3 |
| 162 | + |
| 163 | +# Background |
| 164 | +p.background_fill_color = "#FAFAFA" |
| 165 | +p.border_fill_color = "#FFFFFF" |
| 166 | + |
| 167 | +# Axis styling |
| 168 | +p.yaxis.axis_line_width = 2 |
| 169 | +p.xaxis.axis_line_width = 2 |
| 170 | + |
| 171 | +# Set y-axis range to show chromosome labels |
| 172 | +p.y_range.start = -1.5 |
| 173 | +p.y_range.end = df["neg_log_p"].max() + 1 |
| 174 | + |
| 175 | +# Save PNG |
| 176 | +export_png(p, filename="plot.png") |
| 177 | + |
| 178 | +# Save HTML for interactivity |
| 179 | +output_file("plot.html") |
| 180 | +save(p) |
0 commit comments