|
| 1 | +""" pyplots.ai |
| 2 | +manhattan-gwas: Manhattan Plot for GWAS |
| 3 | +Library: pygal 3.1.0 | Python 3.13.11 |
| 4 | +Quality: 82/100 | Created: 2025-12-31 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pygal |
| 9 | +from pygal.style import Style |
| 10 | + |
| 11 | + |
| 12 | +# Seed for reproducibility |
| 13 | +np.random.seed(42) |
| 14 | + |
| 15 | +# Chromosome lengths (simplified, in Mb scale) |
| 16 | +chrom_lengths = { |
| 17 | + "1": 249, |
| 18 | + "2": 243, |
| 19 | + "3": 198, |
| 20 | + "4": 191, |
| 21 | + "5": 182, |
| 22 | + "6": 171, |
| 23 | + "7": 159, |
| 24 | + "8": 146, |
| 25 | + "9": 141, |
| 26 | + "10": 136, |
| 27 | + "11": 135, |
| 28 | + "12": 134, |
| 29 | + "13": 115, |
| 30 | + "14": 107, |
| 31 | + "15": 102, |
| 32 | + "16": 90, |
| 33 | + "17": 83, |
| 34 | + "18": 80, |
| 35 | + "19": 59, |
| 36 | + "20": 64, |
| 37 | + "21": 47, |
| 38 | + "22": 51, |
| 39 | +} |
| 40 | + |
| 41 | +chromosomes = list(chrom_lengths.keys()) |
| 42 | +n_snps_per_chrom = 500 |
| 43 | + |
| 44 | +# Storage for data |
| 45 | +all_chroms = [] |
| 46 | +all_cumulative_pos = [] |
| 47 | +all_pvalues = [] |
| 48 | + |
| 49 | +# Track chromosome boundaries for x-axis labels |
| 50 | +chrom_midpoints = [] |
| 51 | +cumulative_offset = 0 |
| 52 | + |
| 53 | +for chrom in chromosomes: |
| 54 | + length = chrom_lengths[chrom] |
| 55 | + positions = np.sort(np.random.uniform(0, length, n_snps_per_chrom)) |
| 56 | + |
| 57 | + # Store chromosome midpoint for x-axis label |
| 58 | + chrom_midpoints.append(cumulative_offset + length / 2) |
| 59 | + |
| 60 | + # Generate p-values: mostly uniform with some significant hits |
| 61 | + pvalues = np.random.uniform(0, 1, n_snps_per_chrom) |
| 62 | + |
| 63 | + # Add significant peaks on selected chromosomes |
| 64 | + if chrom in ["6", "11", "16"]: |
| 65 | + n_sig = np.random.randint(3, 8) |
| 66 | + sig_indices = np.random.choice(n_snps_per_chrom, n_sig, replace=False) |
| 67 | + pvalues[sig_indices] = 10 ** (-np.random.uniform(8, 15, n_sig)) |
| 68 | + |
| 69 | + # Add suggestive signals |
| 70 | + n_sugg = np.random.randint(5, 15) |
| 71 | + sugg_indices = np.random.choice(n_snps_per_chrom, n_sugg, replace=False) |
| 72 | + pvalues[sugg_indices] = 10 ** (-np.random.uniform(5, 8, n_sugg)) |
| 73 | + |
| 74 | + cumulative_positions = positions + cumulative_offset |
| 75 | + all_chroms.extend([chrom] * n_snps_per_chrom) |
| 76 | + all_cumulative_pos.extend(cumulative_positions) |
| 77 | + all_pvalues.extend(pvalues) |
| 78 | + |
| 79 | + cumulative_offset += length |
| 80 | + |
| 81 | +# Convert to -log10 p-values |
| 82 | +neg_log_pvalues = -np.log10(np.array(all_pvalues)) |
| 83 | + |
| 84 | +# Thresholds |
| 85 | +genome_wide_sig = -np.log10(5e-8) # ~7.3 |
| 86 | +suggestive_sig = 5.0 # -log10(1e-5) |
| 87 | + |
| 88 | +# Custom style with explicit colors for threshold lines |
| 89 | +custom_style = Style( |
| 90 | + background="white", |
| 91 | + plot_background="white", |
| 92 | + foreground="#333333", |
| 93 | + foreground_strong="#333333", |
| 94 | + foreground_subtle="#666666", |
| 95 | + colors=( |
| 96 | + "#306998", # Blue - odd chromosomes |
| 97 | + "#888888", # Gray - even chromosomes |
| 98 | + "#D62728", # Red - significant points above threshold |
| 99 | + "#D62728", # Red - genome-wide threshold line |
| 100 | + "#FF7F0E", # Orange - suggestive threshold line |
| 101 | + ), |
| 102 | + title_font_size=56, |
| 103 | + label_font_size=36, |
| 104 | + major_label_font_size=32, |
| 105 | + legend_font_size=32, |
| 106 | + value_font_size=20, |
| 107 | + font_family="sans-serif", |
| 108 | +) |
| 109 | + |
| 110 | +# Create XY scatter chart |
| 111 | +chart = pygal.XY( |
| 112 | + width=4800, |
| 113 | + height=2700, |
| 114 | + style=custom_style, |
| 115 | + title="manhattan-gwas · pygal · pyplots.ai", |
| 116 | + x_title="Chromosome", |
| 117 | + y_title="-log₁₀(p-value)", |
| 118 | + show_legend=True, |
| 119 | + legend_at_bottom=True, |
| 120 | + legend_at_bottom_columns=5, |
| 121 | + legend_box_size=24, |
| 122 | + stroke=False, |
| 123 | + dots_size=5, |
| 124 | + show_x_guides=False, |
| 125 | + show_y_guides=True, |
| 126 | + truncate_label=-1, |
| 127 | + print_values=False, |
| 128 | + x_label_rotation=0, |
| 129 | + range=(0, 16), |
| 130 | + xrange=(0, cumulative_offset), |
| 131 | + tooltip_border_radius=10, |
| 132 | + explicit_size=True, |
| 133 | + spacing=30, |
| 134 | + margin=60, |
| 135 | + margin_bottom=200, |
| 136 | + margin_top=100, |
| 137 | +) |
| 138 | + |
| 139 | +# Create x_labels with chromosome numbers at midpoints |
| 140 | +# Use string labels for chromosomes 1-22 |
| 141 | +chart.x_labels = [str(i + 1) for i in range(len(chromosomes))] |
| 142 | +chart.x_labels_major_count = len(chromosomes) |
| 143 | + |
| 144 | +# Map positions to display label indices |
| 145 | +# We need to normalize x values to show chromosome labels |
| 146 | +# Pygal XY chart needs numeric x_labels for scatter, so we'll use custom formatter |
| 147 | +label_positions = chrom_midpoints.copy() |
| 148 | + |
| 149 | + |
| 150 | +def format_x_label(x_val): |
| 151 | + """Find closest chromosome midpoint and return chromosome number.""" |
| 152 | + if not label_positions: |
| 153 | + return "" |
| 154 | + min_dist = float("inf") |
| 155 | + closest_idx = 0 |
| 156 | + for i, pos in enumerate(label_positions): |
| 157 | + dist = abs(x_val - pos) |
| 158 | + if dist < min_dist: |
| 159 | + min_dist = dist |
| 160 | + closest_idx = i |
| 161 | + # Only return label if close to midpoint (within half chromosome width) |
| 162 | + if min_dist < 50: |
| 163 | + return str(closest_idx + 1) |
| 164 | + return "" |
| 165 | + |
| 166 | + |
| 167 | +# Set numeric x_labels at chromosome midpoints |
| 168 | +chart.x_labels = chrom_midpoints |
| 169 | +chart.x_value_formatter = lambda x: format_x_label(x) if x in chrom_midpoints else "" |
| 170 | + |
| 171 | +# Prepare data by chromosome with alternating colors |
| 172 | +odd_chrom_points = [] |
| 173 | +even_chrom_points = [] |
| 174 | +significant_points = [] # Points above genome-wide significance |
| 175 | + |
| 176 | +for idx, chrom in enumerate(chromosomes): |
| 177 | + chrom_mask = [c == chrom for c in all_chroms] |
| 178 | + chrom_x = [all_cumulative_pos[j] for j in range(len(all_chroms)) if chrom_mask[j]] |
| 179 | + chrom_y = [neg_log_pvalues[j] for j in range(len(all_chroms)) if chrom_mask[j]] |
| 180 | + |
| 181 | + for x, y in zip(chrom_x, chrom_y, strict=True): |
| 182 | + point = {"value": (x, y), "label": f"Chr {chrom}: {x:.1f} Mb, -log₁₀(p)={y:.2f}"} |
| 183 | + if y >= genome_wide_sig: |
| 184 | + significant_points.append(point) |
| 185 | + elif idx % 2 == 0: |
| 186 | + odd_chrom_points.append(point) |
| 187 | + else: |
| 188 | + even_chrom_points.append(point) |
| 189 | + |
| 190 | +# Add chromosome data series (blue for odd, gray for even) |
| 191 | +chart.add("Odd chromosomes", odd_chrom_points, stroke=False, show_dots=True) |
| 192 | +chart.add("Even chromosomes", even_chrom_points, stroke=False, show_dots=True) |
| 193 | + |
| 194 | +# Add significant points as separate series (highlighted) |
| 195 | +chart.add("Significant (p<5×10⁻⁸)", significant_points, stroke=False, show_dots=True) |
| 196 | + |
| 197 | +# Add threshold lines as dense scatter points (works better in PNG than stroke) |
| 198 | +# Using many small dots to create visible line effect |
| 199 | +n_line_points = 200 |
| 200 | +threshold_x = np.linspace(10, cumulative_offset - 10, n_line_points) |
| 201 | + |
| 202 | +# Genome-wide significance threshold line (y ≈ 7.3) |
| 203 | +gw_line_points = [ |
| 204 | + {"value": (x, genome_wide_sig), "label": f"Genome-wide threshold: -log₁₀(5×10⁻⁸) = {genome_wide_sig:.1f}"} |
| 205 | + for x in threshold_x |
| 206 | +] |
| 207 | +chart.add("p = 5×10⁻⁸ threshold", gw_line_points, stroke=True, show_dots=True, dots_size=2) |
| 208 | + |
| 209 | +# Suggestive threshold line (y = 5) |
| 210 | +sugg_line_points = [ |
| 211 | + {"value": (x, suggestive_sig), "label": f"Suggestive threshold: -log₁₀(1×10⁻⁵) = {suggestive_sig:.1f}"} |
| 212 | + for x in threshold_x |
| 213 | +] |
| 214 | +chart.add("p = 1×10⁻⁵ threshold", sugg_line_points, stroke=True, show_dots=True, dots_size=2) |
| 215 | + |
| 216 | +# Save outputs |
| 217 | +chart.render_to_file("plot.html") |
| 218 | +chart.render_to_png("plot.png") |
0 commit comments