Skip to content

Commit e331970

Browse files
committed
Add analyses for 2026 study
1 parent 546027b commit e331970

9 files changed

Lines changed: 6364 additions & 0 deletions

analyses/.DS_Store

6 KB
Binary file not shown.

analyses/01_Covariates.ipynb

Lines changed: 911 additions & 0 deletions
Large diffs are not rendered by default.

analyses/02_GWAS_GP2.ipynb

Lines changed: 608 additions & 0 deletions
Large diffs are not rendered by default.

analyses/03_Munge_Sumstats.ipynb

Lines changed: 1292 additions & 0 deletions
Large diffs are not rendered by default.

analyses/04_Meta_Analysis.ipynb

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "99fcfef3-26d9-4ffd-b3d8-b64dfd437a3b",
6+
"metadata": {},
7+
"source": [
8+
"# Meta-analysis \n",
9+
"1. AAC meta\n",
10+
"2. AFR/AAC meta"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 4,
16+
"id": "5417430a-1122-4a63-b107-62b911338cf7",
17+
"metadata": {},
18+
"outputs": [
19+
{
20+
"name": "stdout",
21+
"output_type": "stream",
22+
"text": [
23+
"PACKAGE VERSIONS (29-DEC-2025)\n",
24+
"\tseaborn==0.13.2\n",
25+
"\tstatsmodels==0.14.4\n",
26+
"\tmatplotlib==3.7.2\n",
27+
"\tnumpy==1.24.4\n",
28+
"\tpandas==2.0.3\n",
29+
"\tscipy==1.11.1\n",
30+
"\n",
31+
"PYTHON INFO\n",
32+
"\tPython executable: /usr/local/Anaconda/envs/py3.10/bin/python\n"
33+
]
34+
}
35+
],
36+
"source": [
37+
"## Import the necessary packages \n",
38+
"import os\n",
39+
"import numpy as np\n",
40+
"import pandas as pd\n",
41+
"import math\n",
42+
"import sys\n",
43+
"import subprocess\n",
44+
"import statsmodels.api as sm\n",
45+
"import scipy\n",
46+
"from scipy import stats\n",
47+
"from scipy.stats import chi2\n",
48+
"import matplotlib.pyplot as plt\n",
49+
"import seaborn as sns\n",
50+
"from IPython.display import display\n",
51+
"\n",
52+
"## Print out package versions\n",
53+
"## Getting packages loaded into this notebook and their versions to allow for reproducibility\n",
54+
"import pkg_resources\n",
55+
"import types\n",
56+
"from datetime import date\n",
57+
"\n",
58+
"today = date.today()\n",
59+
"date = today.strftime(\"%d-%b-%Y\").upper()\n",
60+
"\n",
61+
"## Define function \n",
62+
"def get_imports():\n",
63+
" for name, val in globals().items():\n",
64+
" if isinstance(val, types.ModuleType):\n",
65+
" name = val.__name__.split(\".\")[0]\n",
66+
" elif isinstance(val, type):\n",
67+
" name = val.__module__.split(\".\")[0]\n",
68+
"\n",
69+
" poorly_named_packages = {\n",
70+
" \"PIL\": \"Pillow\",\n",
71+
" \"sklearn\": \"scikit-learn\"\n",
72+
" }\n",
73+
" if name in poorly_named_packages:\n",
74+
" name = poorly_named_packages[name]\n",
75+
"\n",
76+
" yield name\n",
77+
"\n",
78+
"## Get a list of packages imported \n",
79+
"imports = list(set(get_imports()))\n",
80+
"\n",
81+
"requirements = []\n",
82+
"for m in pkg_resources.working_set:\n",
83+
" if m.project_name in imports and m.project_name != \"pip\":\n",
84+
" requirements.append((m.project_name, m.version))\n",
85+
"\n",
86+
"## Print out packages and versions \n",
87+
"print(f\"PACKAGE VERSIONS ({date})\")\n",
88+
"for r in requirements:\n",
89+
" print(\"\\t{}=={}\".format(*r))\n",
90+
"\n",
91+
"## Also print which Python is being used\n",
92+
"print(\"\\nPYTHON INFO\")\n",
93+
"print(f\"\\tPython executable: {sys.executable}\")"
94+
]
95+
},
96+
{
97+
"cell_type": "markdown",
98+
"id": "a3cb3410-4939-47c0-8359-68ec46d369b1",
99+
"metadata": {},
100+
"source": [
101+
"## AAC Only Meta: GP2 AAC / 23andMe AAC / MVP AAC"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": null,
107+
"id": "7a4953ef-5423-48d9-9fd1-2396cd6e25f6",
108+
"metadata": {},
109+
"outputs": [],
110+
"source": [
111+
"%%bash\n",
112+
"ml plink/1.9\n",
113+
"\n",
114+
"plink --meta-analysis \\\n",
115+
"${WORK_DIR}/data/GP2_R11/AAC/GP2_AAC_GWAS_R11.wAlleles.FOR_PLINK.txt \\\n",
116+
"${WORK_DIR}/data/23andMe/23andMe_AAC.wAlleles.v2.FOR_PLINK.txt \\\n",
117+
"${WORK_DIR}/data/MVP/MVP_AAC.wAlleles.FOR_PLINK.txt \\\n",
118+
"+ logscale weighted-z study qt \\\n",
119+
"--out ${WORK_DIR}/results/AAC_META_GP2_23andMe_MVP/GP2_R11_AAC_23andMe_MVP_GWAS.wAlleles.PLINK_meta"
120+
]
121+
},
122+
{
123+
"cell_type": "markdown",
124+
"id": "c13e37cb-8a26-43be-a810-391cbddea3d6",
125+
"metadata": {},
126+
"source": [
127+
"Write out METAL command (here for documentation -- needs to be run interactively!)\n",
128+
"\n",
129+
"```bash\n",
130+
"module load metal \n",
131+
"metal << EOT \n",
132+
"\n",
133+
"# GENERAL OPTIONS\n",
134+
"SCHEME STDERR\n",
135+
"GENOMICCONTROL OFF\n",
136+
"AVERAGEFREQ ON\n",
137+
"MINMAXFREQ ON\n",
138+
"\n",
139+
"# === 1st: GP2 AAC R11 - munged ===\n",
140+
"MARKER SNP\n",
141+
"CHROMOSOMELABEL CHR\n",
142+
"POSITIONLABEL BP\n",
143+
"ALLELE A1 A2\n",
144+
"FREQ EAF\n",
145+
"EFFECT BETA\n",
146+
"STDERR SE\n",
147+
"PVALUE P\n",
148+
"WEIGHT NMISS\n",
149+
"PROCESS /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/data/GP2_R11/AAC/GP2_AAC_GWAS_R11.wAlleles.FOR_PLINK.txt\n",
150+
"\n",
151+
"# === 2nd: 23andMe AAC - munged ===\n",
152+
"MARKER SNP\n",
153+
"CHROMOSOMELABEL CHR\n",
154+
"POSITIONLABEL BP\n",
155+
"ALLELE A1 A2\n",
156+
"FREQ EAF\n",
157+
"EFFECT BETA\n",
158+
"STDERR SE\n",
159+
"PVALUE P\n",
160+
"WEIGHT NMISS \n",
161+
"PROCESS /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/data/23andMe/23andMe_AAC.wAlleles.v2.FOR_PLINK.txt\n",
162+
"\n",
163+
"# === 3rd: MVP AAC - munged ===\n",
164+
"MARKER SNP\n",
165+
"CHROMOSOMELABEL CHR\n",
166+
"POSITIONLABEL BP\n",
167+
"ALLELE A1 A2\n",
168+
"FREQ EAF\n",
169+
"EFFECT BETA\n",
170+
"STDERR SE\n",
171+
"PVALUE P\n",
172+
"WEIGHT NMISS \n",
173+
"PROCESS /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/data/MVP/MVP_AAC.wAlleles.FOR_PLINK.txt\n",
174+
"\n",
175+
"OUTFILE /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/results/AAC_META_GP2_23andMe_MVP/GP2_R11_AAC_23andMe_MVP_GWAS.wAlleles.METAL_meta .meta\n",
176+
"ANALYZE HETEROGENEITY\n",
177+
"QUIT\n",
178+
"```\n",
179+
"\n",
180+
"**Then press Control+D to submit the job!**"
181+
]
182+
},
183+
{
184+
"cell_type": "markdown",
185+
"id": "28a57347-066b-4f6f-ad4f-272739b23ced",
186+
"metadata": {},
187+
"source": [
188+
"## AFR/AAC Meta: GP2 AAC / GP2 AFR / 23andMe AAC / MVP AAC"
189+
]
190+
},
191+
{
192+
"cell_type": "code",
193+
"execution_count": null,
194+
"id": "6c3b95aa-1402-42bc-9bc5-f02c7f6916a6",
195+
"metadata": {},
196+
"outputs": [],
197+
"source": [
198+
"%%bash\n",
199+
"ml plink/1.9\n",
200+
"\n",
201+
"plink --meta-analysis \\\n",
202+
"${WORK_DIR}/data/GP2_R11/AAC/GP2_AAC_GWAS_R11.wAlleles.FOR_PLINK.txt \\\n",
203+
"${WORK_DIR}/data/GP2_R11/AFR/GP2_AFR_GWAS_R11.wAlleles.FOR_PLINK.txt \\\n",
204+
"${WORK_DIR}/data/23andMe/23andMe_AAC.wAlleles.v2.FOR_PLINK.txt \\\n",
205+
"${WORK_DIR}/data/MVP/MVP_AAC.wAlleles.FOR_PLINK.txt \\\n",
206+
"+ logscale weighted-z study qt \\\n",
207+
"--out ${WORK_DIR}/results/META_GP2_23andMe_MVP/GP2_R11_AAC_AFR_23andMe_MVP_GWAS.wAlleles.PLINK_meta"
208+
]
209+
},
210+
{
211+
"cell_type": "markdown",
212+
"id": "59242c6f-ed79-4ab6-a8dd-af932b4175c3",
213+
"metadata": {},
214+
"source": [
215+
"Write out METAL command (here for documentation -- needs to be run interactively!)\n",
216+
"\n",
217+
"```bash\n",
218+
"module load metal \n",
219+
"metal << EOT \n",
220+
"\n",
221+
"# GENERAL OPTIONS\n",
222+
"SCHEME STDERR\n",
223+
"GENOMICCONTROL OFF\n",
224+
"AVERAGEFREQ ON\n",
225+
"MINMAXFREQ ON\n",
226+
"\n",
227+
"# === 1st: GP2 AAC R11 - munged ===\n",
228+
"MARKER SNP\n",
229+
"CHROMOSOMELABEL CHR\n",
230+
"POSITIONLABEL BP\n",
231+
"ALLELE A1 A2\n",
232+
"FREQ EAF\n",
233+
"EFFECT BETA\n",
234+
"STDERR SE\n",
235+
"PVALUE P\n",
236+
"WEIGHT NMISS\n",
237+
"PROCESS /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/data/GP2_R11/AAC/GP2_AAC_GWAS_R11.wAlleles.FOR_PLINK.txt\n",
238+
"\n",
239+
"# === 2nd: GP2 AFR R11 - munged ===\n",
240+
"MARKER SNP\n",
241+
"CHROMOSOMELABEL CHR\n",
242+
"POSITIONLABEL BP\n",
243+
"ALLELE A1 A2\n",
244+
"FREQ EAF\n",
245+
"EFFECT BETA\n",
246+
"STDERR SE\n",
247+
"PVALUE P\n",
248+
"WEIGHT NMISS \n",
249+
"PROCESS /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/data/GP2_R11/AFR/GP2_AFR_GWAS_R11.wAlleles.FOR_PLINK.txt\n",
250+
"\n",
251+
"# === 3rd: 23andMe AAC - munged ===\n",
252+
"MARKER SNP\n",
253+
"CHROMOSOMELABEL CHR\n",
254+
"POSITIONLABEL BP\n",
255+
"ALLELE A1 A2\n",
256+
"FREQ EAF\n",
257+
"EFFECT BETA\n",
258+
"STDERR SE\n",
259+
"PVALUE P\n",
260+
"WEIGHT NMISS \n",
261+
"PROCESS /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/data/23andMe/23andMe_AAC.wAlleles.v2.FOR_PLINK.txt\n",
262+
"\n",
263+
"# === 4th: MVP AAC - munged ===\n",
264+
"MARKER SNP\n",
265+
"CHROMOSOMELABEL CHR\n",
266+
"POSITIONLABEL BP\n",
267+
"ALLELE A1 A2\n",
268+
"FREQ EAF\n",
269+
"EFFECT BETA\n",
270+
"STDERR SE\n",
271+
"PVALUE P\n",
272+
"WEIGHT NMISS \n",
273+
"PROCESS /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/data/MVP/MVP_AAC.wAlleles.FOR_PLINK.txt\n",
274+
"\n",
275+
"OUTFILE /data/CARD_AA/projects/2025_2026_AFR_AAC_GWAS/results/META_GP2_23andMe_MVP/GP2_R11_AAC_AFR_23andMe_MVP_GWAS.wAlleles.METAL_meta .meta\n",
276+
"ANALYZE HETEROGENEITY\n",
277+
"QUIT\n",
278+
"```\n",
279+
"\n",
280+
"**Then press Control+D to submit the job!**"
281+
]
282+
}
283+
],
284+
"metadata": {
285+
"kernelspec": {
286+
"display_name": "python/3.10",
287+
"language": "python",
288+
"name": "py3.10"
289+
},
290+
"language_info": {
291+
"codemirror_mode": {
292+
"name": "ipython",
293+
"version": 3
294+
},
295+
"file_extension": ".py",
296+
"mimetype": "text/x-python",
297+
"name": "python",
298+
"nbconvert_exporter": "python",
299+
"pygments_lexer": "ipython3",
300+
"version": "3.10.8"
301+
}
302+
},
303+
"nbformat": 4,
304+
"nbformat_minor": 5
305+
}

0 commit comments

Comments
 (0)