|
77 | 77 | }, |
78 | 78 | { |
79 | 79 | "cell_type": "code", |
80 | | - "execution_count": 1, |
81 | | - "metadata": {}, |
82 | | - "outputs": [ |
83 | | - { |
84 | | - "name": "stdout", |
85 | | - "output_type": "stream", |
86 | | - "text": [ |
87 | | - "mkdir: cannot create directory ‘variants’: File exists\n" |
88 | | - ] |
89 | | - } |
90 | | - ], |
| 80 | + "execution_count": null, |
| 81 | + "metadata": {}, |
| 82 | + "outputs": [], |
91 | 83 | "source": [ |
92 | 84 | "!mkdir variants" |
93 | 85 | ] |
|
111 | 103 | }, |
112 | 104 | { |
113 | 105 | "cell_type": "code", |
114 | | - "execution_count": 2, |
| 106 | + "execution_count": null, |
115 | 107 | "metadata": {}, |
116 | 108 | "outputs": [], |
117 | 109 | "source": [ |
|
135 | 127 | }, |
136 | 128 | { |
137 | 129 | "cell_type": "code", |
138 | | - "execution_count": 3, |
139 | | - "metadata": {}, |
140 | | - "outputs": [ |
141 | | - { |
142 | | - "name": "stdout", |
143 | | - "output_type": "stream", |
144 | | - "text": [ |
145 | | - "[vg call]: You can restrict the search to GBZ haplotypes, often to the benefict of speed and accuracy, with the -z option\n" |
146 | | - ] |
147 | | - } |
148 | | - ], |
| 130 | + "execution_count": null, |
| 131 | + "metadata": {}, |
| 132 | + "outputs": [], |
149 | 133 | "source": [ |
150 | 134 | "!vg call -k alignments/yprp.chrVIII.pggb.mapped.pack -t 4 graphs/yprp.chrVIII.pggb.giraffe.gbz > variants/SK1xyprp.chrVIII.pggb.graph_calls.vcf" |
151 | 135 | ] |
|
196 | 180 | }, |
197 | 181 | { |
198 | 182 | "cell_type": "code", |
199 | | - "execution_count": 5, |
200 | | - "metadata": {}, |
201 | | - "outputs": [ |
202 | | - { |
203 | | - "name": "stdout", |
204 | | - "output_type": "stream", |
205 | | - "text": [ |
206 | | - "SN\t0\tnumber of samples:\t1\n", |
207 | | - "SN\t0\tnumber of records:\t4573\n", |
208 | | - "SN\t0\tnumber of no-ALTs:\t0\n", |
209 | | - "SN\t0\tnumber of SNPs:\t3997\n", |
210 | | - "SN\t0\tnumber of MNPs:\t186\n", |
211 | | - "SN\t0\tnumber of indels:\t358\n", |
212 | | - "SN\t0\tnumber of others:\t41\n", |
213 | | - "SN\t0\tnumber of multiallelic sites:\t32\n", |
214 | | - "SN\t0\tnumber of multiallelic SNP sites:\t6\n" |
215 | | - ] |
216 | | - } |
217 | | - ], |
| 183 | + "execution_count": null, |
| 184 | + "metadata": {}, |
| 185 | + "outputs": [], |
218 | 186 | "source": [ |
219 | 187 | "!bcftools stats variants/SK1xyprp.chrVIII.pggb.graph_calls.vcf | grep \"^SN\"" |
220 | 188 | ] |
|
228 | 196 | }, |
229 | 197 | { |
230 | 198 | "cell_type": "code", |
231 | | - "execution_count": 6, |
232 | | - "metadata": {}, |
233 | | - "outputs": [ |
234 | | - { |
235 | | - "data": { |
236 | | - "text/html": [ |
237 | | - "\n", |
238 | | - " <iframe\n", |
239 | | - " width=\"800\"\n", |
240 | | - " height=\"400\"\n", |
241 | | - " src=\"../html/flashcard_variants.html\"\n", |
242 | | - " frameborder=\"0\"\n", |
243 | | - " allowfullscreen\n", |
244 | | - " \n", |
245 | | - " ></iframe>\n", |
246 | | - " " |
247 | | - ], |
248 | | - "text/plain": [ |
249 | | - "<IPython.lib.display.IFrame at 0x7f14893989b0>" |
250 | | - ] |
251 | | - }, |
252 | | - "execution_count": 6, |
253 | | - "metadata": {}, |
254 | | - "output_type": "execute_result" |
255 | | - } |
256 | | - ], |
| 199 | + "execution_count": null, |
| 200 | + "metadata": {}, |
| 201 | + "outputs": [], |
257 | 202 | "source": [ |
258 | 203 | "from IPython.display import IFrame\n", |
259 | 204 | "IFrame('../html/flashcard_variants.html', width=800, height=400)" |
|
302 | 247 | }, |
303 | 248 | { |
304 | 249 | "cell_type": "code", |
305 | | - "execution_count": 8, |
| 250 | + "execution_count": null, |
306 | 251 | "metadata": {}, |
307 | 252 | "outputs": [], |
308 | 253 | "source": [ |
|
327 | 272 | }, |
328 | 273 | { |
329 | 274 | "cell_type": "code", |
330 | | - "execution_count": 9, |
| 275 | + "execution_count": null, |
331 | 276 | "metadata": {}, |
332 | 277 | "outputs": [], |
333 | 278 | "source": [ |
|
351 | 296 | }, |
352 | 297 | { |
353 | 298 | "cell_type": "code", |
354 | | - "execution_count": 10, |
| 299 | + "execution_count": null, |
355 | 300 | "metadata": {}, |
356 | 301 | "outputs": [], |
357 | 302 | "source": [ |
|
367 | 312 | }, |
368 | 313 | { |
369 | 314 | "cell_type": "code", |
370 | | - "execution_count": 11, |
| 315 | + "execution_count": null, |
371 | 316 | "metadata": {}, |
372 | 317 | "outputs": [], |
373 | 318 | "source": [ |
|
383 | 328 | }, |
384 | 329 | { |
385 | 330 | "cell_type": "code", |
386 | | - "execution_count": 12, |
| 331 | + "execution_count": null, |
387 | 332 | "metadata": {}, |
388 | 333 | "outputs": [], |
389 | 334 | "source": [ |
|
399 | 344 | }, |
400 | 345 | { |
401 | 346 | "cell_type": "code", |
402 | | - "execution_count": 13, |
403 | | - "metadata": {}, |
404 | | - "outputs": [ |
405 | | - { |
406 | | - "name": "stdout", |
407 | | - "output_type": "stream", |
408 | | - "text": [ |
409 | | - "SN\t0\tnumber of samples:\t1\n", |
410 | | - "SN\t0\tnumber of records:\t5401\n", |
411 | | - "SN\t0\tnumber of no-ALTs:\t0\n", |
412 | | - "SN\t0\tnumber of SNPs:\t4549\n", |
413 | | - "SN\t0\tnumber of MNPs:\t495\n", |
414 | | - "SN\t0\tnumber of indels:\t358\n", |
415 | | - "SN\t0\tnumber of others:\t108\n", |
416 | | - "SN\t0\tnumber of multiallelic sites:\t239\n", |
417 | | - "SN\t0\tnumber of multiallelic SNP sites:\t56\n" |
418 | | - ] |
419 | | - } |
420 | | - ], |
| 347 | + "execution_count": null, |
| 348 | + "metadata": {}, |
| 349 | + "outputs": [], |
421 | 350 | "source": [ |
422 | 351 | "!bcftools stats variants/SK1xyprp.chrVIII.pggb.aug_calls.vcf | grep \"^SN\"" |
423 | 352 | ] |
|
517 | 446 | }, |
518 | 447 | { |
519 | 448 | "cell_type": "code", |
520 | | - "execution_count": 14, |
521 | | - "metadata": {}, |
522 | | - "outputs": [ |
523 | | - { |
524 | | - "name": "stdout", |
525 | | - "output_type": "stream", |
526 | | - "text": [ |
527 | | - "SN\t0\tnumber of samples:\t1\n", |
528 | | - "SN\t0\tnumber of records:\t83627\n", |
529 | | - "SN\t0\tnumber of no-ALTs:\t0\n", |
530 | | - "SN\t0\tnumber of SNPs:\t73426\n", |
531 | | - "SN\t0\tnumber of MNPs:\t3126\n", |
532 | | - "SN\t0\tnumber of indels:\t6327\n", |
533 | | - "SN\t0\tnumber of others:\t880\n", |
534 | | - "SN\t0\tnumber of multiallelic sites:\t347\n", |
535 | | - "SN\t0\tnumber of multiallelic SNP sites:\t27\n" |
536 | | - ] |
537 | | - } |
538 | | - ], |
| 449 | + "execution_count": null, |
| 450 | + "metadata": {}, |
| 451 | + "outputs": [], |
539 | 452 | "source": [ |
540 | 453 | "!vg convert graphs/yprp.fullgenome.pggb.giraffe.gbz > graphs/yprp.fullgenome.pggb.giraffe.vg\n", |
541 | 454 | "\n", |
|
570 | 483 | }, |
571 | 484 | { |
572 | 485 | "cell_type": "code", |
573 | | - "execution_count": 1, |
| 486 | + "execution_count": null, |
574 | 487 | "metadata": {}, |
575 | 488 | "outputs": [], |
576 | 489 | "source": [ |
|
603 | 516 | }, |
604 | 517 | { |
605 | 518 | "cell_type": "code", |
606 | | - "execution_count": 2, |
| 519 | + "execution_count": null, |
607 | 520 | "metadata": {}, |
608 | 521 | "outputs": [], |
609 | 522 | "source": [ |
|
619 | 532 | }, |
620 | 533 | { |
621 | 534 | "cell_type": "code", |
622 | | - "execution_count": 3, |
623 | | - "metadata": {}, |
624 | | - "outputs": [ |
625 | | - { |
626 | | - "name": "stdout", |
627 | | - "output_type": "stream", |
628 | | - "text": [ |
629 | | - "SN\t0\tnumber of samples:\t2\n", |
630 | | - "SN\t0\tnumber of records:\t5979\n", |
631 | | - "SN\t0\tnumber of no-ALTs:\t0\n", |
632 | | - "SN\t0\tnumber of SNPs:\t5327\n", |
633 | | - "SN\t0\tnumber of MNPs:\t220\n", |
634 | | - "SN\t0\tnumber of indels:\t455\n", |
635 | | - "SN\t0\tnumber of others:\t47\n", |
636 | | - "SN\t0\tnumber of multiallelic sites:\t169\n", |
637 | | - "SN\t0\tnumber of multiallelic SNP sites:\t35\n" |
638 | | - ] |
639 | | - } |
640 | | - ], |
| 535 | + "execution_count": null, |
| 536 | + "metadata": {}, |
| 537 | + "outputs": [], |
641 | 538 | "source": [ |
642 | 539 | "!bcftools stats variants/yprp.chrVIII.pggb.S288Cpaths.deconstruct.vcf | grep \"^SN\"" |
643 | 540 | ] |
|
696 | 593 | }, |
697 | 594 | { |
698 | 595 | "cell_type": "code", |
699 | | - "execution_count": 5, |
| 596 | + "execution_count": null, |
700 | 597 | "metadata": {}, |
701 | 598 | "outputs": [], |
702 | 599 | "source": [ |
|
721 | 618 | }, |
722 | 619 | { |
723 | 620 | "cell_type": "code", |
724 | | - "execution_count": 6, |
725 | | - "metadata": {}, |
726 | | - "outputs": [ |
727 | | - { |
728 | | - "data": { |
729 | | - "text/html": [ |
730 | | - "\n", |
731 | | - " <iframe\n", |
732 | | - " width=\"800\"\n", |
733 | | - " height=\"400\"\n", |
734 | | - " src=\"../html/flashcard_viz.html\"\n", |
735 | | - " frameborder=\"0\"\n", |
736 | | - " allowfullscreen\n", |
737 | | - " \n", |
738 | | - " ></iframe>\n", |
739 | | - " " |
740 | | - ], |
741 | | - "text/plain": [ |
742 | | - "<IPython.lib.display.IFrame at 0x7f7e8d3dc0e0>" |
743 | | - ] |
744 | | - }, |
745 | | - "execution_count": 6, |
746 | | - "metadata": {}, |
747 | | - "output_type": "execute_result" |
748 | | - } |
749 | | - ], |
| 621 | + "execution_count": null, |
| 622 | + "metadata": {}, |
| 623 | + "outputs": [], |
750 | 624 | "source": [ |
751 | 625 | "from IPython.display import IFrame\n", |
752 | 626 | "IFrame('../html/flashcard_viz.html', width=800, height=400)" |
|
824 | 698 | } |
825 | 699 | ], |
826 | 700 | "metadata": { |
827 | | - "environment": { |
828 | | - "kernel": "conda-env-nigms-pangenomics-nigms-pangenomics", |
829 | | - "name": "workbench-notebooks.m129", |
830 | | - "type": "gcloud", |
831 | | - "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m129" |
832 | | - }, |
833 | 701 | "kernelspec": { |
834 | 702 | "display_name": "nigms-pangenomics (Local) (Local)", |
835 | 703 | "language": "python", |
|
0 commit comments