Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions examples/msprime/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# msprime Examples

Each sub-directory contains a self-contained example. The order in
which the examples are to appear is specified in `order.json` (an
array of directory names in the expected order).

In each example directory you'll find:

* `config.toml` - must conform to the specification outlined here:
https://docs.pyscript.net/latest/user-guide/configuration/ This is
parsed and ultimately turned into a JSON representation as part of
the package's API object.
* `setup.py` - Python code for contextual and environmental setup,
NOT SEEN BY THE END USER, but is run before the `code.py` code is
evaluated. Allows us to create useful (IPython) shims, avoid
repeating boilerplate and whatnot.
* `code.py` - the actual code added to the editor which forms the
practical example of using the package.
49 changes: 49 additions & 0 deletions examples/msprime/ancestry_basics/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
A first look at msprime: simulating the genealogy of a sample.

msprime simulates the ancestral history of a sample of chromosomes
drawn from a population. The result is a "tree sequence" -- a compact
representation of all the genealogical trees along a chromosome.

Docs: https://tskit.dev/msprime/docs/stable/
"""
from IPython.core.display import display, HTML

import numpy as np
import matplotlib.pyplot as plt
import msprime


heading("A small coalescent simulation")
note(
"We sample 6 diploid individuals (so 12 chromosomes) from a "
"population of effective size 10,000 and simulate their shared "
"ancestry back to a common ancestor."
)

ancestry = msprime.sim_ancestry(
samples=6,
population_size=10_000,
random_seed=42,
)

note(f"Tree sequence summary:")
display(HTML(f"<pre>{ancestry}</pre>"), append=True)

# With no recombination there is a single tree spanning the whole
# sequence. Pull it out and look at it.
tree = ancestry.first()
note(
f"Number of trees: <strong>{ancestry.num_trees}</strong>. "
f"Time to most recent common ancestor (TMRCA): "
f"<strong>{tree.tmrca(0, 1):.0f}</strong> generations "
f"(for samples 0 and 1)."
)

heading("The genealogy as text")
note(
"Tskit can render trees as ASCII art. Leaves are present-day "
"samples; internal nodes are inferred ancestors with their "
"ages in generations."
)
display(HTML(f"<pre>{tree.draw_text()}</pre>"), append=True)
1 change: 1 addition & 0 deletions examples/msprime/ancestry_basics/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["msprime", "matplotlib", "numpy"]
40 changes: 40 additions & 0 deletions examples/msprime/ancestry_basics/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Shim IPython's display API onto PyScript so example code written in a
Jupyter/IPython idiom runs unmodified in the browser.
"""

import sys
import types
import js
from pyscript import window, HTML, display as _display

js.alert = window.alert


def display(*args, **kwargs):
return _display(
*args, **kwargs, target=__pyscript_display_target__,
)


ipython = types.ModuleType("IPython")
core = types.ModuleType("IPython.core")
core_display = types.ModuleType("IPython.core.display")
core_display.display = display
core_display.HTML = HTML
ipython.core = core
core.display = core_display
ipython.get_ipython = lambda: None
ipython.display = core_display
sys.modules["IPython"] = ipython
sys.modules["IPython.core"] = core
sys.modules["IPython.core.display"] = core_display
sys.modules["IPython.display"] = core_display


def heading(text, level=2):
display(HTML(f"<h{level}>{text}</h{level}>"), append=True)


def note(text):
display(HTML(f"<p>{text}</p>"), append=True)
77 changes: 77 additions & 0 deletions examples/msprime/demography_two_populations/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# ---------------------------------------------------------------------
# Building a demographic model: two populations that split in the past.
# ---------------------------------------------------------------------

heading("Two populations diverging from a common ancestor")
note(
"We define a Demography with two present-day populations, A and B, "
"that split from an ancestral population ANC 2,000 generations ago. "
"Then we sample 20 diploids from each and compare their genetic "
"diversity (pi) and divergence (Fst)."
)

demography = msprime.Demography()
demography.add_population(name="A", initial_size=10_000)
demography.add_population(name="B", initial_size=5_000)
demography.add_population(name="ANC", initial_size=10_000)
demography.add_population_split(
time=2_000, derived=["A", "B"], ancestral="ANC",
)

note("Population table from the demography:")
display(HTML(f"<pre>{demography.debug()}</pre>"), append=True)

ancestry = msprime.sim_ancestry(
samples={"A": 20, "B": 20},
demography=demography,
sequence_length=2_000_000,
recombination_rate=1e-8,
random_seed=99,
)
mutated = msprime.sim_mutations(ancestry, rate=1e-8, random_seed=99)

# Sample sets, expressed as lists of sample node IDs per population.
samples_A = mutated.samples(population=0)
samples_B = mutated.samples(population=1)

# Diversity within each population, and Fst between them.
pi_A = mutated.diversity(sample_sets=samples_A)
pi_B = mutated.diversity(sample_sets=samples_B)
fst = mutated.Fst(sample_sets=[samples_A, samples_B])

note(
f"Variant sites: <strong>{mutated.num_sites:,}</strong>. "
f"Population A pi = <strong>{pi_A:.5f}</strong>, "
f"Population B pi = <strong>{pi_B:.5f}</strong>, "
f"Fst(A, B) = <strong>{fst:.4f}</strong>."
)

heading("Allele frequency spectrum, per population")
note(
"The site frequency spectrum (SFS) counts variants by how many "
"samples carry the derived allele. Population A has more "
"diversity, so its spectrum extends further to the right."
)

afs_A = mutated.allele_frequency_spectrum(
sample_sets=[samples_A], polarised=True, span_normalise=False,
)
afs_B = mutated.allele_frequency_spectrum(
sample_sets=[samples_B], polarised=True, span_normalise=False,
)

# Drop the monomorphic (0 and n) bins for plotting.
freqs_A = afs_A[1:-1]
freqs_B = afs_B[1:-1]
x = np.arange(1, len(freqs_A) + 1)

fig, ax = plt.subplots(figsize=(9, 4))
width = 0.4
ax.bar(x - width / 2, freqs_A, width, color="steelblue", label="A")
ax.bar(x + width / 2, freqs_B, width, color="indianred", label="B")
ax.set_title("Site frequency spectrum")
ax.set_xlabel("Derived allele count in sample")
ax.set_ylabel("Number of variant sites")
ax.legend()
fig.tight_layout()
display(fig, append=True)
1 change: 1 addition & 0 deletions examples/msprime/demography_two_populations/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["msprime", "matplotlib", "numpy"]
24 changes: 24 additions & 0 deletions examples/msprime/demography_two_populations/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Lighter setup for later cells: same names, no IPython shim."""
import js
from pyscript import window, HTML, display as _display

js.alert = window.alert


def display(*args, **kwargs):
return _display(
*args, **kwargs, target=__pyscript_display_target__,
)


def heading(text, level=2):
display(HTML(f"<h{level}>{text}</h{level}>"), append=True)


def note(text):
display(HTML(f"<p>{text}</p>"), append=True)


import numpy as np
import matplotlib.pyplot as plt
import msprime
5 changes: 5 additions & 0 deletions examples/msprime/order.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
"ancestry_basics",
"recombination_and_mutations",
"demography_two_populations"
]
70 changes: 70 additions & 0 deletions examples/msprime/recombination_and_mutations/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# ---------------------------------------------------------------------
# Adding recombination and layering mutations onto the genealogy.
# ---------------------------------------------------------------------

heading("Recombination breaks the chromosome into pieces")
note(
"With recombination, different segments of the chromosome have "
"different genealogical trees. We simulate a 1 Mb chromosome "
"for 10 diploid samples."
)

ancestry = msprime.sim_ancestry(
samples=10,
population_size=10_000,
sequence_length=1_000_000,
recombination_rate=1e-8,
random_seed=7,
)

note(
f"Trees along the 1 Mb chromosome: "
f"<strong>{ancestry.num_trees}</strong>."
)

# Each tree spans an interval. Show the first few.
spans = []
for tree in ancestry.trees():
spans.append((tree.interval.left, tree.interval.right, tree.num_edges))
if len(spans) >= 5:
break

note("First five trees (interval and edge count):")
rows = "".join(
f"<tr><td>{l:,.0f}</td><td>{r:,.0f}</td><td>{e}</td></tr>"
for l, r, e in spans
)
display(HTML(
"<table border='1' cellpadding='4'>"
"<tr><th>left</th><th>right</th><th>edges</th></tr>"
f"{rows}</table>"
), append=True)

heading("Sprinkle mutations onto the genealogy")
note(
"sim_mutations adds neutral mutations along the branches at a "
"given per-site, per-generation rate. The result is genetic "
"variation we can analyze."
)

mutated = msprime.sim_mutations(
ancestry,
rate=1e-8,
random_seed=7,
)

note(
f"Variant sites generated: "
f"<strong>{mutated.num_sites}</strong>."
)

# Plot the distribution of variant positions along the chromosome.
positions = np.array([site.position for site in mutated.sites()])

fig, ax = plt.subplots(figsize=(9, 3))
ax.hist(positions, bins=40, color="seagreen", edgecolor="white")
ax.set_title("Distribution of mutations along the 1 Mb chromosome")
ax.set_xlabel("Position (bp)")
ax.set_ylabel("Number of variant sites")
fig.tight_layout()
display(fig, append=True)
1 change: 1 addition & 0 deletions examples/msprime/recombination_and_mutations/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["msprime", "matplotlib", "numpy"]
24 changes: 24 additions & 0 deletions examples/msprime/recombination_and_mutations/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Lighter setup for later cells: same names, no IPython shim."""
import js
from pyscript import window, HTML, display as _display

js.alert = window.alert


def display(*args, **kwargs):
return _display(
*args, **kwargs, target=__pyscript_display_target__,
)


def heading(text, level=2):
display(HTML(f"<h{level}>{text}</h{level}>"), append=True)


def note(text):
display(HTML(f"<p>{text}</p>"), append=True)


import numpy as np
import matplotlib.pyplot as plt
import msprime