Skip to content

Commit 9fecb38

Browse files
committed
Updating code to prevent multi-allelic variants
1 parent 68a7126 commit 9fecb38

7 files changed

Lines changed: 31 additions & 28 deletions

File tree

neat/read_simulator/utils/generate_variants.py

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ def generate_variants(
208208

209209
# Begin random mutations for this slice
210210
# Note that any new variant type will need its own handling code added in this area.
211-
debug = 0
212211
while variants_to_add_in_slice > 0:
213212
# We decrement now because we don't want to get stuck in a never-ending loop
214213
variants_to_add_in_slice -= 1
@@ -256,34 +255,25 @@ def generate_variants(
256255
# pick which ploid is mutated
257256
temp_variant.genotype = pick_ploids(options.ploidy, mutation_model.homozygous_freq, 1, options.rng)
258257

259-
# There shouldn't be a ton of overlapping variants, but this is to handle those.
260258
if location in return_variants:
261-
"""
262-
If the location already exists, then we'll need to force it to pick a ploid
263-
that currently doesn't have a variant. This overrides the default genotype
264-
variable created above, but it shouldn't happen very often.
265-
"""
266-
if return_variants.find_dups(temp_variant):
267-
# This compiles all the variants at this location, giving a 1 for every ploid that has a variant.
268-
composite_genotype = return_variants.compile_genotypes_for_location(location)
269-
if 0 not in composite_genotype:
270-
# Here's a counter to make sure we're not getting stuck on a single location
271-
debug += 1
272-
if debug > 1000000:
273-
_LOG.error("Check this if, as it may be causing an infinite loop.")
274-
sys.exit(999)
275-
# No suitable place to put this, so we skip.
276-
continue
277-
# This sets up a probability array with weights 1 for open spots (x==0) and 0 elsewhere
278-
probs = np.array([1 if x == 0 else 0 for x in composite_genotype])
279-
probs = probs / sum(probs)
280-
# Pick an index of a position to mutate based on the probabilities, which are uniform for 0s left
281-
# in the composite genotype
282-
ploid = options.rng.choice(list(range(len(composite_genotype))), p=probs)
283-
genotype = np.zeros(options.ploidy)
284-
genotype[ploid] = 1
285-
temp_variant.genotype = genotype
286-
# pdb.set_trace()
259+
existing_variants = return_variants.contig_variants[location]
260+
sv_involved = temp_variant.is_structural or any(
261+
v.is_structural for v in existing_variants
262+
)
263+
if not sv_involved:
264+
# Two independent point mutations at the same anchor occur with
265+
# probability p² — effectively impossible at realistic rates. Skip.
266+
continue
267+
# SV compound-het: assign to a free ploid on the other haplotype.
268+
composite_genotype = return_variants.compile_genotypes_for_location(location)
269+
if 0 not in composite_genotype:
270+
continue
271+
probs = np.array([1 if x == 0 else 0 for x in composite_genotype])
272+
probs = probs / sum(probs)
273+
ploid = options.rng.choice(list(range(len(composite_genotype))), p=probs)
274+
genotype = np.zeros(options.ploidy)
275+
genotype[ploid] = 1
276+
temp_variant.genotype = genotype
287277
# Make sure this new variant doesn't overlap an existing insertion or deletion
288278
in_deletion = return_variants.check_if_del(temp_variant)
289279
in_insertion = return_variants.check_if_ins(temp_variant)

neat/variants/base_variant.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ class BaseVariant(abc.ABC):
1414
A template for a variant to include in NEAT
1515
"""
1616

17+
is_structural: bool = False
18+
1719
@abc.abstractmethod
1820
def __init__(self, position1, length, position2, orientation, alt, genotype, qual_score, is_input, **kwargs):
1921
"""

neat/variants/copy_number_variant.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ class CopyNumberVariant(BaseVariant):
1515
:param position1: location of the variant.
1616
"""
1717

18+
is_structural = True
19+
1820
def __init__(self,
1921
position1: int,
2022
length: int = None,

neat/variants/duplication.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class Duplication(BaseVariant):
2626
:param kwargs: can be used to store data from input vars or unused variables from the base class.
2727
"""
2828

29+
is_structural = True
30+
2931
def __init__(self,
3032
position1: int,
3133
length: int,

neat/variants/inversion.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ class Inversion(BaseVariant):
2020
:param is_input: True if from an input vcf, in which case this variant will get priority.
2121
:param kwargs: can be used to store data from input vars or unused variables from the base class.
2222
"""
23+
24+
is_structural = True
25+
2326
def __init__(self,
2427
position1: int,
2528
length: int,

neat/variants/translocation.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ class Translocation(BaseVariant):
1616
:param kwargs: can be used to store data from input vars.
1717
"""
1818

19+
is_structural = True
20+
1921
def __init__(self,
2022
position1: int,
2123
length: int,

neat/variants/transposition.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ class Transposition(BaseVariant):
2020
:param kwargs: can be used to store data from input vars or unused variables from the base class.
2121
"""
2222

23+
is_structural = True
24+
2325
def __init__(self,
2426
position1: int,
2527
length: int,

0 commit comments

Comments
 (0)