-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcorr.py
More file actions
52 lines (40 loc) · 1.43 KB
/
corr.py
File metadata and controls
52 lines (40 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from collections import defaultdict
from typing import Iterable
import typer
from rosalind import parse_fasta, reverse_complement
def hamming(s1: str, s2: str) -> int:
    """Return the number of positions at which ``s1`` and ``s2`` differ.

    Characters are compared pairwise from the start of both strings.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The count of mismatching positions. When the strings have different
        lengths, only the overlapping prefix is compared (``zip`` stops at
        the shorter input), so surplus trailing characters are ignored.
    """
    # Booleans sum as 0/1, so this counts the mismatching pairs directly.
    return sum(c1 != c2 for c1, c2 in zip(s1, s2))
def build_distance_mapping(rna1: str, rnas: Iterable[str]) -> dict[int, list[str]]:
    """Group ``rnas`` by their Hamming distance from ``rna1``.

    Args:
        rna1: The reference string every candidate is compared against.
        rnas: Candidate strings to bucket.

    Returns:
        A ``defaultdict(list)`` keyed by distance; each value lists the
        candidates at that distance, in the order they were iterated.
        Because it is a ``defaultdict``, looking up a distance that no
        candidate has yields an empty list (and inserts that key).
    """
    by_distance = defaultdict(list)
    for candidate in rnas:
        by_distance[hamming(rna1, candidate)].append(candidate)
    return by_distance
def build_errors(rnas: list[str]) -> Iterable[str]:
    """Yield the reads that have no second occurrence in ``rnas``.

    For each read, the matches at Hamming distance 0 are counted twice over:
    once against the reads themselves and once against the reverse
    complement of every read. The read always matches itself in the first
    count, so a total below 2 means nothing else in the collection backs it
    up, and it is yielded as a suspected error.

    NOTE(review): a read that is its own reverse complement also matches
    itself in the second count, so it reaches a total of 2 even when it
    occurs only once and is never yielded. Confirm this is intended.

    Args:
        rnas: The reads to examine.

    Yields:
        Each read whose combined distance-0 match count is below 2, in
        input order.
    """
    # The reverse complements do not depend on the read being examined, so
    # build them once instead of once per read.
    rev_comps = [reverse_complement(other) for other in rnas]

    for rna in rnas:
        same = build_distance_mapping(rna, rnas)[0]
        same_as_rev_comp = build_distance_mapping(rna, rev_comps)[0]
        if len(same) + len(same_as_rev_comp) < 2:
            yield rna
def main(filename: str):
    """Print a correction for every suspected sequencing error in a FASTA file.

    Reads are loaded from FILENAME with parse_fasta (presumably yielding the
    sequences as strings; verify against the rosalind module). Reads flagged
    by build_errors are compared with the remaining reads, and each flagged
    read that lies at Hamming distance 1 from a remaining read, or from the
    reverse complement of one, is printed as "<error>-><correction>".
    Flagged reads with no such neighbour produce no output.
    """
    with open(filename) as f:
        rnas = list(parse_fasta(f))

    errors = list(build_errors(rnas))
    error_set = set(errors)

    # De-duplicate the remaining reads while keeping input order. A plain
    # set difference has no defined iteration order, which made the
    # correction chosen below (the first candidate) vary between runs
    # whenever more than one candidate existed.
    correct = [rna for rna in dict.fromkeys(rnas) if rna not in error_set]

    # Independent of the error being examined, so computed once up front
    # rather than once per error.
    rev_comps = [reverse_complement(rna) for rna in correct]

    for error in errors:
        # Prefer a read that is itself one substitution away from the error.
        direct = build_distance_mapping(error, correct)[1]
        if direct:
            print(f"{error}->{direct[0]}")
            continue

        # Otherwise fall back to the reverse complement of a remaining read.
        via_rev_comp = build_distance_mapping(error, rev_comps)[1]
        if via_rev_comp:
            print(f"{error}->{via_rev_comp[0]}")
# Script entry point: when the file is executed directly (not imported),
# hand main to typer.run so it is invoked from the command line.
if __name__ == "__main__":
    typer.run(main)