-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdna_string_manipulation.py
More file actions
52 lines (44 loc) · 1.64 KB
/
Copy pathdna_string_manipulation.py
File metadata and controls
52 lines (44 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from typing import List, Optional
# Characters accepted as nucleotides: A, T, G and C in both upper and lower case.
# Used as the default allow-list by remove_non_nucleotides().
valid_nucleotides = {"A","T","G","C","a","t","c","g"}
def to_uppercase(sequence: str) -> str:
    """
    Return the sequence with every nucleotide letter in upper case.
    e.g. "atcg" -> "ATCG"
    """
    uppercased = sequence.upper()
    return uppercased
def to_lowercase(sequence: str) -> str:
    """
    Convert all nucleotides in the sequence to lower case.
    e.g. "ATCG" -> "atcg"

    Characters without a lower-case form are returned unchanged.
    """
    return sequence.lower()
def remove_non_nucleotides(sequence:str,valid_characters:set= valid_nucleotides)->str:
    """
    Strip every character that is not in valid_characters from the sequence.
    The relative order of the characters that are kept is unchanged.
    e.g. "ATTCG!XBYTTGC" -> "ATTCGTTGC"
    """
    kept = []
    for base in sequence:
        # Skip anything outside the allow-list.
        if base not in valid_characters:
            continue
        kept.append(base)
    return "".join(kept)
def split_into_codons(sequence: str, reading_frame: int = 1) -> List[str]:
    """
    Split a sequence into upper-cased codons (groups of 3 nucleotides).

    reading_frame selects where to start: 1 -> index 0, 2 -> index 1,
    3 -> index 2. Leftover nucleotides at the end (fewer than 3 bases)
    are ignored.

    Raises ValueError if reading_frame is not 1, 2 or 3.
    """
    start_index = reading_frame - 1
    # Validate first so a bad frame fails before any work is done, and
    # report the parameter the caller actually passed.
    if not 0 <= start_index < 3:
        raise ValueError('reading_frame must be between 1 and 3')
    # Same normalisation as to_uppercase(), inlined to keep this block self-contained.
    sequence = sequence.upper()
    # Stopping at len - 2 guarantees that every slice is a complete codon.
    return [
        sequence[i:i + 3]
        for i in range(start_index, len(sequence) - 2, 3)
    ]
def merge_dna_fragments(fragments: List[str], separator: Optional[str] = " ") -> str:
    """
    Merge a list of DNA fragments into a single sequence.

    separator is inserted between consecutive fragments (default: one
    space). Passing None or "" joins the fragments with nothing in between.
    Each fragment is converted with str() before joining.
    Returns "" for an empty list.
    """
    # The signature allows None, but None has no .join(); treat it as "no separator".
    joiner = "" if separator is None else separator
    return joiner.join(str(fragment) for fragment in fragments)