-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreverse_complement_generation.py
More file actions
65 lines (55 loc) · 2.48 KB
/
reverse_complement_generation.py
File metadata and controls
65 lines (55 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from typing import Dict
class InvalidNucleotideError(Exception):
    """Error signalling that a sequence holds a character that is not an accepted nucleotide code."""
# Complement lookup table (IUPAC codes), covering upper- and lower-case symbols.
# The two strings below are position-aligned: the character at index i of the
# first string maps to the character at index i of the second.
complement_map: Dict[str, str] = dict(zip(
    # Keys: standard bases (both cases), then degenerate codes (both cases).
    "ATGC" "atgc"
    "RYSWKMHDVBNX" "ryswkmhdvbnx",
    # Values: the complement of the key at the same position.
    "TACG" "tacg"
    "YRSWMKDHBVNX" "yrswmkdhbvnx",
))

# Every symbol that has an entry in the lookup table above.
valid_iupac_nucleotides = set(complement_map)
def generate_reverse_complement(sequence: str,
                                orientation: str = "5'-3'",
                                case_sensitive: bool = False
                                ) -> str:
    """Return the complement of a nucleotide sequence, reversed by default.

    Args:
        sequence: The input DNA sequence string. Every character must be a
            key of the module-level ``complement_map``.
        orientation: The desired output orientation. ``"5'-3'"`` (default)
            returns the reverse complement; ``"3'-5'"`` returns the
            complement without reversing it. Surrounding whitespace is
            ignored.
        case_sensitive: If True, each output base keeps the case of the
            input base it was derived from (useful for masked sequences).
            If False (default), the output is entirely uppercase.

    Returns:
        The complemented sequence in the requested orientation. An empty
        input yields an empty string.

    Raises:
        InvalidNucleotideError: If the sequence contains a character that
            has no entry in ``complement_map``. The message lists the
            offending characters as they appear in the input, sorted.
        ValueError: If ``orientation`` is not one of the two accepted values.
    """
    # Validate the raw input, before any case folding: str.upper() can turn a
    # non-nucleotide character into valid letters (e.g. "ß" becomes "SS"),
    # which would otherwise pass the check and change the sequence length.
    invalid_characters = set(sequence) - set(complement_map)
    if invalid_characters:
        # The list is joined outside the f-string because reusing the
        # enclosing quote character inside a replacement field is a
        # SyntaxError before Python 3.12. Sorting makes the message
        # deterministic (set iteration order is not).
        offending = ",".join(sorted(invalid_characters))
        raise InvalidNucleotideError(
            f"Invalid nucleotides present: {offending}"
        )

    working_sequence = sequence if case_sensitive else sequence.upper()

    # complement_map holds both cases, so one table serves both modes: after
    # upper-casing, only its uppercase entries can ever be hit.
    complement = working_sequence.translate(str.maketrans(complement_map))

    orientation = orientation.strip().lower()
    if orientation == "5'-3'":
        return complement[::-1]
    if orientation == "3'-5'":
        return complement
    raise ValueError(f"Invalid orientation:{orientation}")