Skip to content

Commit 3286b29

Browse files
committed
reimplementation
big changes, GUI added
1 parent e31a240 commit 3286b29

20 files changed

Lines changed: 67347 additions & 132 deletions

CUTable.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
'''
2+
Created on 08.02.2015
3+
4+
@author: david
5+
'''
6+
7+
from Bio.Data.CodonTable import standard_dna_table
8+
from Bio.Seq import Seq
9+
10+
class CUTable(object):
    '''
    Codon usage table built from one codon-label line and one count line.

    Attributes:
        codon2AA: dict mapping each DNA codon string to the amino acid given
            by the codon table, or to the string 'STOP' for stop codons.
        aa2CodonAndFreq: dict mapping each amino acid (or 'STOP') to a list of
            (codon, absolute count) tuples, in label order.
    '''

    def __init__(self, SPSUM_LABEL, spsum, code=standard_dna_table):
        '''
        Constructor

        SPSUM_LABEL is a string of whitespace separated codons
        spsum is a string of whitespace separated numbers (frequencies)
        as present in the .spsum files of Kazusa
        code is a Biopython codon-table object (TODO?: only standard at the moment)

        The n-th number in spsum is taken as the count of the n-th codon in
        SPSUM_LABEL; surplus numbers are ignored.

        Raises ValueError if spsum holds fewer numbers than SPSUM_LABEL holds
        codons, or if a used number is not an integer literal.
        Raises KeyError if a codon is neither a stop codon of ``code`` nor
        present in ``code.forward_table``.
        '''

        # codon >> aa map
        self.codon2AA = dict()
        # aa >> (codon, freq) list
        self.aa2CodonAndFreq = dict()

        # split() without an argument treats any run of whitespace as one
        # separator, so double spaces, tabs and a trailing newline do not
        # produce empty tokens.
        labels = SPSUM_LABEL.split()
        freqs = spsum.split()

        if len(freqs) < len(labels):
            raise ValueError(
                "expected at least {0} frequencies but got {1}".format(
                    len(labels), len(freqs)))

        for rawLabel, rawFreq in zip(labels, freqs):
            ## RNA labels to DNA labels
            codon = str(Seq(rawLabel).back_transcribe())
            ## convert frequencies to ints
            freq = int(rawFreq)

            if codon in code.stop_codons:
                aa = 'STOP'
            else:
                aa = code.forward_table[codon]

            self.codon2AA[codon] = aa
            self.aa2CodonAndFreq.setdefault(aa, list()).append((codon, freq))

    def print(self):
        '''
        print a (relatively) readable version of the CUTable to stdout

        For every amino acid the codons are listed together with the
        fractions returned by getCodonsForAA.
        '''
        # Inside the method body the name ``print`` still resolves to the
        # builtin, not to this method.
        for aa in self.aa2CodonAndFreq:
            print(aa + ":")
            for cd, fr in self.getCodonsForAA(aa):
                print("\t" + cd + ": " + str(fr))

    def getAAForCodon(self, codon):
        '''
        get the AA corresponding to codon ('STOP' for stop codons)

        Raises KeyError if codon is not part of the table.
        '''
        return self.codon2AA[codon]

    def getCodonRelativeUsage(self, codon):
        '''
        get the usage of codon relative to the most used codon of the same AA

        Returns the value getCodonsForAARelative reports for codon.
        Raises KeyError if codon is not part of the table.
        '''
        aa = self.codon2AA[codon]
        # first match wins, should a codon be listed more than once
        return next((fr for cd, fr in self.getCodonsForAARelative(aa)
                     if cd == codon), None)

    def getCodonUsage(self, codon):
        '''
        get the usage of codon as a fraction of all codons of the same AA

        Returns the value getCodonsForAA reports for codon.
        Raises KeyError if codon is not part of the table.
        '''
        aa = self.codon2AA[codon]
        # first match wins, should a codon be listed more than once
        return next((fr for cd, fr in self.getCodonsForAA(aa)
                     if cd == codon), None)

    def getCodonsForAA(self, aa):
        '''
        get a list of (codon, rel. usage) for all codons coding for aa

        rel. usage is the codon count divided by the summed count of all
        codons of aa. If that sum is zero every codon is reported with 0.0
        instead of raising ZeroDivisionError.

        Raises KeyError if aa is not part of the table.
        '''
        entries = self.aa2CodonAndFreq[aa]
        sumFr = sum(fr for _, fr in entries)

        if sumFr == 0:
            return [(cd, 0.0) for cd, _ in entries]

        return [(cd, fr / sumFr) for cd, fr in entries]

    def getCodonsForAARelative(self, aa):
        '''
        get a list of (codon, rel. usage %of max) for all codons coding for aa

        The value is the codon count divided by the highest count among the
        codons of aa (so the most used codon gets 1.0). If that maximum is
        zero every codon is reported with 0.0 instead of raising
        ZeroDivisionError.

        Raises KeyError if aa is not part of the table.
        '''
        entries = self.aa2CodonAndFreq[aa]
        maxFr = max((fr for _, fr in entries), default=0)

        if maxFr == 0:
            return [(cd, 0.0) for cd, _ in entries]

        return [(cd, fr / maxFr) for cd, fr in entries]

KazusaSPSUMHandler.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
'''
2+
Created on 07.02.2015
3+
4+
@author: david
5+
'''
6+
7+
from os import listdir
8+
from os.path import join
9+
10+
from CUTable import CUTable
11+
12+
class KazusaSPSUMHandler(object):
    '''
    Reads Kazusa .spsum files plus the SPSUM_LABEL file from a directory and
    offers lookup of the loaded species and their codon usage.

    Attributes:
        taxidToCodonUsage: dict taxid (str) >> raw codon usage line (str)
        descToTaxidAndNCDS: dict description (str) >> (taxid, nr. of CDS),
            both kept as strings exactly as read from the file
        SPSUM_LABEL: codon label line; only set once readSPSUM_LABEL has run
    '''

    def __init__(self, path=""):
        '''
        Constructor

        If path is non-empty, all data found in that directory is loaded
        right away (see loadSPSUMFromPath).
        '''

        self.taxidToCodonUsage = dict()
        self.descToTaxidAndNCDS = dict()

        if path:
            self.loadSPSUMFromPath(path)

    def loadSPSUMFromPath(self, path):
        '''
        Load every "*.spsum" file and the file named "SPSUM_LABEL" that sit
        directly in the directory path (no recursion).
        '''

        for f in listdir(path):
            if f.endswith(".spsum"):
                self.readSPSUMFile(join(path, f))
            elif f == "SPSUM_LABEL":
                self.readSPSUM_LABEL(join(path, f))

    def readSPSUMFile(self, file):
        '''
        Read one .spsum file into taxidToCodonUsage / descToTaxidAndNCDS.

        The file is read as pairs of lines: a description line of the form
        "taxid:desc:nrCDS" followed by the codon usage line. The description
        itself may contain ":"; only the first and the last colon separate
        the fields. Blank lines where a description is expected are skipped.

        Raises ValueError if a description line does not contain the two
        separating colons.
        '''
        # the with-block closes the file even when parsing fails
        with open(file) as fd:
            for descLine in fd:
                if not descLine.strip():
                    # e.g. trailing empty line at the end of the file
                    continue

                # the usage line follows directly; "" if the file ends here
                cu = next(fd, "")

                # split into taxid:desc:nrCDS
                taxid, firstSep, rest = descLine.partition(":")
                desc, lastSep, ncds = rest.rpartition(":")
                if not firstSep or not lastSep:
                    raise ValueError(
                        "malformed description line in {0}: {1!r}".format(
                            file, descLine))

                taxid = taxid.strip()
                self.taxidToCodonUsage[taxid] = cu.strip()
                self.descToTaxidAndNCDS[desc.strip()] = (taxid, ncds.strip())

    def readSPSUM_LABEL(self, file):
        '''
        Read the codon labels from the SPSUM_LABEL file: the first line is
        skipped, the second line (stripped) is stored in self.SPSUM_LABEL.
        '''
        with open(file) as fd:
            fd.readline()  # omit first line
            self.SPSUM_LABEL = fd.readline().strip()

    def search(self, query):
        '''
        Return a dict desc >> (taxid, nr. of CDS) of all loaded species whose
        description contains query (case sensitive substring match).
        '''
        return {desc: taxidNCS
                for desc, taxidNCS in self.descToTaxidAndNCDS.items()
                if query in desc}

    def getCUTable(self, taxid):
        '''
        Build a CUTable from the stored codon usage line of taxid.

        Raises KeyError if taxid was not loaded and AttributeError if the
        SPSUM_LABEL file has not been read yet.
        '''
        label = getattr(self, "SPSUM_LABEL", None)
        if label is None:
            # same exception type the plain attribute access would raise,
            # but with a message that names the actual cause
            raise AttributeError(
                "SPSUM_LABEL has not been loaded; call readSPSUM_LABEL first")

        return CUTable(label, self.taxidToCodonUsage[taxid])
94+
95+

OptimizerApp.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
'''
2+
Created on 14.02.2015
3+
4+
@author: David
5+
'''
6+
from KazusaSPSUMHandler import KazusaSPSUMHandler
7+
from codonoptimizer import *
8+
from Bio.Restriction.Restriction_Dictionary import rest_dict
9+
from Bio.Seq import Seq
10+
11+
import configparser
12+
13+
class OptimizerApp:
    '''
    Application state of the codon optimizer: configured species and
    restriction enzymes, the selected optimizer and the source / optimized
    sequences.

    Attributes:
        restrictionEnzymeList: list of enzyme names used as keys into
            Biopython's rest_dict
        speciesList: list of (taxid, name) tuples
        SPSUMHandler: KazusaSPSUMHandler loaded from the "res" directory
        possibleOptimizationStrategies: strategy names setOptimizer accepts
        optimizer: the active optimizer object, None until setOptimizer ran
        sourceSequence / optimizedSequence: sequences as upper case strings
    '''

    def __init__(self, config=None):
        '''
        Constructor

        config is an optional path of an ini file to load (see loadConfig).
        '''
        self.restrictionEnzymeList = list()
        self.speciesList = list()
        self.SPSUMHandler = KazusaSPSUMHandler("res")

        if config:
            self.loadConfig(config)

        self.possibleOptimizationStrategies = ["Fastest Codons", "Adapt Speed To Source"]
        self.optimizer = None

        self.sourceSequence = ""
        self.optimizedSequence = ""

    def setOptimizer(self, sourceTaxid, targetTaxid, strategy):
        '''
        Create the optimizer for strategy from the codon usage tables of the
        source and target species. An unknown strategy is ignored and leaves
        the current optimizer untouched.
        '''
        if strategy not in self.possibleOptimizationStrategies:
            return

        if strategy == "Fastest Codons":
            optimizerClass = MostFrequentCodonOptimizer
        elif strategy == "Adapt Speed To Source":
            optimizerClass = AdaptingCodonOptimizer
        else:
            # listed as possible but without an implementation here
            return

        self.optimizer = optimizerClass(self.SPSUMHandler.getCUTable(sourceTaxid),
                                        self.SPSUMHandler.getCUTable(targetTaxid))

    def setSourceSeq(self, seq):
        '''
        Store seq (converted to upper case) as source sequence.
        '''
        self.sourceSequence = seq.upper()

    def setOptimizedSeq(self, seq):
        '''
        Store seq (converted to upper case) as optimized sequence.
        '''
        self.optimizedSequence = seq.upper()

    def runOptimization(self):
        '''
        Let the optimizer compute the optimized sequence from the source
        sequence. setOptimizer must have been called successfully before.
        '''
        self.optimizedSequence = self.optimizer.getBestSequence(self.sourceSequence)
        # sanity check: optimization must not change the encoded protein
        assert Seq(self.sourceSequence).translate() == Seq(self.optimizedSequence).translate()

    def runRestricionRemoval(self):
        '''
        Let the optimizer remove the sites of all configured restriction
        enzymes from the optimized sequence.
        '''
        self.optimizedSequence = self.optimizer.removeRestrictionSites(
            self.sourceSequence, self.optimizedSequence, self._restrictionSites())
        # sanity check: site removal must not change the encoded protein
        assert Seq(self.sourceSequence).translate() == Seq(self.optimizedSequence).translate()

    def getCodonsForPrint(self, source=True):
        '''
        Return what the optimizer's SequenceToPrint yields for the source
        sequence (source=True) or the optimized sequence (source=False).
        Returns None as long as no optimizer is set.
        '''
        if not self.optimizer:
            return None

        sequence = self.sourceSequence if source else self.optimizedSequence
        return self.optimizer.SequenceToPrint(sequence, self._restrictionSites(), source)

    def testPrint(self):
        '''
        Debug helper: print the optimizer and the source sequence to stdout.
        '''
        print(self.optimizer)
        print(self.sourceSequence)

    def saveConfig(self, path):
        '''
        Write species and restriction enzymes to the ini file path.

        The section "config" gets three comma separated values:
        speciesTaxids, speciesNames and restrictionEnzymes.
        NOTE(review): names containing "," cannot be read back correctly by
        loadConfig.
        '''
        cp = configparser.ConfigParser()
        cp['config'] = {
            "speciesTaxids": ",".join([t for t, s in self.speciesList]),
            "speciesNames": ",".join([s for t, s in self.speciesList]),
            "restrictionEnzymes": ",".join(self.restrictionEnzymeList),
        }
        with open(path, 'w') as configfile:
            cp.write(configfile)

    def loadConfig(self, path):
        '''
        Read species and restriction enzymes from the ini file path.

        Loaded species are appended to speciesList, the restriction enzyme
        list is replaced. If the file does not exist or has no "config"
        section nothing is changed; ConfigParser.read() skips missing files
        silently, so this is the normal state on a first start.
        '''
        cp = configparser.ConfigParser()
        cp.read(path)

        if not cp.has_section("config"):
            return
        section = cp["config"]

        # empty entries (e.g. from an empty value) are dropped
        taxids = [t for t in section.get("speciesTaxids", "").split(",") if t]
        names = [n for n in section.get("speciesNames", "").split(",") if n]
        restrictionEnzymes = [r for r in section.get("restrictionEnzymes", "").split(",") if r]

        # zip pairs taxids with names and stops at the shorter list instead
        # of failing on an inconsistent file
        self.speciesList.extend(zip(taxids, names))

        self.restrictionEnzymeList = restrictionEnzymes

    def setTest(self):
        '''
        Debug helper: set a fixed short source sequence.
        '''
        self.sourceSequence = "ATGC"

    def _restrictionSites(self):
        '''
        Return the 'site' entry of rest_dict for every configured enzyme.
        Raises KeyError for an enzyme name rest_dict does not know.
        '''
        return [rest_dict[r]['site'] for r in self.restrictionEnzymeList]
128+

OptimizerMain.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
'''
2+
Created on 14.02.2015
3+
4+
@author: David
5+
'''
6+
from OptimizerApp import OptimizerApp
7+
from OptimizerMainWindow import OptimizerMainWindow
8+
import SeqUtils
9+
10+
11+
if __name__ == '__main__':
    # Script entry point: create the application state from "config.ini"
    # and pass it to the main window.
    # NOTE(review): no event loop is started here, so presumably
    # OptimizerMainWindow runs it from its constructor -- confirm.
    app = OptimizerApp("config.ini")
    mainWindow = OptimizerMainWindow(app)

0 commit comments

Comments
 (0)