-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathkazusaIO.py
More file actions
51 lines (43 loc) · 1.53 KB
/
kazusaIO.py
File metadata and controls
51 lines (43 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from html.parser import HTMLParser
import urllib
import urllib.request
class KazusaHTMLParser(HTMLParser):
    '''
    HTML parser to extract the raw codon usage table from Kazusa HTML result.

    The table is taken to be the text content of a <pre> element. After
    feeding a document, getResult() returns the text of the most recent
    <pre> element that contained any text, or "" if there was none.
    '''

    def __init__(self):
        super().__init__()
        # True while the parser is between <pre> and </pre>.
        self.CUTableIncoming = False
        # Text collected from the <pre> element so far.
        self.res = ""
        # True until the first text chunk of the current <pre> arrives, so
        # that a new table replaces (rather than extends) an earlier one.
        self._newTable = False

    def handle_starttag(self, tag, attrs):
        '''
        Start capturing when a <pre> tag opens (the table is in a <pre>-tag).
        '''
        if tag == "pre":
            self.CUTableIncoming = True
            self._newTable = True

    def handle_endtag(self, tag):
        '''
        Stop capturing at </pre> so text after the table is never picked up,
        even when the <pre> element was empty.
        '''
        if tag == "pre":
            self.CUTableIncoming = False

    def handle_data(self, data):
        '''
        Collect text found inside <pre>. The content may arrive in several
        chunks (e.g. when it is interrupted by nested tags), so chunks that
        belong to the same <pre> are concatenated.
        '''
        if not self.CUTableIncoming:
            return
        if self._newTable:
            self.res = data
            self._newTable = False
        else:
            self.res += data

    def getResult(self):
        '''
        return parsed CU table, only gives results != "" after parsing
        '''
        return self.res
def getCU(taxid, codeid=1):
    '''
    get codon usage in the form:
    [codon][amino acid][relative frequency of codon] ...
    one line per codon

    taxid:  species identifier, sent as the "species" query parameter
    codeid: genetic code identifier, sent as the "aa" query parameter

    Returns the table as a single string with empty lines removed ("" if
    the page contained no table). Errors from urllib.request.urlopen and
    UnicodeDecodeError from decoding the page propagate to the caller.
    '''
    # Local import keeps this function self-contained.
    from urllib.parse import urlencode

    # construct URL for CU of species with given taxid and genetic code;
    # urlencode escapes any characters that are not safe in a query string
    query = urlencode({"species": taxid, "aa": codeid})
    req = "http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?" + query

    # get HTML; the with-block closes the connection even if read() fails
    with urllib.request.urlopen(req) as response:
        html = response.read().decode("utf-8")

    # parse HTML
    p = KazusaHTMLParser()
    p.feed(html)
    p.close()  # flush any text the parser is still buffering

    # one codon per line: each entry ends with ") "
    rstr = p.getResult().replace(") ", ")\n")

    # remove empty lines
    return "\n".join([line for line in rstr.split("\n") if line != ""])