-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathprepare_pdb_feature1cd8ss8sa5angle.py
More file actions
122 lines (104 loc) · 3.9 KB
/
prepare_pdb_feature1cd8ss8sa5angle.py
File metadata and controls
122 lines (104 loc) · 3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#####################################################################################################################
# Date created : 10/21/2022
# Date modified: 10/21/2022
# Purpose : This program creates features from a pdb (dssp) file: complementary ss, sa and angle features, plus the relative spatial position of each residue
#####################################################################################################################
import os,sys
import optparse
import numpy as np
from atomic_feature import get_residue_area_volume
import math
# Command-line interface: --dssp_feat names the input feature table and
# --o names the output file.
parser = optparse.OptionParser()
parser.add_option('--dssp_feat', dest='dssp_feat',
                  default='',  # empty string means "not provided"
                  help='name of dssp feature file')
parser.add_option('--o', dest='o',
                  default='',  # empty by default
                  # The array is written with numpy.save further down, which
                  # produces the .npy format (not .npz).
                  help='name of output feature file (written with numpy.save, .npy format)')
(options, args) = parser.parse_args()
dssp_feat = options.dssp_feat
o = options.o

# Fail with a usage message instead of an opaque open('') error.
if not dssp_feat:
    parser.error('--dssp_feat is required')

# Read the whole table up front; the context manager releases the handle even
# if reading fails. `fd` stays bound (closed) afterwards, so the fd.close()
# at the end of the script remains a harmless no-op.
with open(dssp_feat, 'r') as fd:
    fdlines = fd.readlines()

# The first whitespace-separated field of the last line is used as the number
# of rows to allocate (rows are later addressed by that same field minus one).
length = int(fdlines[-1].split()[0])

# Each row has 22 columns: 1 inverse-distance value, 8 + 8 one-hot slots and
# 5 scalar columns, matching the indices filled in below.
FEATURE_DIM = 22
feature_list = [[0 for _ in range(FEATURE_DIM)] for _ in range(length)]
dssp_list = [[0 for _ in range(FEATURE_DIM)] for _ in range(length)]
def ss_one_hot8(ss):
    """Encode a one-letter secondary-structure code as an 8-way one-hot tuple.

    The slots are ordered H, G, I, E, B, T, S; any other value (including an
    empty or multi-character string) lights up the eighth, catch-all slot.

    Returns:
        A tuple of eight ints containing exactly one 1.
    """
    known_codes = ('H', 'G', 'I', 'E', 'B', 'T', 'S')
    # Unknown codes fall through to the slot just past the known ones.
    hot = known_codes.index(ss) if ss in known_codes else len(known_codes)
    return tuple(1 if slot == hot else 0 for slot in range(8))
def sa_one_hot8(sa):
    """Bin a numeric value into eight 30-wide buckets and one-hot encode it.

    ``sa`` is converted with ``float()``, so numeric strings are accepted.
    Bucket k (k = 0..6) holds values below ``30 * (k + 1)`` that did not fit
    an earlier bucket; everything at or above 210 lands in the last bucket.
    Values below 30, including negatives, land in the first bucket.

    Returns:
        A tuple of eight ints containing exactly one 1.
    """
    area = float(sa)
    upper_bounds = (30, 60, 90, 120, 150, 180, 210)

    # Default to the overflow bucket; the first bound that exceeds the value wins.
    hot = len(upper_bounds)
    for slot, bound in enumerate(upper_bounds):
        if area < bound:
            hot = slot
            break

    encoded = [0] * 8
    encoded[hot] = 1
    return tuple(encoded)
def dist(x1, y1, z1, x2, y2, z2):
    """Return the Euclidean distance between points (x1, y1, z1) and (x2, y2, z2)."""
    dx = x1 - x2
    dy = y1 - y2
    dz = z1 - z2
    squared_length = dx**2 + dy**2 + dz**2
    return math.sqrt(squared_length)
def centroid(pos):
    """Return the arithmetic mean (x, y, z) of a sequence of 3D points.

    Args:
        pos: sequence of points; only the first three components of each
            point are read.

    Returns:
        A 3-tuple with the mean of each coordinate.

    Raises:
        ZeroDivisionError: if ``pos`` is empty.
    """
    totals = [0, 0, 0]
    # Plain left-to-right accumulation keeps the floating-point result stable.
    for point in pos:
        totals[0] += point[0]
        totals[1] += point[1]
        totals[2] += point[2]

    count = len(pos)
    return totals[0] / count, totals[1] / count, totals[2] / count
# Split every data line once; the first line of the file is skipped.
rows = [line.split() for line in fdlines[1:]]

# Fields 9, 10 and 11 of each row are the x, y, z coordinates of the residue.
pos = [[float(fields[9]), float(fields[10]), float(fields[11])] for fields in rows]
cx, cy, cz = centroid(pos)

# Lower bound on the distance to the centroid. Without it a residue sitting
# exactly on the centroid (e.g. a single-row input) raised ZeroDivisionError.
# NOTE(review): the floor value is a judgment call -- confirm it suits the
# downstream consumer of this feature.
MIN_CENTROID_DIST = 1e-6

for fields, (x, y, z) in zip(rows, pos):
    # Field 0 is used as a 1-based row number into dssp_list.
    row = dssp_list[int(fields[0]) - 1]

    d = dist(x, y, z, cx, cy, cz)
    row[0] = 1 / max(d, MIN_CENTROID_DIST)   # inverse distance to the centroid
    row[1:(1 + 8)] = ss_one_hot8(fields[2])  # columns 1-8: one-hot of field 2
    row[9:(9 + 8)] = sa_one_hot8(fields[3])  # columns 9-16: binned field 3
    row[17] = float(fields[4])
    # Fields 5-8 are scaled by 1/360 -- presumably angles in degrees; confirm
    # against the producer of the input file.
    row[18] = float(fields[5]) / 360.0
    row[19] = float(fields[6]) / 360.0
    row[20] = float(fields[7]) / 360.0
    row[21] = float(fields[8]) / 360.0

# The output rows are exactly the dssp rows; no extra features are appended.
for i, feature_row in enumerate(dssp_list):
    feature_list[i] = feature_row

np.save(o, feature_list)
fd.close()