-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path063_POST_PROCESSING_VAR_IMPORTANCE.py
More file actions
91 lines (65 loc) · 2.97 KB
/
063_POST_PROCESSING_VAR_IMPORTANCE.py
File metadata and controls
91 lines (65 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Run the shared project helpers inside this module's namespace.
# NOTE(review): pd, np, plt and dir_dest are used below but never imported in
# this file, so they presumably come from Utils.py -- confirm.
# NOTE(review): exec() executes whatever Utils.py contains; keep it pointed at
# a trusted, project-local file only.
with open("Utils.py") as _utils_file:
    # The context manager closes the file handle as soon as it has been read.
    exec(_utils_file.read(), globals())
from pandas import ExcelWriter
from pandas import ExcelFile

# Identifier of the variable-selection run whose results are post-processed;
# it is only used here to pick the results sub-directory.
SEED = 741

# Data loading.
'''CARICAMENTO DATI '''
# Every input CSV lives in the per-seed results directory; build it once.
results_dir = 'results/VARIABLE_SELECTION/' + str(SEED) + '/'
data_multi = pd.read_csv(results_dir + 'importance_ranked.csv')
data_uni = pd.read_csv(results_dir + 'univariate_var_sel.csv')
# NOTE(review): data_isis is loaded but not referenced in the visible code.
data_isis = pd.read_csv(results_dir + 'ISIS.csv')

# Join the two result sets on the variable name (merge defaults to an inner
# join, so only variables present in both files are kept).
data = data_multi.merge(data_uni, on='VARIABLE')
# The ANOVA p-value column is dropped before the per-column processing below.
data = data.drop('ANOVA_pvalue', axis=1)
# NOTE(review): `importance` is never filled in the visible code (the append
# that used it was commented out), so `prova` below is an empty DataFrame.
# Both names are kept in case other code still refers to them.
importance = []

# Write one worksheet per column of `data`, each holding the VARIABLE column
# next to that column. The context manager saves and closes the workbook on
# exit; it replaces the explicit writer.save(), removed in pandas 2.0.
with ExcelWriter('Variable_ranking.xlsx') as writer:
    for col in data.columns:
        # .loc replaces the .ix indexer (removed in pandas 1.0); the
        # selection here is purely label-based.
        # NOTE(review): the loop also visits 'VARIABLE' itself, producing a
        # sheet with that column twice -- confirm whether this is intended.
        df = data.loc[:, ['VARIABLE', col]]
        print(col, len(df))
        # sheet_name is passed by keyword: positional use is deprecated in
        # recent pandas releases.
        df.to_excel(writer, sheet_name=col, index=False)

prova = pd.DataFrame(importance)
# Row-wise mean over three positional column ranges of `data`.
# .iloc replaces the .ix indexer (removed in pandas 1.0); with string column
# labels .ix resolved these integer slices by position, which .iloc does
# explicitly.
# NOTE(review): presumably columns 1-6 hold the multivariate rankings and
# columns 7-8 the univariate ones -- confirm against the CSV layouts.
VI_multi = data.iloc[:, 1:7].mean(axis=1)
VI_uni = data.iloc[:, 7:9].mean(axis=1)
VI_ALL = data.iloc[:, 1:9].mean(axis=1)

# One row per variable: its name and the three mean scores.
data_importance = pd.concat([data.VARIABLE, VI_multi, VI_uni, VI_ALL], axis=1)
data_importance.columns = ['Variable', 'VI_multi', 'VI_uni', 'VI_ALL']
# Series.rank() defaults: ascending order, ties share their average rank, so
# the smallest VI_ALL gets rank 1.
data_importance['Ranking'] = data_importance.VI_ALL.rank()

# Keep the rows whose rank is strictly below n_var.
# NOTE(review): despite its name, data_top20 therefore holds the variables
# ranked below 10, not a top 20.
n_var = 10
data_top20 = data_importance[data_importance.Ranking < n_var].round(decimals=2)

# Transposed view used for the table under the plot: columns are labelled
# with each variable's rank and only the three score rows are kept.
table = data_top20.transpose()
table.columns = table.loc['Ranking'].tolist()
table = table.loc[['VI_multi', 'VI_uni', 'VI_ALL'], :]

columns = table.columns
rows = table.index.tolist()
n_rows = len(table)
# Cell contents of the table drawn at the bottom of the axes.
cell_text = table.values

# Grouped bar chart: one group per selected variable, one bar per score.
fig, ax = plt.subplots()
# One x slot per plotted row. Sized from the data rather than from n_var - 1
# so the bar positions always match the number of heights (rank ties can make
# the selection larger or smaller than n_var - 1).
index = np.arange(len(data_top20))
bar_width = 0.20
opacity = 0.8

plt.bar(index, data_top20.VI_ALL, bar_width,
        alpha=opacity, color='b', label='ALL')
plt.bar(index + bar_width, data_top20.VI_multi, bar_width,
        alpha=opacity, color='y', label='MULTI')
plt.bar(index + 2 * bar_width, data_top20.VI_uni, bar_width,
        alpha=opacity, color='r', label='UNI')

plt.title('Variable Importance')
# No x tick labels: the table below the axes labels each group instead.
plt.xticks([])
plt.legend(loc='upper right')
plt.tight_layout()

the_table = plt.table(cellText=cell_text, rowLabels=rows,
                      colLabels=columns, loc='bottom', cellLoc='center')
# Fixed font size instead of matplotlib's automatic table font scaling.
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)
# Applied after tight_layout() to leave room for the table and row labels.
plt.subplots_adjust(left=0.2, bottom=0.2)

# Write each variable name vertically inside its bar group.
# NOTE(review): the y position 20 is a hard-coded data coordinate -- confirm
# it suits the range of the plotted values.
x = index + bar_width / 2
for x_pos, var_name in zip(x, data_top20.Variable.values):
    plt.text(x_pos, 20, var_name, rotation=90)

# Save before showing: with a blocking backend the figure is gone once the
# window opened by show() is closed, and a later savefig() writes an empty
# image.
# NOTE(review): dir_dest is not defined in this file; presumably it comes
# from Utils.py -- confirm.
plt.savefig(dir_dest + 'Variable_ranking' + '.png', bbox_inches="tight")
plt.show()
plt.close()