-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathpostprocessing.py
More file actions
43 lines (27 loc) · 1.78 KB
/
postprocessing.py
File metadata and controls
43 lines (27 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# coding: utf-8
import pandas as pd
import numpy as np
import re
# Load the feature table to post-process and the reference table whose
# nominal columns define which values count as "known".
df = pd.read_csv("final.csv")
orig = pd.read_csv('final_nominal.csv')

# Columns holding nominal (categorical) values that are checked against the
# reference table.
nominal_columns = [
    'Identification', 'MachineType',
    '.text_type', '.text_flags',
    '.bss_type', '.bss_flags',
    '.comment_type', '.comment_flags',
    '.data_type', '.data_flags',
    '.data1_type', '.data1_flags',
    '.dynamic_type', '.dynamic_flags',
    '.dynstr_type', '.dynstr_flags',
    '.dynsym_type', '.dynsym_flags',
    '.fini_type', '.fini_flags',
    '.hash_type', '.hash_flags',
    '.gnu.hash_type', '.gnu.hash_flags',
    '.init_type', '.init_flags',
    '.got_type', '.got_flags',
    '.interp_type', '.interp_flags',
    '.note_type',
    '.plt_type', '.plt_flags',
    '.rodata_type', '.rodata_flags',
    '.shstrtab_type',
    '.strtab_type',
    '.symtab_type',
    '.sdata_type', '.sdata_flags',
    '.sbss_type', '.sbss_flags',
    '.rel.dyn_type', '.rel.dyn_flags',
    '.rel.plt_type', '.rel.plt_flags',
    '.got.plt_type', '.got.plt_flags',
]

# mappings[column] -> list of the distinct non-null values that column takes
# in the reference table.
mappings = {col: list(orig[col].dropna().unique()) for col in nominal_columns}

# Replace every value that does not occur in the reference table (missing
# values included) with the '<unknown>' placeholder.  The check is made per
# row: looking only at the first row and overwriting the whole column would
# wipe known values, or keep unknown ones, as soon as the table has more than
# one row, and would fail on a table without a row labelled 0.  For a
# single-row table the result is the same as a first-row check.
for col in nominal_columns:
    unseen = ~df[col].isin(mappings[col])
    if unseen.any():
        # Cast to object first so the string placeholder can be stored in a
        # column that was parsed as numeric.
        df[col] = df[col].astype(object).mask(unseen, '<unknown>')
# Remove the columns that are not carried over into the output table.
for unwanted in ('EntryPointAddress', 'Name'):
    df.drop(columns=[unwanted], inplace=True)

# Discard the first two characters of every ELFVersion / Flags entry
# (presumably a "0x" prefix -- confirm against whatever produces final.csv).
for prefixed in ('ELFVersion', 'Flags'):
    df[prefixed] = df[prefixed].apply(lambda text: text[2:])
def _hex_size_to_number(raw):
    """Convert one size cell holding hexadecimal digits to a number.

    Returns ``np.nan`` for a missing cell (text ``"nan"``) and for any cell
    whose text contains the letter ``e``/``E``; every other cell is parsed as
    a base-16 integer.  Note that the ``E`` rule also discards hexadecimal
    strings that use the digit ``e``.
    NOTE(review): presumably such cells are rejected because they cannot be
    told apart from exponent notation -- confirm.

    Raises:
        ValueError: if the remaining text is not a valid base-16 integer.
    """
    text = str(raw)
    # A cell that was read back as a float carries a trailing ".0"; drop only
    # that suffix rather than every ".0" occurring inside the text.
    if text.endswith(".0"):
        text = text[:-2]
    if text == "nan" or "E" in text.upper():
        # np.nan is the supported spelling; the np.NaN alias was removed in
        # NumPy 2.0.
        return np.nan
    return int(text, 16)


# Every size / entry-size column.  Each column is listed once, so a name
# matching both patterns is not hex-converted a second time.
req_col = [name for name in df.columns if '_size' in name or 'entsize' in name]
for name in req_col:
    df[name] = pd.to_numeric(df[name].apply(_hex_size_to_number))

# Write the post-processed table without the index column.
df.to_csv("final2.csv", index=False)