-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess_text.py
More file actions
26 lines (21 loc) · 878 Bytes
/
preprocess_text.py
File metadata and controls
26 lines (21 loc) · 878 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import csv
import os
from wordsegment import load
from generate_embeddings import apply_preprocessing
if __name__ == "__main__":
    # Convert each pipe-delimited labelled report file into a sibling
    # "<name>_preprocessed.csv" whose text column has been run through
    # apply_preprocessing and whose "-1" labels are rewritten as "0".
    in_file_paths = ["../new_labeled_path_reports.csv", "../new_labeled_rad_reports.csv"]

    # Load wordsegment's data once, before any row is processed.
    # NOTE(review): presumably apply_preprocessing depends on this -- confirm
    # against generate_embeddings.
    load()

    for file_path in in_file_paths:
        out_file_path = os.path.splitext(file_path)[0] + "_preprocessed.csv"

        # Both handles live in one with-statement so the output file is
        # flushed and closed even if processing a row raises. newline='' on
        # the writer side is what the csv module asks for; without it rows
        # can end up with doubled line endings on some platforms.
        with open(file_path, newline='') as csvfile, \
                open(out_file_path, "w", newline='') as out_file:
            reader = csv.reader(csvfile, delimiter='|')
            writer = csv.writer(out_file, delimiter="|")

            # The output always gets this fixed header; the input's own
            # header row is skipped (an empty input yields a header-only
            # output rather than raising StopIteration).
            writer.writerow(["anon_id", "text", "label"])
            next(reader, None)

            for row in reader:
                # apply_preprocessing yields string tokens; rejoin them into
                # a single space-separated text field.
                row[1] = " ".join(apply_preprocessing(row[1]))
                # Collapse the "-1" label into "0"; other labels pass through.
                if row[2] == "-1":
                    row[2] = "0"
                writer.writerow(row)