-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathremove_non_existent.py
More file actions
33 lines (30 loc) · 1.5 KB
/
remove_non_existent.py
File metadata and controls
33 lines (30 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os, argparse, tqdm
def remove_non_existent(filename: str) -> None:
    """Drop entries whose referenced file is missing from a list file, in place.

    Each line of ``filename`` is treated as ``|``-separated fields; the first
    field is the path that is checked for existence on disk. Every line whose
    path does not exist is reported on stdout and dropped. Surviving lines are
    written back exactly as they were read. A summary line with the number of
    removed entries is always printed.

    Args:
        filename: Path of the list file to filter. It is overwritten only
            when at least one entry has to be removed.
    """
    with open(filename, 'r') as f:
        entries = f.readlines()

    kept = []
    removed = 0
    for entry in tqdm.tqdm(entries):
        # A line without any "|" still carries its line terminator in the
        # first field; strip it so an existing path is not judged missing.
        wav_path = entry.split("|", 1)[0].rstrip("\r\n")
        # Check each path exactly once so the reported count and the set of
        # dropped lines can never disagree.
        if os.path.exists(wav_path):
            kept.append(entry)
        else:
            print(f"File {wav_path} does not exist, removing from list.")
            removed += 1
    print(f"Removed {removed} non-existent files from {filename}.")

    # Nothing to drop: leave the file untouched rather than truncating and
    # rewriting identical content.
    if not removed:
        return

    with open(filename, 'w') as f:
        f.writelines(kept)
def main(args):
    """Filter the train, validation and OOD list files under ``args.in_file_dir``.

    Args:
        args: Parsed command-line namespace providing ``in_file_dir``,
            ``in_train_filename``, ``in_val_filename`` and ``in_OOD_filename``.
    """
    # Processed in this fixed order: train, then validation, then OOD.
    filename_attrs = ("in_train_filename", "in_val_filename", "in_OOD_filename")
    for attr in filename_attrs:
        list_path = os.path.join(args.in_file_dir, getattr(args, attr))
        remove_non_existent(list_path)
if __name__ == "__main__":
    # Command-line entry point: parse the directory and the three list-file
    # names, then hand the parsed namespace to main().
    parser = argparse.ArgumentParser(
        # The description shown by --help now states what the script does.
        description="Remove entries whose files do not exist from the train, validation and OOD list files.",
    )
    parser.add_argument("--in_file_dir", type=str, required=True, help="Directory containing the input files.")
    parser.add_argument("--in_train_filename", type=str, default="train_list.txt", help="Name of the training file.")
    parser.add_argument("--in_val_filename", type=str, default="val_list.txt", help="Name of the validation file.")
    parser.add_argument("--in_OOD_filename", type=str, default="OOD_texts.txt", help="Name of the OOD file.")
    args = parser.parse_args()
    main(args)