-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlist_from_dir.py
More file actions
executable file
·38 lines (29 loc) · 1.38 KB
/
list_from_dir.py
File metadata and controls
executable file
·38 lines (29 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python3
import argparse
import os
import pandas as pd
Description = '''
This script lists all files in a directory and its subdirectories
and saves the list to a csv file compatible with bench_PPC.py
'''

# Column order of the generated CSV.
COLUMNS = ['swh_id', 'file_id', 'length', 'filename', 'filepath', 'local_path']

parser = argparse.ArgumentParser(
    description=Description, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('root_directory', metavar="root_directory", nargs='+',
                    help='directory to scan recursively '
                         '(only the first directory given is used)')


def list_files(root):
    """Return a DataFrame with one row per file found under *root*.

    Args:
        root: Path of the directory to walk recursively. A trailing path
            separator is accepted.

    Returns:
        A pandas DataFrame with the columns listed in ``COLUMNS``:
        ``swh_id`` is always the string ``'0'``; ``file_id`` and
        ``filename`` are the bare file name; ``length`` is the size in
        bytes; ``local_path`` is the containing directory relative to
        *root* (empty string for files directly in *root*); ``filepath``
        is ``local_path`` joined with the file name.

    Raises:
        OSError: If the size of a listed file cannot be read.
    """
    rows = []
    for directory, dirs, files in os.walk(root):
        # Sorting in place makes os.walk descend in a stable order, so the
        # resulting CSV does not depend on filesystem enumeration order.
        dirs.sort()
        # relpath strips the root as a true prefix (independent of trailing
        # slashes or the platform separator) instead of a substring replace.
        local_path = os.path.relpath(directory, root)
        if local_path == os.curdir:
            # Files directly in the root get an empty local path so that
            # 'filepath' is root-relative for every row.
            local_path = ""
        for name in sorted(files):
            rows.append({
                'swh_id': '0',
                'file_id': name,
                'length': os.path.getsize(os.path.join(directory, name)),
                'filename': name,
                'filepath': os.path.join(local_path, name),
                'local_path': local_path,
            })
    # Build the frame once; concatenating per file re-copies it every time.
    return pd.DataFrame(rows, columns=COLUMNS)


def main(argv=None):
    """Parse the command line and write ``<root name>_list_of_files.csv``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    The CSV is written to the current working directory and is named after
    the last component of the root directory.
    """
    args = parser.parse_args(argv)
    root_full_path = args.root_directory[0]
    if not os.path.isdir(root_full_path):
        parser.error(f'not a directory: {root_full_path}')
    # abspath drops trailing separators and resolves '.'/'..', so basename
    # yields the real directory name rather than '' or '.'.
    root_dir = os.path.basename(os.path.abspath(root_full_path))
    df = list_files(root_full_path)
    df.to_csv(f'{root_dir}_list_of_files.csv', index=False)


if __name__ == "__main__":
    main()