-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_proposed_qa.py
More file actions
33 lines (28 loc) · 1.31 KB
/
extract_proposed_qa.py
File metadata and controls
33 lines (28 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import json
import os
import argparse
def _topic_from_filename(filename):
    """Return the topic encoded in a trajectory file name.

    The topic is every underscore-separated field from the eighth onward,
    re-joined with underscores, with a trailing ``.json`` extension removed.
    """
    topic = "_".join(filename.split('_')[7:])
    # Drop the extension as a suffix. ``str.strip("json")`` treats its
    # argument as a set of characters and strips them from both ends, which
    # mangles topics such as "jason" (-> "ason") or "season" (-> "easo").
    if topic.endswith(".json"):
        topic = topic[:-len(".json")]
    return topic


def main():
    """Extract the proposed-question fields from JSON files into one JSONL file.

    Command-line arguments:
        --input_dir: directory containing the input JSON files.
        --output_file: path of the JSONL file to write (one object per line).

    Only entries whose name ends in ``.json`` are read. They are ordered by
    the integer found in the second underscore-separated field of the file
    name, and each record's ``id`` is its 1-based position in that order.
    The number of extracted records is printed to stdout.

    Raises:
        IndexError, ValueError: if a ``.json`` file name has no integer as
            its second underscore-separated field.
        KeyError: if a file lacks ``prediction.json`` or one of the
            extracted fields.
    """
    parser = argparse.ArgumentParser(description='Extract prediction fields from JSON files')
    parser.add_argument('--input_dir', type=str, default="./outputs/openended_trajectories/", help='Input directory containing JSON files')
    parser.add_argument('--output_file', type=str, default='./outputs/extracted_questions.jsonl', help='Output JSONL file path')
    args = parser.parse_args()

    # Skip stray directory entries (e.g. ".DS_Store") that would otherwise
    # crash the numeric sort key below.
    filelist = [name for name in os.listdir(args.input_dir) if name.endswith(".json")]
    filelist.sort(key=lambda x: int(x.split('_')[1]))

    out = []
    for idx, file in enumerate(filelist, start=1):
        with open(os.path.join(args.input_dir, file), "r", encoding="utf-8") as f:
            data = json.load(f)
        fields = data["prediction"]['json']
        out.append({
            'id': idx,
            'topic': _topic_from_filename(file),
            "conceptual_breadth": fields['conceptual_breadth'],
            "logical_nesting": fields['logical_nesting'],
            "exploration": fields['exploration'],
            'prompt': fields['proposed_question'],
        })
    print(len(out))

    # Make sure the destination directory exists before writing.
    out_dir = os.path.dirname(args.output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(args.output_file, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(item) + "\n" for item in out)
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()