-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsaver.py
More file actions
39 lines (31 loc) · 1.31 KB
/
saver.py
File metadata and controls
39 lines (31 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import pandas as pd
import json
from datetime import datetime
class Saver:
    """Write a collection of articles to timestamped CSV or JSONL files.

    Files are created inside ``output_dir`` and are named
    ``<base_filename>_<YYYYmmdd_HHMMSS>.<ext>``.
    """

    def __init__(self, output_dir="data", base_filename="dari_dataset"):
        """Store the output settings and create ``output_dir`` if missing.

        Args:
            output_dir: Directory that receives the saved files.
            base_filename: Prefix used for every generated file name.
        """
        self.output_dir = output_dir
        self.base_filename = base_filename
        os.makedirs(self.output_dir, exist_ok=True)

    def _timestamped_filename(self, ext="csv"):
        """Return a path in ``output_dir`` stamped with the current local time.

        The timestamp only has one-second resolution, so two saves within the
        same second would otherwise resolve to the same path and the later one
        would silently overwrite the earlier one. When the plain name is
        already taken, a numeric suffix (``_1``, ``_2``, ...) is appended.

        Args:
            ext: File extension without the leading dot.

        Returns:
            A path that does not exist at the time of the call.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        stem = f"{self.base_filename}_{timestamp}"
        candidate = os.path.join(self.output_dir, f"{stem}.{ext}")
        counter = 1
        while os.path.exists(candidate):
            candidate = os.path.join(self.output_dir, f"{stem}_{counter}.{ext}")
            counter += 1
        return candidate

    def save_csv(self, articles: list):
        """Save ``articles`` as a CSV file and return its path.

        The records are loaded into a ``pandas.DataFrame`` and written without
        the index column, encoded as ``utf-8-sig`` (UTF-8 with a BOM).

        Args:
            articles: Records to save; presumably a list of dicts, one per
                article (anything ``pandas.DataFrame`` accepts will work).

        Returns:
            The path of the written file, or ``None`` when ``articles`` is
            empty (nothing is written in that case).
        """
        if not articles:
            print("[Saver] No articles to save.")
            return None
        df = pd.DataFrame(articles)
        filepath = self._timestamped_filename("csv")
        df.to_csv(filepath, index=False, encoding="utf-8-sig")
        print(f"[Saver] Saved {len(articles)} articles to {filepath}")
        return filepath

    def save_jsonl(self, articles: list):
        """Save ``articles`` as a JSON Lines file and return its path.

        Each record becomes one line of JSON; non-ASCII characters are written
        as-is (``ensure_ascii=False``) in a UTF-8 encoded file.

        Args:
            articles: JSON-serializable records to save.

        Returns:
            The path of the written file, or ``None`` when ``articles`` is
            empty (nothing is written in that case).

        Raises:
            TypeError: If a record is not JSON-serializable. No file is
                created in that case.
        """
        if not articles:
            print("[Saver] No articles to save.")
            return None
        # Serialize everything before touching the disk so that a record that
        # cannot be encoded does not leave a truncated file behind.
        lines = [json.dumps(article, ensure_ascii=False) + "\n" for article in articles]
        filepath = self._timestamped_filename("jsonl")
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(lines)
        print(f"[Saver] Saved {len(articles)} articles to {filepath}")
        return filepath