-
Notifications
You must be signed in to change notification settings - Fork 112
Expand file tree
/
Copy pathuploads.py
More file actions
57 lines (46 loc) · 1.71 KB
/
uploads.py
File metadata and controls
57 lines (46 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from datasets import Dataset, load_dataset
from huggingface_hub import HfApi
import pandas as pd
# Hugging Face dataset repo ids.
# INDEX_DATASET holds the metadata index (one row per study); it is pushed by
# upload_index_data() and read back by add_study().
# TRACE_DATASET holds the uploaded trace archives, stored as "<exp_id>.zip".
# NOTE(review): both ids start with "/" — the namespace (user/org) in front of
# the slash appears to be missing; confirm the intended "<namespace>/<name>".
INDEX_DATASET = "/agent_traces_index"
TRACE_DATASET = "/agent_traces_data"
# Shared Hugging Face Hub client, created once at import time and used by
# upload_trace() for file uploads.
api = HfApi()
def upload_index_data(index_df: pd.DataFrame):
    """Push the index table to the Hub as the ``train`` split of INDEX_DATASET.

    Args:
        index_df: The full index table; it is converted to a ``Dataset`` and
            pushed as-is to the ``train`` split.
    """
    Dataset.from_pandas(index_df).push_to_hub(INDEX_DATASET, split="train")
def upload_trace(trace_file: str, exp_id: str):
    """Upload a trace file to the TRACE_DATASET repo under ``<exp_id>.zip``.

    Args:
        trace_file: Passed to the Hub client as the file to upload.
        exp_id: Experiment identifier; determines the file name in the repo.
    """
    # The name in the repo is derived from the experiment id, not from the
    # local file name.
    destination = f"{exp_id}.zip"
    api.upload_file(
        path_or_fileobj=trace_file,
        path_in_repo=destination,
        repo_id=TRACE_DATASET,
        repo_type="dataset",
    )
def add_study(exp_id: str, study_name: str, llm: str, benchmark: str, trace_file: str):
    """Upload a study's trace file and register it in the index dataset.

    The benchmark is checked against a whitelist, the trace is uploaded via
    ``upload_trace``, and a metadata row pointing at the uploaded file is
    appended to the existing index, which is then pushed back to the Hub.

    Args:
        exp_id: Experiment identifier; also used as the uploaded file's name.
        study_name: Name of the study, stored in the index row.
        llm: Model name; used together with ``benchmark`` to pick a license.
        benchmark: Benchmark name; must be one of the whitelisted benchmarks.
        trace_file: Trace file handed to ``upload_trace``.

    Raises:
        ValueError: If ``benchmark`` is not whitelisted. Nothing is uploaded
            in that case.
    """
    # Reject non-whitelisted benchmarks before any upload happens.
    WHITELISTED_BENCHMARKS = ["benchmark1", "benchmark2"]
    if benchmark not in WHITELISTED_BENCHMARKS:
        raise ValueError("Benchmark not whitelisted")

    # License is keyed on the (llm, benchmark) pair; unlisted pairs are
    # recorded as "Unknown" rather than rejected.
    LICENSES = {
        ("GPT-4", "benchmark1"): "MIT",
        ("Llama2", "benchmark2"): "Apache-2.0",
    }
    license_type = LICENSES.get((llm, benchmark), "Unknown")

    # Upload trace file
    upload_trace(trace_file, exp_id)

    # Create metadata entry
    index_entry = {
        "exp_id": exp_id,
        "study_name": study_name,
        "llm": llm,
        "benchmark": benchmark,
        "license": license_type,
        "trace_pointer": f"https://huggingface.co/datasets/{TRACE_DATASET}/resolve/main/{exp_id}.zip",
    }

    # Load the existing index dataset and add the new entry.
    dataset = load_dataset(INDEX_DATASET, split="train")
    df = dataset.to_pandas()
    # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
    # frame is the supported equivalent and keeps a fresh 0..n-1 index.
    df = pd.concat([df, pd.DataFrame([index_entry])], ignore_index=True)
    upload_index_data(df)
    print(f"Study {exp_id} added successfully!")