Skip to content

Commit 73f9745

Browse files
authored
Add seed script (#54)
* Add seed script Signed-off-by: kerthcet <kerthcet@gmail.com> * Update Readme.md Signed-off-by: kerthcet <kerthcet@gmail.com> * fix lint Signed-off-by: kerthcet <kerthcet@gmail.com> --------- Signed-off-by: kerthcet <kerthcet@gmail.com>
1 parent 770990c commit 73f9745

11 files changed

Lines changed: 202 additions & 20 deletions

File tree

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,7 @@ test-integration: lint
4242
'
4343
.PHONY: test-all
4444
test-all: test test-integration
45+
46+
.PHONY: seed
47+
seed:
48+
python hack/seed.py

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Open, modular framework to build GenAI applications.
1616

1717
## Concepts
1818

19+
- **Project**: A Project is a namespace-level abstraction that isolates experiments from different users or teams.
1920
- **Experiment**: An Experiment is a high-level abstraction for organizing and managing a series of related trials. It serves as a way to group together multiple trials that share a common goal or objective.
2021
- **Trial**: A Trial represents a single attempt or multiple iterations within an experiment. It encapsulates the configuration, execution, and results of a specific set of runs.
2122
- **Run**: A Run is an execution of a specific configuration within a trial. It represents a real iteration of the trial.
@@ -67,7 +68,6 @@ Dashboard is coming soon! Meanwhile, you can query the results directly from the
6768
make down
6869
```
6970

70-
7171
## Contributing
7272

7373
We welcome contributions! Please refer to [development.md](./site/docs/development.md) for more information on how to set up your development environment and contribute to the project.

alphatrion/metadata/sql.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from alphatrion.metadata.sql_models import (
88
Base,
99
Experiment,
10-
Metrics,
10+
Metric,
1111
Model,
1212
Project,
1313
Run,
@@ -267,7 +267,7 @@ def create_metric(
267267
step: int,
268268
):
269269
session = self._session()
270-
new_metric = Metrics(
270+
new_metric = Metric(
271271
project_id=project_id,
272272
trial_id=trial_id,
273273
key=key,
@@ -278,8 +278,8 @@ def create_metric(
278278
session.commit()
279279
session.close()
280280

281-
def list_metrics(self, trial_id: uuid.UUID) -> list[Metrics]:
281+
def list_metrics(self, trial_id: uuid.UUID) -> list[Metric]:
282282
session = self._session()
283-
metrics = session.query(Metrics).filter(Metrics.trial_id == trial_id).all()
283+
metrics = session.query(Metric).filter(Metric.trial_id == trial_id).all()
284284
session.close()
285285
return metrics

alphatrion/metadata/sql_models.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ class Trial(Base):
5959
name = Column(String, nullable=False)
6060
description = Column(String, nullable=True)
6161
meta = Column(JSON, nullable=True, comment="Additional metadata for the trial")
62-
duration = Column(Integer, default=0, comment="Duration in seconds")
6362
params = Column(JSON, nullable=True, comment="Parameters for the experiment")
6463
status = Column(
6564
Enum(TrialStatus),
@@ -104,7 +103,7 @@ class Model(Base):
104103
is_del = Column(Integer, default=0, comment="0 for not deleted, 1 for deleted")
105104

106105

107-
class Metrics(Base):
106+
class Metric(Base):
108107
__tablename__ = "metrics"
109108

110109
uuid = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

alphatrion/runtime/runtime.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ def init(
1717
"""
1818
Initialize the AlphaTrion runtime environment.
1919
20-
:param project_id: the project ID to initialize the environment for,
21-
for testing purpose, you can use a random UUID.
20+
:param project_id: the project ID to initialize the environment.
21+
For testing purpose, you can use a random UUID.
2222
:param artifact_insecure: whether to use insecure connection to the
2323
artifact registry
2424
"""

alphatrion/tracing/__init__.py

Whitespace-only changes.

alphatrion/utils/__init__.py

Whitespace-only changes.

hack/seed.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
#!/usr/bin/env python3
2+
3+
import os
4+
import random
5+
import uuid
6+
from datetime import datetime
7+
from decimal import Decimal
8+
9+
from dotenv import load_dotenv
10+
from faker import Faker
11+
from sqlalchemy import create_engine
12+
from sqlalchemy.orm import sessionmaker
13+
14+
from alphatrion import consts
15+
from alphatrion.metadata.sql_models import (
16+
Base,
17+
Experiment,
18+
Metric,
19+
Project,
20+
Run,
21+
Trial,
22+
TrialStatus,
23+
)
24+
25+
# Pull variables from a local .env file (if present) into the environment
# before the database URL is looked up.
load_dotenv()

# consts.METADATA_DB_URL is used as the *name* of the environment variable
# that holds the metadata database URL.
DATABASE_URL = os.getenv(consts.METADATA_DB_URL)
if not DATABASE_URL:
    # Fail early with an actionable message instead of handing None (or an
    # empty string) to create_engine, which rejects it with a far less
    # obvious error.
    raise RuntimeError(
        f"Environment variable {consts.METADATA_DB_URL!r} is not set; "
        "it must contain the metadata database URL."
    )

# One engine and one session are shared by the whole script.
engine = create_engine(DATABASE_URL, echo=False)
SessionLocal = sessionmaker(bind=engine)
session = SessionLocal()

# Shared Faker instance used by every generate_* helper.
fake = Faker()
34+
35+
36+
def make_json_serializable(obj):
    """Recursively convert *obj* into values the JSON encoder accepts.

    Conversions applied:

    * ``dict``: values are converted recursively (keys are left untouched).
    * ``list`` / ``tuple`` / ``set`` / ``frozenset``: returned as a ``list``
      whose items are converted recursively.
    * ``Decimal``: converted to ``float``.
    * ``datetime`` / ``date`` / ``time``: converted to an ISO-8601 string.

    Any other value is returned unchanged.

    :param obj: arbitrary value, typically the output of ``fake.pydict``.
    :return: a structure built only from the converted values above.
    """
    # Function-scope import: the module itself only imports ``datetime``
    # from the ``datetime`` package.
    from datetime import date, time

    if isinstance(obj, dict):
        return {key: make_json_serializable(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple, set, frozenset)):
        # Tuples and sets are walked too, so values nested inside them
        # (e.g. a Decimal in a tuple) do not slip through unconverted.
        return [make_json_serializable(item) for item in obj]
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, (date, time)):
        # ``datetime`` is a subclass of ``date``, so it is covered here.
        return obj.isoformat()
    return obj
47+
48+
49+
def generate_project() -> Project:
    """Build an unsaved ``Project`` with a fresh UUID and fake text fields.

    The name is a title-cased Faker "bs" phrase and the description is a
    Faker catch phrase. Nothing is added to the session here.
    """
    project_uuid = uuid.uuid4()
    title = fake.bs().title()
    blurb = fake.catch_phrase()
    return Project(uuid=project_uuid, name=title, description=blurb)
55+
56+
57+
def generate_experiment(projects: list[Project]) -> Experiment:
    """Build an unsaved ``Experiment`` owned by a randomly chosen project.

    :param projects: candidate parent projects; one is picked with
        ``random.choice`` and its ``uuid`` becomes ``project_id``.
    :return: an ``Experiment`` with fake name, description and metadata.
    """
    title = fake.bs().title()
    blurb = fake.catch_phrase()
    # Faker dictionaries may contain values the JSON column cannot store
    # as-is, so they are normalised first.
    raw_meta = fake.pydict(nb_elements=3, variable_nb_elements=True)
    extra = make_json_serializable(raw_meta)
    owner = random.choice(projects)
    return Experiment(
        name=title,
        description=blurb,
        meta=extra,
        project_id=owner.uuid,
    )
66+
67+
68+
def generate_trial(exps: list[Experiment]) -> Trial:
    """Build an unsaved ``Trial`` under a randomly chosen experiment.

    The trial copies ``project_id`` from the chosen experiment and points
    ``experiment_id`` at it; name, description, metadata and params are fake
    data and the status is a random ``TrialStatus`` member.

    :param exps: candidate parent experiments.
    :return: the new ``Trial`` (not added to the session).
    """
    parent = random.choice(exps)
    fields = {
        "project_id": parent.project_id,
        "experiment_id": parent.uuid,
        "name": fake.bs().title(),
        "description": fake.catch_phrase(),
    }
    # Both JSON columns get their own independently generated dictionary.
    for column in ("meta", "params"):
        generated = fake.pydict(nb_elements=3, variable_nb_elements=True)
        fields[column] = make_json_serializable(generated)
    fields["status"] = random.choice(list(TrialStatus))
    return Trial(**fields)
83+
84+
85+
def generate_run(trials: list[Trial]) -> Run:
    """Build an unsaved ``Run`` attached to a randomly chosen trial.

    :param trials: candidate parent trials.
    :return: a ``Run`` whose ``project_id`` and ``trial_id`` come from the
        chosen trial.
    """
    parent = random.choice(trials)
    owner_project, owner_trial = parent.project_id, parent.uuid
    return Run(project_id=owner_project, trial_id=owner_trial)
91+
92+
93+
def generate_metric(runs: list[Run]) -> Metric:
    """Build an unsaved ``Metric`` derived from a randomly chosen run.

    The metric inherits ``project_id`` and ``trial_id`` from the run; the run
    itself is not referenced on the metric.

    :param runs: candidate runs to derive the metric's ownership from.
    :return: a ``Metric`` with a random key, a value in ``[0, 1]`` and a
        step between 1 and 1000 inclusive.
    """
    source = random.choice(runs)
    metric_key = random.choice(["accuracy", "loss", "precision", "fitness"])
    metric_value = random.uniform(0, 1)
    metric_step = random.randint(1, 1000)
    return Metric(
        project_id=source.project_id,
        trial_id=source.trial_id,
        key=metric_key,
        value=metric_value,
        step=metric_step,
    )
102+
103+
104+
def _seed_children(parents, per_parent, make_child):
    """Create ``per_parent`` children for every parent and commit them.

    ``make_child`` is one of the ``generate_*`` helpers; it is handed a
    single-element list so its random choice always lands on that parent.
    """
    children = []
    for parent in parents:
        children.extend(make_child([parent]) for _ in range(per_parent))
    session.add_all(children)
    # Commit per level so the rows exist in the database before the next
    # level reads their ids.
    session.commit()
    return children


def seed_all(
    num_projects: int,
    num_exps_per_project: int,
    num_trials_per_exp: int,
    num_runs_per_trial: int,
    num_metrics_per_run: int,
):
    """Create the schema if needed and fill it with a tree of fake rows.

    Every ``*_per_*`` count is applied to each parent row, so totals multiply
    down the hierarchy: ``num_projects * num_exps_per_project`` experiments,
    that many times ``num_trials_per_exp`` trials, and so on. Keep the counts
    small, because the number of metrics is the product of all five arguments.

    Each level is committed on its own. If a level fails, the pending
    transaction is rolled back and the error is re-raised; levels committed
    earlier stay in the database. The shared session is closed when seeding
    ends, whether it succeeded or not.

    :param num_projects: number of projects to create.
    :param num_exps_per_project: experiments created under each project.
    :param num_trials_per_exp: trials created under each experiment.
    :param num_runs_per_trial: runs created under each trial.
    :param num_metrics_per_run: metrics generated from each run.
    """
    Base.metadata.create_all(bind=engine)

    print("🌱 generating seeds ...")
    try:
        projects = [generate_project() for _ in range(num_projects)]
        session.add_all(projects)
        session.commit()

        experiments = _seed_children(
            projects, num_exps_per_project, generate_experiment
        )
        trials = _seed_children(experiments, num_trials_per_exp, generate_trial)
        runs = _seed_children(trials, num_runs_per_trial, generate_run)
        _seed_children(runs, num_metrics_per_run, generate_metric)
    except Exception:
        # Leave the session clean instead of with a half-flushed transaction.
        session.rollback()
        raise
    finally:
        session.close()

    print("🌳 seeding completed.")


if __name__ == "__main__":
    # Totals multiply per level: 3 projects -> 15 experiments -> 75 trials
    # -> 300 runs -> 3000 metrics.
    seed_all(
        num_projects=3,
        num_exps_per_project=5,
        num_trials_per_exp=5,
        num_runs_per_trial=4,
        num_metrics_per_run=10,
    )

poetry.lock

Lines changed: 28 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ pytest = ">=8.4.2,<9.0.0"
2222
ruff = "^0.12.12"
2323
pytest-asyncio = ">=0.22.0,<1.0.0"
2424
pytest-timeout = ">=2.1.0,<3.0.0"
25+
faker = "^37.12.0"
2526

2627
[build-system]
2728
requires = ["poetry-core>=2.0.0,<3.0.0"]

0 commit comments

Comments
 (0)