Skip to content

Commit e2a85dd

Browse files
authored
Add run_id to metrics (#56)
Signed-off-by: kerthcet <kerthcet@gmail.com>
1 parent fcabc0c commit e2a85dd

8 files changed

Lines changed: 67 additions & 18 deletions

File tree

.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# We use PG by default as the metadata database.
2-
METADATA_DB_URL=postgresql+psycopg2://user:pass@localhost:5432/mydb
2+
METADATA_DB_URL=postgresql+psycopg2://alphatrion:alphatr1on@localhost:5432/alphatrion
33
ARTIFACT_REGISTRY_URL=http://localhost:5000/
44
LOG_LEVEL=INFO

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@ pip install alphatrion
3131

3232
### Initialize the Environment
3333

34+
Run the following command for setup:
35+
3436
```bash
35-
make up
37+
cp .env.example .env && make up  <!-- NOTE(review): the committed line uses a single `&`, which backgrounds `cp` and races it against `make up`; sequential `&&` is almost certainly intended -->
3638
```
3739

3840
You can login to pgAdmin at `http://localhost:8080` to see the Postgres database. The host name for registering a new server is `postgres`, and the username and password are `alphatrion` and `alphatr1on`, respectively.

alphatrion/log/log.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from alphatrion.run.run import current_run_id
12
from alphatrion.runtime.runtime import global_runtime
23
from alphatrion.trial.trial import current_trial_id
34
from alphatrion.utils import time as utime
@@ -51,7 +52,13 @@ async def log_params(params: dict):
5152
# metric key must be string, value must be float.
5253
# If save_on_best is enabled in the trial config, and the metric is the best metric
5354
# so far, the trial will checkpoint the current data.
55+
#
56+
# Note: log_metrics can only be called inside a Run, because it needs a run_id.
5457
async def log_metrics(metrics: dict[str, float]):
58+
run_id = current_run_id.get()
59+
if run_id is None:
60+
raise RuntimeError("log_metrics must be called inside a Run.")
61+
5562
runtime = global_runtime()
5663
exp = runtime.current_exp
5764

@@ -70,6 +77,7 @@ async def log_metrics(metrics: dict[str, float]):
7077
value=value,
7178
project_id=runtime._project_id,
7279
trial_id=trial_id,
80+
run_id=run_id,
7381
step=step,
7482
)
7583

alphatrion/metadata/sql.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ def create_metric(
262262
self,
263263
project_id: uuid.UUID,
264264
trial_id: uuid.UUID,
265+
run_id: uuid.UUID,
265266
key: str,
266267
value: float,
267268
step: int,
@@ -270,6 +271,7 @@ def create_metric(
270271
new_metric = Metric(
271272
project_id=project_id,
272273
trial_id=trial_id,
274+
run_id=run_id,
273275
key=key,
274276
value=value,
275277
step=step,

alphatrion/metadata/sql_models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,5 +111,6 @@ class Metric(Base):
111111
value = Column(Float, nullable=False)
112112
project_id = Column(UUID(as_uuid=True), nullable=False)
113113
trial_id = Column(UUID(as_uuid=True), nullable=False)
114+
run_id = Column(UUID(as_uuid=True), nullable=False)
114115
step = Column(Integer, nullable=False, default=0)
115116
created_at = Column(DateTime(timezone=True), default=datetime.now(UTC))

hack/seed.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def generate_metric(runs: list[Run]) -> Metric:
9696
return Metric(
9797
project_id=run.project_id,
9898
trial_id=run.trial_id,
99+
run_id=run.uuid,
99100
key=random.choice(["accuracy", "loss", "precision", "fitness"]),
100101
value=random.uniform(0, 1),
101102
step=random.randint(1, 1000),

tests/integration/test_log.py

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ async def test_log_params():
9595
async def test_log_metrics():
9696
alpha.init(project_id=uuid.uuid4(), artifact_insecure=True, init_tables=True)
9797

98+
async def log_metric(metrics: dict):
99+
await alpha.log_metrics(metrics)
100+
98101
async with alpha.CraftExperiment.start(name="log_metrics_exp") as exp:
99102
trial = exp.start_trial(name="first-trial", params={"param1": 0.1})
100103

@@ -105,7 +108,8 @@ async def test_log_metrics():
105108
metrics = exp._runtime._metadb.list_metrics(trial_id=trial._id)
106109
assert len(metrics) == 0
107110

108-
await alpha.log_metrics({"accuracy": 0.95, "loss": 0.1})
111+
run = trial.start_run(lambda: log_metric({"accuracy": 0.95, "loss": 0.1}))
112+
await run.wait()
109113

110114
metrics = exp._runtime._metadb.list_metrics(trial_id=trial._id)
111115
assert len(metrics) == 2
@@ -115,14 +119,21 @@ async def test_log_metrics():
115119
assert metrics[1].key == "loss"
116120
assert metrics[1].value == 0.1
117121
assert metrics[1].step == 1
122+
run_id_1 = metrics[0].run_id
123+
assert run_id_1 is not None
124+
assert metrics[0].run_id == metrics[1].run_id
118125

119-
await alpha.log_metrics({"accuracy": 0.96})
126+
run = trial.start_run(lambda: log_metric({"accuracy": 0.96}))
127+
await run.wait()
120128

121129
metrics = exp._runtime._metadb.list_metrics(trial_id=trial._id)
122130
assert len(metrics) == 3
123131
assert metrics[2].key == "accuracy"
124132
assert metrics[2].value == 0.96
125133
assert metrics[2].step == 2
134+
run_id_2 = metrics[2].run_id
135+
assert run_id_2 is not None
136+
assert run_id_2 != run_id_1
126137

127138
trial.cancel()
128139

@@ -131,6 +142,9 @@ async def test_log_metrics():
131142
async def test_log_metrics_with_save_on_max():
132143
alpha.init(project_id=uuid.uuid4(), artifact_insecure=True, init_tables=True)
133144

145+
async def log_metric(value: float):
146+
await alpha.log_metrics({"accuracy": value})
147+
134148
async with alpha.CraftExperiment.start(
135149
name="log_metrics_with_save_on_max",
136150
description="Context manager test",
@@ -139,7 +153,7 @@ async def test_log_metrics_with_save_on_max():
139153
with tempfile.TemporaryDirectory() as tmpdir:
140154
os.chdir(tmpdir)
141155

142-
_ = exp.start_trial(
156+
trial = exp.start_trial(
143157
name="trial-with-save_on_best",
144158
config=alpha.TrialConfig(
145159
checkpoint=alpha.CheckpointConfig(
@@ -156,35 +170,47 @@ async def test_log_metrics_with_save_on_max():
156170
with open(file1, "w") as f:
157171
f.write("This is file1.")
158172

159-
await alpha.log_metrics({"accuracy": 0.90})
173+
run = trial.start_run(lambda: log_metric(0.90))
174+
await run.wait()
160175

161176
versions = exp._runtime._artifact.list_versions(exp.id)
162177
assert len(versions) == 1
163178

164179
# To avoid the same timestamp hash, we wait for 1 second
165180
time.sleep(1)
166181

167-
await alpha.log_metrics({"accuracy": 0.78})
182+
run = trial.start_run(lambda: log_metric(0.78))
183+
await run.wait()
184+
168185
versions = exp._runtime._artifact.list_versions(exp.id)
169186
assert len(versions) == 1
170187

171188
time.sleep(1)
172189

173-
await alpha.log_metrics({"accuracy": 0.91})
190+
run = trial.start_run(lambda: log_metric(0.91))
191+
await run.wait()
192+
174193
versions = exp._runtime._artifact.list_versions(exp.id)
175194
assert len(versions) == 2
176195

177196
time.sleep(1)
178197

179-
await alpha.log_metrics({"accuracy2": 0.98})
198+
run = trial.start_run(lambda: log_metric(0.98))
199+
await run.wait()
200+
180201
versions = exp._runtime._artifact.list_versions(exp.id)
181-
assert len(versions) == 2
202+
assert len(versions) == 3
203+
204+
trial.cancel()
182205

183206

184207
@pytest.mark.asyncio
185208
async def test_log_metrics_with_save_on_min():
186209
alpha.init(project_id=uuid.uuid4(), artifact_insecure=True, init_tables=True)
187210

211+
async def log_metric(value: float):
212+
await alpha.log_metrics({"accuracy": value})
213+
188214
async with alpha.CraftExperiment.start(
189215
name="log_metrics_with_save_on_min",
190216
description="Context manager test",
@@ -193,7 +219,7 @@ async def test_log_metrics_with_save_on_min():
193219
with tempfile.TemporaryDirectory() as tmpdir:
194220
os.chdir(tmpdir)
195221

196-
_ = exp.start_trial(
222+
trial = exp.start_trial(
197223
name="trial-with-save_on_best",
198224
config=alpha.TrialConfig(
199225
checkpoint=alpha.CheckpointConfig(
@@ -210,29 +236,37 @@ async def test_log_metrics_with_save_on_min():
210236
with open(file1, "w") as f:
211237
f.write("This is file1.")
212238

213-
await alpha.log_metrics({"accuracy": 0.30})
239+
run = trial.start_run(lambda: log_metric(0.30))
240+
await run.wait()
214241

215242
versions = exp._runtime._artifact.list_versions(exp.id)
216243
assert len(versions) == 1
217244

218245
# To avoid the same timestamp hash, we wait for 1 second
219246
time.sleep(1)
220247

221-
await alpha.log_metrics({"accuracy": 0.58})
248+
run = trial.start_run(lambda: log_metric(0.58))
249+
await run.wait()
250+
222251
versions = exp._runtime._artifact.list_versions(exp.id)
223252
assert len(versions) == 1
224253

225254
time.sleep(1)
226255

227-
await alpha.log_metrics({"accuracy": 0.21})
256+
run = trial.start_run(lambda: log_metric(0.21))
257+
await run.wait()
258+
228259
versions = exp._runtime._artifact.list_versions(exp.id)
229260
assert len(versions) == 2
230261

231262
time.sleep(1)
232263

233-
await alpha.log_metrics({"accuracy2": 0.18})
264+
task = trial.start_run(lambda: log_metric(0.18))
265+
await task.wait()
234266
versions = exp._runtime._artifact.list_versions(exp.id)
235-
assert len(versions) == 2
267+
assert len(versions) == 3
268+
269+
trial.cancel()
236270

237271

238272
@pytest.mark.asyncio

tests/unit/metadata/test_sql.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,9 @@ def test_create_metric(db):
8888
project_id = uuid.uuid4()
8989
exp_id = db.create_exp("test_exp", project_id, "test description")
9090
trial_id = db.create_trial(exp_id=exp_id, project_id=project_id, name="test-trial")
91-
db.create_metric(project_id, trial_id, "accuracy", 0.95, 1)
92-
db.create_metric(project_id, trial_id, "accuracy", 0.85, 2)
91+
run_id = db.create_run(trial_id=trial_id, project_id=project_id)
92+
db.create_metric(project_id, trial_id, run_id, "accuracy", 0.95, 1)
93+
db.create_metric(project_id, trial_id, run_id, "accuracy", 0.85, 2)
9394

9495
metrics = db.list_metrics(trial_id)
9596
assert len(metrics) == 2

Comments (0)