
Commit de15b53

Handle the case when the benchmark script produces an empty CSV (#996)
# Pull Request

## Title

Handle the case when a script invoked by `local_env` fails silently (i.e., exits with error code 0) and produces an empty metrics CSV file.

---

## Description

* Check whether the metrics input file is empty.
* Make sure the input CSV file contains all required metrics columns.
* Add a unit test for this case.
* Make the local env unit tests a bit more flexible so they can check other cases like this one.

---

## Type of Change

- 🛠️ Bug fix

---

## Testing

Added a new unit test to cover the case.
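For context, the failure mode is easy to reproduce: a `run` step can exit with code 0 yet leave nothing but a header row in the metrics CSV. A minimal sketch of the scenario, assuming an sh-like shell (it mirrors the new unit test below):

```python
import subprocess

# This step "succeeds" (exit code 0) but writes only a CSV header,
# so check=True does not raise -- the failure is silent.
subprocess.run(
    "echo 'latency,throughput,score' > output.csv",
    shell=True,
    check=True,
)

with open("output.csv", encoding="utf-8") as fh:
    print(repr(fh.read()))  # 'latency,throughput,score\n' -- header only, no rows
```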
1 parent 1d28aa9 commit de15b53

5 files changed

Lines changed: 60 additions & 7 deletions


mlos_bench/mlos_bench/environments/local/local_env.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -206,7 +206,10 @@ def run(self) -> tuple[Status, datetime, dict[str, TunableValue] | None]:
         )
 
         _LOG.debug("Read data:\n%s", data)
-        if list(data.columns) == ["metric", "value"]:
+        if len(data) == 0:
+            _LOG.warning("Empty metrics file - fail the run")
+            return (Status.FAILED, timestamp, None)
+        elif list(data.columns) == ["metric", "value"]:
             _LOG.info(
                 "Local results have (metric,value) header and %d rows: assume long format",
                 len(data),
```
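For reference, pandas parses a header-only CSV into a DataFrame that has columns but zero rows, which is exactly what the new `len(data) == 0` guard catches; a minimal sketch:

```python
import io

import pandas as pd

# A header-only CSV yields a DataFrame with columns but no rows:
data = pd.read_csv(io.StringIO("latency,throughput,score\n"))
print(list(data.columns))  # ['latency', 'throughput', 'score']
print(len(data))           # 0 -- such a run is now marked Status.FAILED
                           # instead of crashing further down the pipeline
```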

mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -280,7 +280,10 @@ def bulk_register(
         df_configs = self._to_df(configs)  # Impute missing values, if necessary
 
         df_scores = self._adjust_signs_df(
-            pd.DataFrame([{} if score is None else score for score in scores])
+            pd.DataFrame(
+                [{} if score is None else score for score in scores],
+                columns=list(self._opt_targets),
+            )
         )
 
         if status is not None:
```
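The explicit `columns=` argument matters when every score is `None`: a list of empty dicts otherwise produces a DataFrame with no columns at all, leaving the optimization targets missing downstream. A minimal sketch, with `targets` standing in for `self._opt_targets`:

```python
import pandas as pd

scores = [None, None]  # e.g., every registered trial failed
targets = ["score"]    # stand-in for self._opt_targets

# Without explicit columns, empty dicts yield a column-less frame:
print(pd.DataFrame([{} if s is None else s for s in scores]).columns.tolist())  # []

# With columns=..., the expected target columns exist, NaN-filled:
df_scores = pd.DataFrame([{} if s is None else s for s in scores], columns=targets)
print(df_scores)
#    score
# 0    NaN
# 1    NaN
```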

mlos_bench/mlos_bench/tests/environments/__init__.py

Lines changed: 29 additions & 4 deletions
```diff
@@ -10,15 +10,19 @@
 import pytest
 
 from mlos_bench.environments.base_environment import Environment
+from mlos_bench.environments.status import Status
 from mlos_bench.tunables.tunable_groups import TunableGroups
 from mlos_bench.tunables.tunable_types import TunableValue
 
 
 def check_env_success(
     env: Environment,
     tunable_groups: TunableGroups,
-    expected_results: dict[str, TunableValue],
+    *,
+    expected_results: dict[str, TunableValue] | None,
     expected_telemetry: list[tuple[datetime, str, Any]],
+    expected_status_run: set[Status] | None = None,
+    expected_status_next: set[Status] | None = None,
     global_config: dict | None = None,
 ) -> None:
     """
@@ -34,19 +38,40 @@ def check_env_success(
         Expected results of the benchmark.
     expected_telemetry : list[tuple[datetime, str, Any]]
         Expected telemetry data of the benchmark.
+    expected_status_run : set[Status]
+        Expected status right after the trial.
+        Default is the `SUCCEEDED` value.
+    expected_status_next : set[Status]
+        Expected status values for the next trial.
+        Default is the same set as in `.is_good()`.
     global_config : dict
         Global params.
     """
+    # pylint: disable=too-many-arguments
+    if expected_status_run is None:
+        expected_status_run = {Status.SUCCEEDED}
+
+    if expected_status_next is None:
+        expected_status_next = {
+            Status.PENDING,
+            Status.READY,
+            Status.RUNNING,
+            Status.SUCCEEDED,
+        }
+
     with env as env_context:
 
         assert env_context.setup(tunable_groups, global_config)
 
         (status, _ts, data) = env_context.run()
-        assert status.is_succeeded()
-        assert data == pytest.approx(expected_results, nan_ok=True)
+        assert status in expected_status_run
+        if expected_results is None:
+            assert data is None
+        else:
+            assert data == pytest.approx(expected_results, nan_ok=True)
 
         (status, _ts, telemetry) = env_context.status()
-        assert status.is_good()
+        assert status in expected_status_next
         assert telemetry == pytest.approx(expected_telemetry, nan_ok=True)
 
         env_context.teardown()
```
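One side effect of the bare `*` in the new signature: every argument after `tunable_groups` is now keyword-only, which is why the positional call in `local_env_vars_test.py` below had to be updated. A toy sketch of the mechanics (not the real helper):

```python
def check(env, tunables, *, expected_results, expected_telemetry):
    """Toy stand-in for check_env_success with keyword-only arguments."""
    print(env, tunables, expected_results, expected_telemetry)

check("env", "groups", expected_results=None, expected_telemetry=[])  # OK
# check("env", "groups", None, [])  # TypeError: too many positional arguments
```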

mlos_bench/mlos_bench/tests/environments/local/local_env_test.py

Lines changed: 22 additions & 0 deletions
```diff
@@ -5,6 +5,7 @@
 """Unit tests for LocalEnv benchmark environment."""
 import pytest
 
+from mlos_bench.environments.status import Status
 from mlos_bench.tests.environments import check_env_success
 from mlos_bench.tests.environments.local import create_local_env
 from mlos_bench.tunables.tunable_groups import TunableGroups
@@ -101,3 +102,24 @@ def test_local_env_wide(tunable_groups: TunableGroups) -> None:
         },
         expected_telemetry=[],
     )
+
+
+def test_local_env_results_empty_file(tunable_groups: TunableGroups) -> None:
+    """When the results file is empty, do not crash but mark the trial FAILED."""
+    local_env = create_local_env(
+        tunable_groups,
+        {
+            "run": [
+                "echo 'latency,throughput,score' > output.csv",
+            ],
+            "read_results_file": "output.csv",
+        },
+    )
+
+    check_env_success(
+        local_env,
+        tunable_groups,
+        expected_status_run={Status.FAILED},
+        expected_results=None,
+        expected_telemetry=[],
+    )
```

mlos_bench/mlos_bench/tests/environments/local/local_env_vars_test.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -34,7 +34,7 @@ def _run_local_env(tunable_groups: TunableGroups, shell_subcmd: str, expected: d
         },
     )
 
-    check_env_success(local_env, tunable_groups, expected, [])
+    check_env_success(local_env, tunable_groups, expected_results=expected, expected_telemetry=[])
 
 
 @pytest.mark.skipif(sys.platform == "win32", reason="sh-like shell only")
```
