IntelPython · ethanglaser · Apr 24, 2026 · Apr 8, 2026 · Apr 10, 2026 · Apr 10, 2026
@@ -36,10 +36,6 @@
             }
         ],
         "svr datasets": [
-            {
-                "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
-                "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
-            },
             {
                 "data": { "dataset": "fried", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } },
                 "algorithm": { "estimator_params": { "C": 2.0, "kernel": "rbf" } }
@@ -84,10 +80,6 @@
             }
         ],
         "nusvr datasets": [
-            {
-                "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
-                "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
-            },
             {
                 "data": { "dataset": "twodplanes", "split_kwargs": { "train_size": 25000, "test_size": null } },
                 "algorithm": { "estimator_params": { "C": 1.0, "kernel": ["linear", "poly", "rbf"] } }

@@ -97,7 +97,7 @@ def fetch_and_correct_openml(
 
     # Get the data with target column specified
     x, y, _, _ = dataset.get_data(
-        dataset_format="dataframe" if as_frame is True else "array",
+        dataset_format="dataframe",
         target=dataset.default_target_attribute,
     )
 
@@ -109,6 +109,8 @@ def fetch_and_correct_openml(
     if isinstance(x, pd.DataFrame):
         if any(pd.api.types.is_sparse(x[col]) for col in x.columns):
             x = x.sparse.to_dense()
+        if as_frame is not True:
+            x = x.to_numpy()
 
     # Convert y to numpy array if needed
     if isinstance(y, pd.Series):

@@ -450,7 +450,7 @@ def load_codrnanorm(
     data_name: str, data_cache: str, raw_data_cache: str, dataset_params: Dict
 ) -> Tuple[Dict, Dict]:
     def transform_x_y(x, y):
-        x = pd.DataFrame(x.todense())
+        x = pd.DataFrame(x)
         y = y.astype("int")
         y[y == -1] = 0
         return x, y

@@ -65,22 +65,31 @@ def enrich_metrics(
     """Transforms raw performance and other results into aggregated metrics"""
     # time metrics
     res = bench_result.copy()
-    mean, std = box_filter(res["time[ms]"])
-    if include_performance_stability_metrics:
+    if isinstance(res["time[ms]"], list):
+        mean, std = box_filter(res["time[ms]"])
+        if include_performance_stability_metrics:
+            res.update(
+                {
+                    "1st run time[ms]": res["time[ms]"][0],
+                    "1st-mean run ratio": res["time[ms]"][0] / mean if mean != 0 else res["time[ms]"][0],
+                }
+            )
         res.update(
             {
-                "1st run time[ms]": res["time[ms]"][0],
-                "1st-mean run ratio": res["time[ms]"][0] / mean,
+                "time[ms]": mean,
+                "time CV": std / mean,  # Coefficient of Variation
             }
         )
-    res.update(
-        {
-            "time[ms]": mean,
-            "time CV": std / mean,  # Coefficient of Variation
-        }
-    )
+    else:
+        # already aggregated (e.g. from a baseline file)
+        mean = res["time[ms]"]
+        std = res.get("time std[ms]", 0.0)
+        if mean != 0:
+            res["time CV"] = std / mean
+        else:
+            res["time CV"] = 0.0
     cost = res.get("cost[microdollar]", None)
-    if cost:
+    if cost and isinstance(cost, list):
         res["cost[microdollar]"] = box_filter(res["cost[microdollar]"])[0]
     batch_size = res.get("batch_size", None)
     if batch_size: