Skip to content

Commit 842e5e7

Browse files
authored
Revise SVR dataset scope and address uncaught errors (#208)
* accommodate non-list metrics in baselines
* remove year_prediction_msd from regular runs
* address future warning
* minor revision
* only call todense if sparse
* remove unnecessary diff
1 parent 17ac9ae commit 842e5e7

4 files changed

Lines changed: 24 additions & 21 deletions

File tree

configs/regular/svm.json

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -36,10 +36,6 @@
3636
}
3737
],
3838
"svr datasets": [
39-
{
40-
"data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
41-
"algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
42-
},
4339
{
4440
"data": { "dataset": "fried", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } },
4541
"algorithm": { "estimator_params": { "C": 2.0, "kernel": "rbf" } }
@@ -84,10 +80,6 @@
8480
}
8581
],
8682
"nusvr datasets": [
87-
{
88-
"data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
89-
"algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
90-
},
9183
{
9284
"data": { "dataset": "twodplanes", "split_kwargs": { "train_size": 25000, "test_size": null } },
9385
"algorithm": { "estimator_params": { "C": 1.0, "kernel": ["linear", "poly", "rbf"] } }

sklbench/datasets/downloaders.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def fetch_and_correct_openml(
9797

9898
# Get the data with target column specified
9999
x, y, _, _ = dataset.get_data(
100-
dataset_format="dataframe" if as_frame is True else "array",
100+
dataset_format="dataframe",
101101
target=dataset.default_target_attribute,
102102
)
103103

@@ -109,6 +109,8 @@ def fetch_and_correct_openml(
109109
if isinstance(x, pd.DataFrame):
110110
if any(pd.api.types.is_sparse(x[col]) for col in x.columns):
111111
x = x.sparse.to_dense()
112+
if as_frame is not True:
113+
x = x.to_numpy()
112114

113115
# Convert y to numpy array if needed
114116
if isinstance(y, pd.Series):

sklbench/datasets/loaders.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ def load_codrnanorm(
450450
data_name: str, data_cache: str, raw_data_cache: str, dataset_params: Dict
451451
) -> Tuple[Dict, Dict]:
452452
def transform_x_y(x, y):
453-
x = pd.DataFrame(x.todense())
453+
x = pd.DataFrame(x)
454454
y = y.astype("int")
455455
y[y == -1] = 0
456456
return x, y

sklbench/utils/measurement.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,22 +65,31 @@ def enrich_metrics(
6565
"""Transforms raw performance and other results into aggregated metrics"""
6666
# time metrics
6767
res = bench_result.copy()
68-
mean, std = box_filter(res["time[ms]"])
69-
if include_performance_stability_metrics:
68+
if isinstance(res["time[ms]"], list):
69+
mean, std = box_filter(res["time[ms]"])
70+
if include_performance_stability_metrics:
71+
res.update(
72+
{
73+
"1st run time[ms]": res["time[ms]"][0],
74+
"1st-mean run ratio": res["time[ms]"][0] / mean,
75+
}
76+
)
7077
res.update(
7178
{
72-
"1st run time[ms]": res["time[ms]"][0],
73-
"1st-mean run ratio": res["time[ms]"][0] / mean,
79+
"time[ms]": mean,
80+
"time CV": std / mean, # Coefficient of Variation
7481
}
7582
)
76-
res.update(
77-
{
78-
"time[ms]": mean,
79-
"time CV": std / mean, # Coefficient of Variation
80-
}
81-
)
83+
else:
84+
# already aggregated (e.g. from a baseline file)
85+
mean = res["time[ms]"]
86+
std = res.get("time std[ms]", 0.0)
87+
if mean != 0:
88+
res["time CV"] = std / mean
89+
else:
90+
res["time CV"] = 0.0
8291
cost = res.get("cost[microdollar]", None)
83-
if cost:
92+
if cost and isinstance(cost, list):
8493
res["cost[microdollar]"] = box_filter(res["cost[microdollar]"])[0]
8594
batch_size = res.get("batch_size", None)
8695
if batch_size:

0 commit comments

Comments
 (0)