Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions configs/regular/svm.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@
}
],
"svr datasets": [
{
"data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
"algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
},
{
"data": { "dataset": "fried", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } },
"algorithm": { "estimator_params": { "C": 2.0, "kernel": "rbf" } }
Expand Down Expand Up @@ -84,10 +80,6 @@
}
],
"nusvr datasets": [
{
"data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
"algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
},
{
"data": { "dataset": "twodplanes", "split_kwargs": { "train_size": 25000, "test_size": null } },
"algorithm": { "estimator_params": { "C": 1.0, "kernel": ["linear", "poly", "rbf"] } }
Expand Down
4 changes: 3 additions & 1 deletion sklbench/datasets/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def fetch_and_correct_openml(

# Get the data with target column specified
x, y, _, _ = dataset.get_data(
dataset_format="dataframe" if as_frame is True else "array",
dataset_format="dataframe",
target=dataset.default_target_attribute,
)

Expand All @@ -109,6 +109,8 @@ def fetch_and_correct_openml(
if isinstance(x, pd.DataFrame):
if any(pd.api.types.is_sparse(x[col]) for col in x.columns):
x = x.sparse.to_dense()
if as_frame is not True:
x = x.to_numpy()

# Convert y to numpy array if needed
if isinstance(y, pd.Series):
Expand Down
2 changes: 1 addition & 1 deletion sklbench/datasets/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ def load_codrnanorm(
data_name: str, data_cache: str, raw_data_cache: str, dataset_params: Dict
) -> Tuple[Dict, Dict]:
def transform_x_y(x, y):
x = pd.DataFrame(x.todense())
x = pd.DataFrame(x.todense() if hasattr(x, "todense") else x)
y = y.astype("int")
y[y == -1] = 0
return x, y
Expand Down
31 changes: 20 additions & 11 deletions sklbench/utils/measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,31 @@ def enrich_metrics(
"""Transforms raw performance and other results into aggregated metrics"""
# time metrics
res = bench_result.copy()
mean, std = box_filter(res["time[ms]"])
if include_performance_stability_metrics:
if isinstance(res["time[ms]"], list):
mean, std = box_filter(res["time[ms]"])
if include_performance_stability_metrics:
res.update(
{
"1st run time[ms]": res["time[ms]"][0],
"1st-mean run ratio": res["time[ms]"][0] / mean,
Copy link
Copy Markdown

@Vika-F Vika-F Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to have mean == 0 here?

Suggested change
"1st-mean run ratio": res["time[ms]"][0] / mean,
"1st-mean run ratio": res["time[ms]"][0] / mean if mean != 0 else res["time[ms]"][0],

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the mean runtime is 0, then we have larger problems than a divide by zero. But I did not update the logic here (see main branch); the diff only shows because I added an additional condition (if isinstance(res["time[ms]"], list):). There is no diff within this condition.

}
)
res.update(
{
"1st run time[ms]": res["time[ms]"][0],
"1st-mean run ratio": res["time[ms]"][0] / mean,
"time[ms]": mean,
"time CV": std / mean, # Coefficient of Variation
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's very confusing to have a measurement called "time CV" reflecting the coefficient of variation when we also have procedures doing cross validation there. Also note that this is not actually the coefficient of variation due to the "boxed" methodology.

Perhaps we could output just the standard deviation and name it 'std[ms]'. Otherwise, maybe we could name it "std to mean (ratio)".

Copy link
Copy Markdown
Contributor Author

@ethanglaser ethanglaser Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't disagree with you; in fact, for the large-scale branch that we have for multi-gpu, we use std instead of CV. But I suggest revising this in a different PR, as I did not change the logic here - the only reason the diff shows is because there was a condition added. The scope of this PR is resolving slowdowns/errors/failures and improving stability of CI jobs.

}
)
res.update(
{
"time[ms]": mean,
"time CV": std / mean, # Coefficient of Variation
}
)
else:
# already aggregated (e.g. from a baseline file)
mean = res["time[ms]"]
std = res.get("time std[ms]", 0.0)
if mean != 0:
res["time CV"] = std / mean
else:
res["time CV"] = 0.0
cost = res.get("cost[microdollar]", None)
if cost:
if cost and isinstance(cost, list):
res["cost[microdollar]"] = box_filter(res["cost[microdollar]"])[0]
batch_size = res.get("batch_size", None)
if batch_size:
Expand Down
Loading