Commit 18f8a0f

[ENH] sign handling in experiments and optimization (#142)
Fixes #141 by the following approach:

* adds a tag `property:higher_or_lower_is_better` to experiments to signify whether the experiment is a minimization or a maximization problem
* ensures the `score` method is always "higher is better"
* moves the extension locus to a new method pair `evaluate` / `_evaluate`, which has the same orientation as the new tag `property:higher_or_lower_is_better`. The `_score` method no longer exists.

The `SklearnCvExperiment` also gets internal functionality to detect the sign from the metric passed. Since metrics in `sklearn` are not tagged properly, there is some clunky detection logic to infer this non-existent tag.

Further changes:

* current experiments are adapted with correct tags `property:higher_or_lower_is_better`
* current `_score` methods are changed to `_evaluate`
* clarifications in extension templates
* clarifications in docstrings

Decision to move to maximization, see discussion in #141.
1 parent 4a40b14 commit 18f8a0f

9 files changed

Lines changed: 262 additions & 43 deletions
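To illustrate the new contract, here is a minimal sketch: a toy experiment with the `"lower"` orientation, whose raw `evaluate` value is sign-flipped by `score`. The `SquaredErrorExperiment` class is hypothetical, and the `hyperactive.base` import path is assumed from this commit's file layout.

```python
# assumed import path, based on src/hyperactive/base/_experiment.py
from hyperactive.base import BaseExperiment


class SquaredErrorExperiment(BaseExperiment):
    """Hypothetical toy experiment: squared distance from 3, lower is better."""

    _tags = {"property:higher_or_lower_is_better": "lower"}

    def _paramnames(self):
        return ["x"]

    def _evaluate(self, params):
        # raw value in the experiment's native orientation (a loss here)
        return (params["x"] - 3) ** 2, {}


exp = SquaredErrorExperiment()
print(exp.evaluate({"x": 5}))  # (4.0, {})  -- native orientation, lower is better
print(exp.score({"x": 5}))     # (-4.0, {}) -- sign flipped, higher is better
print(exp(x=5))                # -4.0       -- kwargs call, same as score
```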

extension_templates/experiments.py

Lines changed: 15 additions & 10 deletions
@@ -75,7 +75,11 @@ class MyExperiment(BaseExperiment):
         #
         "property:randomness": "random",
         # valid values: "random", "deterministic"
-        # if "deterministic", two calls of score must result in the same value
+        # if "deterministic", two calls of "evaluate" must result in the same value
+        #
+        "property:higher_or_lower_is_better": "lower",
+        # valid values: "higher", "lower", "mixed"
+        # whether higher or lower returns of "evaluate" are better
         #
         # --------------
         # packaging info
@@ -147,25 +151,25 @@ def _paramnames(self):
         return ["score_param1", "score_param2"]
 
     # todo: implement this, mandatory
-    def _score(self, params):
-        """Score the parameters.
+    def _evaluate(self, params):
+        """Evaluate the parameters.
 
         Parameters
         ----------
         params : dict with string keys
-            Parameters to score.
+            Parameters to evaluate.
 
         Returns
         -------
         float
-            The score of the parameters.
+            The value of the parameters as per evaluation.
         dict
             Additional metadata about the search.
         """
         # params is a dictionary with keys being paramnames or subset thereof
         # IMPORTANT: avoid side effects to params!
         #
-        # the method may work if only a subste of the parameters in paramnames is passed
+        # the method may work if only a subset of the parameters in paramnames is passed
         # but this is not necessary
         value = 42  # must be numpy.float64
         metadata = {"some": "metadata"}  # can be any dict
@@ -230,18 +234,19 @@ def get_test_params(cls, parameter_set="default"):
 
     @classmethod
     def _get_score_params(self):
-        """Return settings for testing the score function. Used in tests only.
+        """Return settings for testing score/evaluate functions. Used in tests only.
 
-        Returns a list, the i-th element corresponds to self.get_test_params()[i].
-        It should be a valid call for self.score.
+        Returns a list, the i-th element should be valid arguments for
+        self.evaluate and self.score, of an instance constructed with
+        self.get_test_params()[i].
 
         Returns
         -------
         list of dict
             The parameters to be used for scoring.
         """
         # dict keys should be same as paramnames return
-        # or subset, only if _score allows for subsets of parameters
+        # or subset, only if _evaluate allows for subsets of parameters
         score_params1 = {"score_param1": "foo", "score_param2": "bar"}
         score_params2 = {"score_param1": "baz", "score_param2": "qux"}
         return [score_params1, score_params2]

extension_templates/optimizers.py

Lines changed: 8 additions & 2 deletions
@@ -142,7 +142,8 @@ def _paramnames(self):
         return ["score_param1", "score_param2"]
 
     # optional: implement this to prepare arguments for _run
-    # the default is all parameters passed to __init__, except ex
+    # the default is all parameters passed to __init__, minus the experiment
+    # the result of this is passed to _run as search_config
     def get_search_config(self):
         """Get the search configuration.
 
@@ -153,12 +154,15 @@ def get_search_config(self):
         """
         # the default
         search_config = super().get_search_config()
+        # example of adding a new parameter to the search config
+        # this is optional, but can be useful for clean separation or API interfacing
        search_config["one_more_param"] = 42
+        # this return is available in _run as search_config
         return search_config
 
     # todo: implement this, mandatory
     def _run(self, experiment, **search_config):
-        """Run the optimization search process.
+        """Run the optimization search process to maximize the experiment's score.
 
         Parameters
         ----------
@@ -173,6 +177,8 @@ def _run(self, experiment, **search_config):
             The best parameters found during the search.
             Must have keys a subset or identical to experiment.paramnames().
         """
+        # important: the search logic should *maximize* the experiment's score
+        # this is the main method to implement, it should return the best parameters
         best_params = {"write_some_logic_to_get": "best_params"}
         return best_params
 
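For concreteness, a `_run` respecting the maximization convention could look as follows. This is a sketch only: the random search, the unit-interval parameter ranges, and the `n_trials` search-config key are illustrative assumptions, not part of the template.

```python
import random


def _run(self, experiment, n_trials=100, **search_config):
    """Hypothetical _run: random search maximizing experiment.score."""
    best_params, best_score = None, -float("inf")
    for _ in range(n_trials):
        # draw a candidate; real optimizers replace this with their search logic
        params = {name: random.uniform(0, 1) for name in experiment.paramnames()}
        value, _ = experiment.score(params)  # score is always "higher is better"
        if value > best_score:
            best_params, best_score = params, value
    return best_params
```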

src/hyperactive/base/_experiment.py

Lines changed: 46 additions & 9 deletions
@@ -14,13 +14,15 @@ class BaseExperiment(BaseObject):
         "property:randomness": "random",  # random or deterministic
         # if deterministic, two calls of score will result in the same value
         # random = two calls may result in different values; same as "stochastic"
+        "property:higher_or_lower_is_better": "higher",  # "higher", "lower", "mixed"
+        # whether higher or lower scores are better
     }
 
     def __init__(self):
         super().__init__()
 
     def __call__(self, **kwargs):
-        """Score parameters, with kwargs call."""
+        """Score parameters, with kwargs call. Same as score call."""
         score, _ = self.score(kwargs)
         return score
 
@@ -48,30 +50,55 @@ def _paramnames(self):
         """
         raise NotImplementedError
 
-    def score(self, params):
-        """Score the parameters.
+    def evaluate(self, params):
+        """Evaluate the parameters.
 
         Parameters
         ----------
         params : dict with string keys
-            Parameters to score.
+            Parameters to evaluate.
 
         Returns
         -------
         float
-            The score of the parameters.
+            The value of the parameters as per evaluation.
         dict
             Additional metadata about the search.
         """
         paramnames = self.paramnames()
         if not set(params.keys()) <= set(paramnames):
             raise ValueError("Parameters do not match.")
-        res, metadata = self._score(params)
+        res, metadata = self._evaluate(params)
         res = np.float64(res)
         return res, metadata
 
-    def _score(self, params):
-        """Score the parameters.
+    def _evaluate(self, params):
+        """Evaluate the parameters.
+
+        Parameters
+        ----------
+        params : dict with string keys
+            Parameters to evaluate.
+
+        Returns
+        -------
+        float
+            The value of the parameters as per evaluation.
+        dict
+            Additional metadata about the search.
+        """
+        raise NotImplementedError
+
+    def score(self, params):
+        """Score the parameters - with sign such that higher is always better.
+
+        Same as ``evaluate`` call except for the sign chosen so that higher is better.
+
+        If the tag ``property:higher_or_lower_is_better`` is set to
+        ``"lower"``, the result is ``-self.evaluate(params)``.
+
+        If the tag is set to ``"higher"``, the result is
+        identical to ``self.evaluate(params)``.
 
         Parameters
         ----------
@@ -85,4 +112,14 @@ def _score(self, params):
         dict
             Additional metadata about the search.
         """
-        raise NotImplementedError
+        hib = self.get_tag("property:higher_or_lower_is_better", "lower")
+        if hib == "higher":
+            sign = 1
+        elif hib == "lower":
+            sign = -1
+
+        eval_res = self.evaluate(params)
+        value = eval_res[0]
+        metadata = eval_res[1]
+
+        return sign * value, metadata
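The sign convention can be stated as a small invariant check. The helper below is hypothetical, not part of the commit; it leaves out the `"mixed"` tag value, for which no sign is defined.

```python
import numpy as np


def check_score_orientation(experiment, params):
    """Hypothetical test helper: score equals evaluate with the tag's sign."""
    # reproduce the sign convention from BaseExperiment.score
    hib = experiment.get_tag("property:higher_or_lower_is_better", "lower")
    sign = 1 if hib == "higher" else -1
    eval_value, _ = experiment.evaluate(params)
    score_value, _ = experiment.score(params)
    assert np.isclose(score_value, sign * eval_value)
```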

src/hyperactive/base/_optimizer.py

Lines changed: 12 additions & 1 deletion
@@ -52,12 +52,23 @@ def get_experiment(self):
         return self._experiment
 
     def run(self):
-        """Run the optimization search process.
+        """Run the optimization search process to maximize the experiment's score.
+
+        The optimization searches for a maximizer of the experiment's
+        ``score`` method.
+
+        Depending on the tag ``property:higher_or_lower_is_better`` being
+        set to ``higher`` or ``lower``, the ``run`` method will search for:
+
+        * the minimizer of the ``evaluate`` method if the tag is ``lower``
+        * the maximizer of the ``evaluate`` method if the tag is ``higher``
 
         Returns
         -------
         best_params : dict
             The best parameters found during the optimization process.
+            The dict ``best_params`` can be used in ``experiment.score`` or
+            ``experiment.evaluate`` directly.
         """
         experiment = self.get_experiment()
         search_config = self.get_search_config()
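A usage sketch of the documented contract, as a hypothetical helper that works for any concrete optimizer instance and uses only the public API shown above:

```python
def run_and_report(optimizer):
    """Hypothetical helper: consume run() output per the docstring above."""
    experiment = optimizer.get_experiment()
    # run() maximizes experiment.score; best_params plugs straight back in
    best_params = optimizer.run()
    best_score, _ = experiment.score(best_params)     # higher-is-better orientation
    best_value, _ = experiment.evaluate(best_params)  # experiment's native orientation
    return best_params, best_score, best_value
```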

src/hyperactive/experiment/integrations/sklearn_cv.py

Lines changed: 92 additions & 7 deletions
@@ -110,6 +110,13 @@ def __init__(self, estimator, X, y, scoring=None, cv=None):
         self._scoring = make_scorer(scoring)
         self.scorer_ = self._scoring
 
+        # Set the sign of the scoring function
+        if hasattr(self._scoring, "_score"):
+            score_func = self._scoring._score_func
+            _sign = _guess_sign_of_sklmetric(score_func)
+            _sign_str = "higher" if _sign == 1 else "lower"
+            self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
+
     def _paramnames(self):
         """Return the parameter names of the search.
 
@@ -120,18 +127,18 @@ def _paramnames(self):
         """
         return list(self.estimator.get_params().keys())
 
-    def _score(self, params):
-        """Score the parameters.
+    def _evaluate(self, params):
+        """Evaluate the parameters.
 
         Parameters
         ----------
         params : dict with string keys
-            Parameters to score.
+            Parameters to evaluate.
 
         Returns
         -------
         float
-            The score of the parameters.
+            The value of the parameters as per evaluation.
         dict
             Additional metadata about the search.
         """
@@ -221,10 +228,11 @@ def get_test_params(cls, parameter_set="default"):
 
     @classmethod
     def _get_score_params(self):
-        """Return settings for testing the score function. Used in tests only.
+        """Return settings for testing score/evaluate functions. Used in tests only.
 
-        Returns a list, the i-th element corresponds to self.get_test_params()[i].
-        It should be a valid call for self.score.
+        Returns a list, the i-th element should be valid arguments for
+        self.evaluate and self.score, of an instance constructed with
+        self.get_test_params()[i].
 
         Returns
         -------
@@ -235,3 +243,80 @@ def _get_score_params(self):
         score_params_regress = {"C": 1.0, "kernel": "linear"}
         score_params_defaults = {"C": 1.0, "kernel": "linear"}
         return [score_params_classif, score_params_regress, score_params_defaults]
+
+
+def _guess_sign_of_sklmetric(scorer):
+    """Guess the sign of a sklearn metric scorer.
+
+    Parameters
+    ----------
+    scorer : callable
+        The sklearn metric scorer to guess the sign for.
+
+    Returns
+    -------
+    int
+        1 if higher scores are better, -1 if lower scores are better.
+    """
+    HIGHER_IS_BETTER = {
+        # Classification
+        "accuracy_score": True,
+        "auc": True,
+        "average_precision_score": True,
+        "balanced_accuracy_score": True,
+        "brier_score_loss": False,
+        "class_likelihood_ratios": False,
+        "cohen_kappa_score": True,
+        "d2_log_loss_score": True,
+        "dcg_score": True,
+        "f1_score": True,
+        "fbeta_score": True,
+        "hamming_loss": False,
+        "hinge_loss": False,
+        "jaccard_score": True,
+        "log_loss": False,
+        "matthews_corrcoef": True,
+        "ndcg_score": True,
+        "precision_score": True,
+        "recall_score": True,
+        "roc_auc_score": True,
+        "top_k_accuracy_score": True,
+        "zero_one_loss": False,
+
+        # Regression
+        "d2_absolute_error_score": True,
+        "d2_pinball_score": True,
+        "d2_tweedie_score": True,
+        "explained_variance_score": True,
+        "max_error": False,
+        "mean_absolute_error": False,
+        "mean_absolute_percentage_error": False,
+        "mean_gamma_deviance": False,
+        "mean_pinball_loss": False,
+        "mean_poisson_deviance": False,
+        "mean_squared_error": False,
+        "mean_squared_log_error": False,
+        "mean_tweedie_deviance": False,
+        "median_absolute_error": False,
+        "r2_score": True,
+        "root_mean_squared_error": False,
+        "root_mean_squared_log_error": False,
+    }
+
+    scorer_name = getattr(scorer, "__name__", None)
+
+    if hasattr(scorer, "greater_is_better"):
+        return 1 if scorer.greater_is_better else -1
+    elif scorer_name in HIGHER_IS_BETTER:
+        return 1 if HIGHER_IS_BETTER[scorer_name] else -1
+    elif scorer_name.endswith("_score"):
+        # If the scorer name ends with "_score", we assume higher is better
+        return 1
+    elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"):
+        # If the scorer name ends with "_loss", we assume lower is better
+        return -1
+    elif scorer_name.endswith("_error"):
+        return -1
+    else:
+        # If we cannot determine the sign, we assume lower is better
+        return -1
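A quick sanity check of the detection logic, both the lookup table and the name-suffix fallback. The import path of the private helper is assumed from the file location in this commit, and `my_custom_error` is a hypothetical metric.

```python
from sklearn.metrics import accuracy_score, mean_squared_error

# assumed import path for the private helper added in this commit
from hyperactive.experiment.integrations.sklearn_cv import _guess_sign_of_sklmetric

print(_guess_sign_of_sklmetric(accuracy_score))      # 1, via the lookup table
print(_guess_sign_of_sklmetric(mean_squared_error))  # -1, via the lookup table


def my_custom_error(y_true, y_pred):
    """Hypothetical metric not in the table; name triggers the "_error" heuristic."""


print(_guess_sign_of_sklmetric(my_custom_error))     # -1, name ends with "_error"
```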
