From 133f1123a8b5014f6bb7ccbfb74c645cefe02149 Mon Sep 17 00:00:00 2001 From: 7908837174 Date: Thu, 26 Feb 2026 21:56:09 +0530 Subject: [PATCH] Fix step_size parameter handling and add tests for issue #86 --- .../optimizer_search/hill_climbing.py | 11 ++++ .../random_restart_hill_climbing.py | 10 ++++ .../optimizer_search/simulated_annealing.py | 9 ++++ .../stochastic_hill_climbing.py | 7 +++ .../random_restart_hill_climbing.py | 2 + .../local_opt/hill_climbing_optimizer.py | 47 +++++++++++++++-- .../local_opt/simulated_annealing.py | 2 + .../local_opt/stochastic_hill_climbing.py | 2 + tests/test_api/test_api_global.py | 5 ++ .../test_hill_climbing_para_init.py | 3 ++ .../test_parameters/test_hill_climbing.py | 51 ++++++++++++++++++- .../test_simulated_annealing.py | 20 ++++++++ .../test_stochastic_hill_climbing.py | 18 +++++++ 13 files changed, 182 insertions(+), 5 deletions(-) diff --git a/src/gradient_free_optimizers/optimizer_search/hill_climbing.py b/src/gradient_free_optimizers/optimizer_search/hill_climbing.py index 1830e80e..31a98748 100644 --- a/src/gradient_free_optimizers/optimizer_search/hill_climbing.py +++ b/src/gradient_free_optimizers/optimizer_search/hill_climbing.py @@ -138,6 +138,15 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + + step_size : float or None, default=None + Absolute step size for neighbor generation. When provided, + ``epsilon`` is ignored and the optimizer uses an identical + absolute jump magnitude in all continuous dimensions. This is + particularly useful for fine grids where a small fractional + epsilon would still correspond to a large move in value space. + Decreasing ``step_size`` therefore allows the optimizer to converge + with finer precision. distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -228,6 +237,7 @@ def __init__( epsilon: float = 0.03, distribution: Literal["normal", "laplace", "gumbel", "logistic"] = "normal", n_neighbours: int = 3, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -244,4 +254,5 @@ def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py b/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py index 10c0dc5d..415aed42 100644 --- a/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py @@ -147,6 +147,14 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + step_size : float or None, default=None + Absolute step size for generating neighbors. This parameter behaves + identically to the one in :class:`~gradient_free_optimizers.optimizer_search.hill_climbing.HillClimbingOptimizer`. + When ``step_size`` is given, ``epsilon`` is ignored. Providing a + small step size is particularly helpful when using fine-grained + search spaces, because it allows the optimizer to make very small + moves that are comparable to the grid spacing. + distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -254,6 +262,7 @@ def __init__( distribution: Literal["normal", "laplace", "gumbel", "logistic"] = "normal", n_neighbours: int = 3, n_iter_restart: int = 10, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -271,4 +280,5 @@ def __init__( distribution=distribution, n_neighbours=n_neighbours, n_iter_restart=n_iter_restart, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py b/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py index 6ab70a08..6e22159d 100644 --- a/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py +++ b/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py @@ -146,6 +146,13 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + + step_size : float or None, default=None + Absolute step size for neighbor generation. Overrides ``epsilon`` if + both are provided. Works in the same way as the parameter of the + parent HillClimbingOptimizer: step magnitude is fixed across all + continuous dimensions and affords precise control on finely + discretized grids. distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -269,6 +276,7 @@ def __init__( n_neighbours: int = 3, annealing_rate: float = 0.97, start_temp: float = 1, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -287,4 +295,5 @@ n_neighbours=n_neighbours, annealing_rate=annealing_rate, start_temp=start_temp, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py b/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py index 2a56e9f4..023adeb0 100644 --- a/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py @@ -140,6 +140,11 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + + step_size : float or None, default=None + Absolute step size for neighbor generation. When provided, + ``epsilon`` is ignored. It is interpreted in the units of the search space + values and allows finer control on finely discretized grids. distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -246,6 +251,7 @@ def __init__( distribution: Literal["normal", "laplace", "gumbel", "logistic"] = "normal", n_neighbours: int = 3, p_accept: float = 0.5, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -263,4 +269,5 @@ def __init__( distribution=distribution, n_neighbours=n_neighbours, p_accept=p_accept, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py b/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py index ffdc7ef0..52c42ca7 100644 --- a/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py @@ -71,6 +71,7 @@ def __init__( distribution="normal", n_neighbours=3, n_iter_restart=10, + step_size=None, ): super().__init__( search_space=search_space, @@ -82,6 +83,7 @@ def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, ) self.n_iter_restart = n_iter_restart diff --git a/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py b/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py index f2deb861..35affba5 100644 --- a/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py +++ b/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py @@ -75,6 +75,7 @@ def __init__( epsilon=0.03, distribution="normal", n_neighbours=3, + step_size=None, ): super().__init__( search_space=search_space, @@ -84,7 +85,36 @@ def __init__( rand_rest_p=rand_rest_p, nth_process=nth_process, ) - self.epsilon = epsilon + # step_size takes precedence over epsilon if provided + self.step_size = step_size + if step_size is not None: + # compute per-dimension epsilon values so that noise sigma becomes + # approximately equal to the 
requested absolute step size. We + # handle continuous and discrete dimensions separately since they + # use different scaling formulas. + # Note: __init__ of CoreOptimizer has already filled in the + # dimension masks and bounds. + if self._continuous_mask is not None and self._continuous_mask.any(): + ranges = self._continuous_bounds[:, 1] - self._continuous_bounds[:, 0] + # avoid division by zero for degenerate dims + eps_vals = np.where(ranges > 0, step_size / ranges, 0) + # store epsilon array to be used when sampling continuous noise + self.epsilon_cont = eps_vals + else: + self.epsilon_cont = None + if self._discrete_mask is not None and self._discrete_mask.any(): + max_pos = self._discrete_bounds[:, 1] + eps_vals = np.where(max_pos > 0, step_size / max_pos, 0) + self.epsilon_disc = eps_vals + else: + self.epsilon_disc = None + # keep self.epsilon for compatibility but it will not be used + self.epsilon = epsilon + else: + # normal behaviour using relative epsilon + self.epsilon = epsilon + self.epsilon_cont = None + self.epsilon_disc = None self.distribution = distribution self.n_neighbours = n_neighbours @@ -121,8 +151,14 @@ def _iterate_continuous_batch(self) -> np.ndarray: # Calculate range for each dimension ranges = bounds[:, 1] - bounds[:, 0] - # Scale sigma by range and epsilon - sigmas = ranges * self.epsilon + # Determine sigma for each dimension. If a fixed absolute + # ``step_size`` was provided we precomputed per-dimension epsilon + # values such that ``ranges * eps = step_size``; otherwise fall back + # to the original relative ``epsilon`` behaviour. 
+ if getattr(self, "step_size", None) is not None and self.epsilon_cont is not None: + sigmas = ranges * self.epsilon_cont + else: + sigmas = ranges * self.epsilon # Generate noise using the configured distribution noise_fn = self._DISTRIBUTIONS[self.distribution] @@ -182,7 +218,10 @@ def _iterate_discrete_batch(self) -> np.ndarray: # Use max position to scale sigma (similar to continuous) max_positions = bounds[:, 1] - sigmas = max_positions * self.epsilon + if getattr(self, "step_size", None) is not None and self.epsilon_disc is not None: + sigmas = max_positions * self.epsilon_disc + else: + sigmas = max_positions * self.epsilon # Prevent zero sigma for single-value dimensions sigmas = np.maximum(sigmas, 1e-10) diff --git a/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py b/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py index 4413fd9c..30a0b80b 100644 --- a/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py +++ b/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py @@ -78,6 +78,7 @@ def __init__( n_neighbours=3, annealing_rate=0.97, start_temp=1, + step_size=None, ): super().__init__( search_space=search_space, @@ -89,6 +90,7 @@ def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, # Note: p_accept is not used in SA, we use pure Metropolis criterion ) self.annealing_rate = annealing_rate diff --git a/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py b/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py index e158d3ea..9ff1ff73 100644 --- a/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py @@ -74,6 +74,7 @@ def __init__( distribution="normal", n_neighbours=3, p_accept=0.5, + step_size=None, ): super().__init__( search_space=search_space, @@ -85,6 +86,7 @@ 
def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, ) self.p_accept = p_accept self.temp = 1 # Constant temperature for stochastic hill climbing diff --git a/tests/test_api/test_api_global.py b/tests/test_api/test_api_global.py index 755dc7a0..4efbde77 100644 --- a/tests/test_api/test_api_global.py +++ b/tests/test_api/test_api_global.py @@ -202,6 +202,11 @@ def test_n_iter_restart_parameter(self): opt = RandomRestartHillClimbingOptimizer(SEARCH_SPACE, n_iter_restart=5) opt.search(objective, n_iter=1, verbosity=False) + def test_step_size_parameter(self): + """Test step_size parameter exists and accepts float (inherited from HC).""" + opt = RandomRestartHillClimbingOptimizer(SEARCH_SPACE, step_size=0.5) + opt.search(objective, n_iter=1, verbosity=False) + def test_all_parameters_explicit(self): """Test all parameters can be set explicitly.""" opt = RandomRestartHillClimbingOptimizer( diff --git a/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py b/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py index e3c9a43c..ad7547e8 100644 --- a/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py +++ b/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py @@ -14,6 +14,9 @@ ({"epsilon": 1}), ({"epsilon": 10}), ({"epsilon": 10000}), + ({"step_size": 0.001}), + ({"step_size": 1}), + ({"step_size": 10}), ({"distribution": "normal"}), ({"distribution": "laplace"}), ({"distribution": "logistic"}), diff --git a/tests/test_main/test_parameters/test_hill_climbing.py b/tests/test_main/test_parameters/test_hill_climbing.py index cc0e561f..e7a399d4 100644 --- a/tests/test_main/test_parameters/test_hill_climbing.py +++ b/tests/test_main/test_parameters/test_hill_climbing.py @@ -25,9 +25,58 @@ def test_epsilon_0(): opt = HillClimbingOptimizer( search_space, initialize={"vertices": 1}, epsilon=epsilon ) - 
opt.search(parabola_function, n_iter=100) + opt.search(parabola_function, n_iter=100, verbosity=False) search_data = opt.search_data scores = search_data["score"].values assert np.all(scores == -200) + + +def test_step_size_0(): + # step_size of zero should mimic epsilon==0 behaviour + step_size = 0.0 + opt = HillClimbingOptimizer( + search_space, initialize={"vertices": 1}, step_size=step_size + ) + opt.search(parabola_function, n_iter=100, verbosity=False) + search_data = opt.search_data + scores = search_data["score"].values + assert np.all(scores == -200) + + +def test_step_size_convergence(): + # replicate issue: decreasing step_size should produce equal or better + # final score on a fine grid + space = { + "x": np.arange(-10, 10, 0.01), + "y": np.arange(-10, 10, 0.01), + } + + def sphere(para): + x, y = para["x"], para["y"] + return -(x ** 2 + y ** 2) + + scores = [] + for step in [1, 0.5, 0.1, 0.01]: + opt = HillClimbingOptimizer(space, step_size=step, random_state=42) + opt.search(sphere, n_iter=500, verbosity=False) + scores.append(opt.best_score) + + # smaller step size should achieve a score at least as good as largest + assert scores[-1] >= max(scores[:-1]) + + +def test_step_size_epsilon_conversion(): + # discrete dims should compute epsilon_disc such that sigma == step_size + space_disc = {"x": np.arange(0, 10, 1), "y": np.arange(0, 20, 1)} + step = 2 + opt = HillClimbingOptimizer(space_disc, step_size=step) + expected = np.array([step / (len(space_disc["x"]) - 1), step / (len(space_disc["y"]) - 1)]) + assert np.allclose(opt.epsilon_disc, expected) + + # continuous dims should compute epsilon_cont similarly + space_cont = {"x": (0, 10), "y": (0, 20)} + opt2 = HillClimbingOptimizer(space_cont, step_size=step) + expected2 = np.array([step / 10, step / 20]) + assert np.allclose(opt2.epsilon_cont, expected2) diff --git a/tests/test_main/test_parameters/test_simulated_annealing.py b/tests/test_main/test_parameters/test_simulated_annealing.py index 
bdfff9a6..5867157c 100644 --- a/tests/test_main/test_parameters/test_simulated_annealing.py +++ b/tests/test_main/test_parameters/test_simulated_annealing.py @@ -182,3 +182,23 @@ def test_annealing_rate_0(): assert n_transitions_0 in [0, 1] assert n_transitions_1 < n_transitions_100 + + +def test_step_size_parameter(): + # ensure wrapper accepts step_size and converts epsilon + space = {"x": (0, 10), "y": (0, 20)} + opt = SimulatedAnnealingOptimizer(space, step_size=2) + assert hasattr(opt, "step_size") and opt.step_size == 2 + # epsilon_cont should be computed accordingly + assert np.allclose(opt.epsilon_cont, np.array([0.2, 0.1])) + + +def test_step_size_conversion(): + # verify epsilon_cont/disc for step_size + space_disc = {"x": np.arange(0, 5, 1)} + opt = SimulatedAnnealingOptimizer(space_disc, step_size=2) + expected = np.array([2 / (len(space_disc["x"]) - 1)]) + assert np.allclose(opt.epsilon_disc, expected) + space_cont = {"x": (0, 5)} + opt2 = SimulatedAnnealingOptimizer(space_cont, step_size=2) + assert np.allclose(opt2.epsilon_cont, np.array([0.4])) diff --git a/tests/test_main/test_parameters/test_stochastic_hill_climbing.py b/tests/test_main/test_parameters/test_stochastic_hill_climbing.py index be67f611..249a087d 100644 --- a/tests/test_main/test_parameters/test_stochastic_hill_climbing.py +++ b/tests/test_main/test_parameters/test_stochastic_hill_climbing.py @@ -52,3 +52,21 @@ def test_p_accept(): higher_bound = n_iter assert lower_bound < n_transitions_low < n_transitions_high < higher_bound + + +def test_step_size_parameter(): + # ensure the new argument is accepted and stored + opt = StochasticHillClimbingOptimizer(search_space, step_size=0.5) + assert hasattr(opt, "step_size") + assert opt.step_size == 0.5 + + +def test_step_size_conversion(): + # check internal epsilon values for discrete and continuous dims + space_disc = {"x1": np.arange(0, 5, 1)} + opt = StochasticHillClimbingOptimizer(space_disc, step_size=2) + expected = np.array([2 / 
(len(space_disc["x1"]) - 1)]) + assert np.allclose(opt.epsilon_disc, expected) + space_cont = {"x1": (0, 10)} + opt2 = StochasticHillClimbingOptimizer(space_cont, step_size=2) + assert np.allclose(opt2.epsilon_cont, np.array([0.2]))