From 133f1123a8b5014f6bb7ccbfb74c645cefe02149 Mon Sep 17 00:00:00 2001 From: 7908837174 Date: Thu, 26 Feb 2026 21:56:09 +0530 Subject: [PATCH] Fix step_size parameter handling and add tests for issue #86 --- .../optimizer_search/hill_climbing.py | 11 ++++ .../random_restart_hill_climbing.py | 10 ++++ .../optimizer_search/simulated_annealing.py | 9 ++++ .../stochastic_hill_climbing.py | 7 +++ .../random_restart_hill_climbing.py | 2 + .../local_opt/hill_climbing_optimizer.py | 47 +++++++++++++++-- .../local_opt/simulated_annealing.py | 2 + .../local_opt/stochastic_hill_climbing.py | 2 + tests/test_api/test_api_global.py | 5 ++ .../test_hill_climbing_para_init.py | 3 ++ .../test_parameters/test_hill_climbing.py | 51 ++++++++++++++++++- .../test_simulated_annealing.py | 20 ++++++++ .../test_stochastic_hill_climbing.py | 18 +++++++ 13 files changed, 182 insertions(+), 5 deletions(-) diff --git a/src/gradient_free_optimizers/optimizer_search/hill_climbing.py b/src/gradient_free_optimizers/optimizer_search/hill_climbing.py index 1830e80e..31a98748 100644 --- a/src/gradient_free_optimizers/optimizer_search/hill_climbing.py +++ b/src/gradient_free_optimizers/optimizer_search/hill_climbing.py @@ -138,6 +138,15 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + + step_size : float or None, default=None + Absolute step size for neighbor generation. When provided, + ``epsilon`` is ignored and the optimizer uses an identical + absolute jump magnitude in all continuous dimensions. This is + particularly useful for fine grids where a small fractional + epsilon would still correspond to a large move in value space. + Decreasing ``step_size`` therefore allows the optimizer to converge + with finer precision. distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -228,6 +237,7 @@ def __init__( epsilon: float = 0.03, distribution: Literal["normal", "laplace", "gumbel", "logistic"] = "normal", n_neighbours: int = 3, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -244,4 +254,5 @@ def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py b/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py index 10c0dc5d..415aed42 100644 --- a/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizer_search/random_restart_hill_climbing.py @@ -147,6 +147,14 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + step_size : float or None, default=None + Absolute step size for generating neighbors. This parameter behaves + identically to the one in :class:`~gradient_free_optimizers.optimizer_search.hill_climbing.HillClimbingOptimizer`. + When ``step_size`` is given, ``epsilon`` is ignored. Providing a + small step size is particularly helpful when using fine-grained + search spaces, because it allows the optimizer to make very small + moves that are comparable to the grid spacing. + distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -254,6 +262,7 @@ def __init__( distribution: Literal["normal", "laplace", "gumbel", "logistic"] = "normal", n_neighbours: int = 3, n_iter_restart: int = 10, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -271,4 +280,5 @@ def __init__( distribution=distribution, n_neighbours=n_neighbours, n_iter_restart=n_iter_restart, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py b/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py index 6ab70a08..6e22159d 100644 --- a/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py +++ b/src/gradient_free_optimizers/optimizer_search/simulated_annealing.py @@ -146,6 +146,13 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + + step_size : float or None, default=None + Absolute step size for neighbor generation. Overrides ``epsilon`` if + both are provided. Works in the same way as the parameter of the + parent HillClimbingOptimizer: step magnitude is fixed across all + continuous dimensions and affords precise control on finely + discretized grids. distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -269,6 +276,7 @@ def __init__( n_neighbours: int = 3, annealing_rate: float = 0.97, start_temp: float = 1, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -287,4 +295,5 @@ n_neighbours=n_neighbours, annealing_rate=annealing_rate, start_temp=start_temp, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py b/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py index 2a56e9f4..023adeb0 100644 --- a/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizer_search/stochastic_hill_climbing.py @@ -140,6 +140,11 @@ def circular_constraint(para): Smaller values are better for fine-tuning near a known good solution. Larger values help escape local optima but may overshoot narrow peaks. + + step_size : float or None, default=None + Absolute step size for neighbor generation. When provided, + ``epsilon`` is ignored. It is interpreted in the units of the search space + values and allows finer control on finely discretized grids. distribution : {"normal", "laplace", "gumbel", "logistic"}, default="normal" Probability distribution used to sample neighbor offsets. 
Each distribution produces different exploration patterns: @@ -246,6 +251,7 @@ def __init__( distribution: Literal["normal", "laplace", "gumbel", "logistic"] = "normal", n_neighbours: int = 3, p_accept: float = 0.5, + step_size: float | None = None, ): if initialize is None: initialize = get_default_initialize() @@ -263,4 +269,5 @@ def __init__( distribution=distribution, n_neighbours=n_neighbours, p_accept=p_accept, + step_size=step_size, ) diff --git a/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py b/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py index ffdc7ef0..52c42ca7 100644 --- a/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizers/global_opt/random_restart_hill_climbing.py @@ -71,6 +71,7 @@ def __init__( distribution="normal", n_neighbours=3, n_iter_restart=10, + step_size=None, ): super().__init__( search_space=search_space, @@ -82,6 +83,7 @@ def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, ) self.n_iter_restart = n_iter_restart diff --git a/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py b/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py index f2deb861..35affba5 100644 --- a/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py +++ b/src/gradient_free_optimizers/optimizers/local_opt/hill_climbing_optimizer.py @@ -75,6 +75,7 @@ def __init__( epsilon=0.03, distribution="normal", n_neighbours=3, + step_size=None, ): super().__init__( search_space=search_space, @@ -84,7 +85,36 @@ def __init__( rand_rest_p=rand_rest_p, nth_process=nth_process, ) - self.epsilon = epsilon + # step_size takes precedence over epsilon if provided + self.step_size = step_size + if step_size is not None: + # compute per-dimension epsilon values so that noise sigma becomes + # approximately equal to the 
requested absolute step size. We + # handle continuous and discrete dimensions separately since they + # use different scaling formulas. + # Note: __init__ of CoreOptimizer has already filled in the + # dimension masks and bounds. + if self._continuous_mask is not None and self._continuous_mask.any(): + ranges = self._continuous_bounds[:, 1] - self._continuous_bounds[:, 0] + # avoid division by zero for degenerate dims + eps_vals = np.where(ranges > 0, step_size / ranges, 0) + # store epsilon array to be used when sampling continuous noise + self.epsilon_cont = eps_vals + else: + self.epsilon_cont = None + if self._discrete_mask is not None and self._discrete_mask.any(): + max_pos = self._discrete_bounds[:, 1] + eps_vals = np.where(max_pos > 0, step_size / max_pos, 0) + self.epsilon_disc = eps_vals + else: + self.epsilon_disc = None + # keep self.epsilon for compatibility but it will not be used + self.epsilon = epsilon + else: + # normal behaviour using relative epsilon + self.epsilon = epsilon + self.epsilon_cont = None + self.epsilon_disc = None self.distribution = distribution self.n_neighbours = n_neighbours @@ -121,8 +151,14 @@ def _iterate_continuous_batch(self) -> np.ndarray: # Calculate range for each dimension ranges = bounds[:, 1] - bounds[:, 0] - # Scale sigma by range and epsilon - sigmas = ranges * self.epsilon + # Determine sigma for each dimension. If a fixed absolute + # ``step_size`` was provided we precomputed per-dimension epsilon + # values such that ``ranges * eps = step_size``; otherwise fall back + # to the original relative ``epsilon`` behaviour. 
+ if getattr(self, "step_size", None) is not None and self.epsilon_cont is not None: + sigmas = ranges * self.epsilon_cont + else: + sigmas = ranges * self.epsilon # Generate noise using the configured distribution noise_fn = self._DISTRIBUTIONS[self.distribution] @@ -182,7 +218,10 @@ def _iterate_discrete_batch(self) -> np.ndarray: # Use max position to scale sigma (similar to continuous) max_positions = bounds[:, 1] - sigmas = max_positions * self.epsilon + if getattr(self, "step_size", None) is not None and self.epsilon_disc is not None: + sigmas = max_positions * self.epsilon_disc + else: + sigmas = max_positions * self.epsilon # Prevent zero sigma for single-value dimensions sigmas = np.maximum(sigmas, 1e-10) diff --git a/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py b/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py index 4413fd9c..30a0b80b 100644 --- a/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py +++ b/src/gradient_free_optimizers/optimizers/local_opt/simulated_annealing.py @@ -78,6 +78,7 @@ def __init__( n_neighbours=3, annealing_rate=0.97, start_temp=1, + step_size=None, ): super().__init__( search_space=search_space, @@ -89,6 +90,7 @@ def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, # Note: p_accept is not used in SA, we use pure Metropolis criterion ) self.annealing_rate = annealing_rate diff --git a/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py b/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py index e158d3ea..9ff1ff73 100644 --- a/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py +++ b/src/gradient_free_optimizers/optimizers/local_opt/stochastic_hill_climbing.py @@ -74,6 +74,7 @@ def __init__( distribution="normal", n_neighbours=3, p_accept=0.5, + step_size=None, ): super().__init__( search_space=search_space, @@ -85,6 +86,7 @@ 
def __init__( epsilon=epsilon, distribution=distribution, n_neighbours=n_neighbours, + step_size=step_size, ) self.p_accept = p_accept self.temp = 1 # Constant temperature for stochastic hill climbing diff --git a/tests/test_api/test_api_global.py b/tests/test_api/test_api_global.py index 755dc7a0..4efbde77 100644 --- a/tests/test_api/test_api_global.py +++ b/tests/test_api/test_api_global.py @@ -202,6 +202,11 @@ def test_n_iter_restart_parameter(self): opt = RandomRestartHillClimbingOptimizer(SEARCH_SPACE, n_iter_restart=5) opt.search(objective, n_iter=1, verbosity=False) + def test_step_size_parameter(self): + """Test step_size parameter exists and accepts float (inherited from HC).""" + opt = RandomRestartHillClimbingOptimizer(SEARCH_SPACE, step_size=0.5) + opt.search(objective, n_iter=1, verbosity=False) + def test_all_parameters_explicit(self): """Test all parameters can be set explicitly.""" opt = RandomRestartHillClimbingOptimizer( diff --git a/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py b/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py index e3c9a43c..ad7547e8 100644 --- a/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py +++ b/tests/test_main/test_optimizers/test_parameter/test_hill_climbing_para_init.py @@ -14,6 +14,9 @@ ({"epsilon": 1}), ({"epsilon": 10}), ({"epsilon": 10000}), + ({"step_size": 0.001}), + ({"step_size": 1}), + ({"step_size": 10}), ({"distribution": "normal"}), ({"distribution": "laplace"}), ({"distribution": "logistic"}), diff --git a/tests/test_main/test_parameters/test_hill_climbing.py b/tests/test_main/test_parameters/test_hill_climbing.py index cc0e561f..e7a399d4 100644 --- a/tests/test_main/test_parameters/test_hill_climbing.py +++ b/tests/test_main/test_parameters/test_hill_climbing.py @@ -25,9 +25,58 @@ def test_epsilon_0(): opt = HillClimbingOptimizer( search_space, initialize={"vertices": 1}, epsilon=epsilon ) - 
opt.search(parabola_function, n_iter=100) + opt.search(parabola_function, n_iter=100, verbosity=False) search_data = opt.search_data scores = search_data["score"].values assert np.all(scores == -200) + + +def test_step_size_0(): + # step_size of zero should mimic epsilon==0 behaviour + step_size = 0.0 + opt = HillClimbingOptimizer( + search_space, initialize={"vertices": 1}, step_size=step_size + ) + opt.search(parabola_function, n_iter=100, verbosity=False) + search_data = opt.search_data + scores = search_data["score"].values + assert np.all(scores == -200) + + +def test_step_size_convergence(): + # replicate issue: decreasing step_size should produce equal or better + # final score on a fine grid + space = { + "x": np.arange(-10, 10, 0.01), + "y": np.arange(-10, 10, 0.01), + } + + def sphere(para): + x, y = para["x"], para["y"] + return -(x ** 2 + y ** 2) + + scores = [] + for step in [1, 0.5, 0.1, 0.01]: + opt = HillClimbingOptimizer(space, step_size=step, random_state=42) + opt.search(sphere, n_iter=500, verbosity=False) + scores.append(opt.best_score) + + # smaller step size should achieve a score at least as good as largest + assert scores[-1] >= max(scores[:-1]) + + +def test_step_size_epsilon_conversion(): + # discrete dims should compute epsilon_disc such that sigma == step_size + space_disc = {"x": np.arange(0, 10, 1), "y": np.arange(0, 20, 1)} + step = 2 + opt = HillClimbingOptimizer(space_disc, step_size=step) + expected = np.array([step / (len(space_disc["x"]) - 1), step / (len(space_disc["y"]) - 1)]) + assert np.allclose(opt.epsilon_disc, expected) + + # continuous dims should compute epsilon_cont similarly + space_cont = {"x": (0, 10), "y": (0, 20)} + opt2 = HillClimbingOptimizer(space_cont, step_size=step) + expected2 = np.array([step / 10, step / 20]) + assert np.allclose(opt2.epsilon_cont, expected2) diff --git a/tests/test_main/test_parameters/test_simulated_annealing.py b/tests/test_main/test_parameters/test_simulated_annealing.py index 
bdfff9a6..5867157c 100644 --- a/tests/test_main/test_parameters/test_simulated_annealing.py +++ b/tests/test_main/test_parameters/test_simulated_annealing.py @@ -182,3 +182,23 @@ def test_annealing_rate_0(): assert n_transitions_0 in [0, 1] assert n_transitions_1 < n_transitions_100 + + +def test_step_size_parameter(): + # ensure wrapper accepts step_size and converts epsilon + space = {"x": (0, 10), "y": (0, 20)} + opt = SimulatedAnnealingOptimizer(space, step_size=2) + assert hasattr(opt, "step_size") and opt.step_size == 2 + # epsilon_cont should be computed accordingly + assert np.allclose(opt.epsilon_cont, np.array([0.2, 0.1])) + + +def test_step_size_conversion(): + # verify epsilon_cont/disc for step_size + space_disc = {"x": np.arange(0, 5, 1)} + opt = SimulatedAnnealingOptimizer(space_disc, step_size=2) + expected = np.array([2 / (len(space_disc["x"]) - 1)]) + assert np.allclose(opt.epsilon_disc, expected) + space_cont = {"x": (0, 5)} + opt2 = SimulatedAnnealingOptimizer(space_cont, step_size=2) + assert np.allclose(opt2.epsilon_cont, np.array([0.4])) diff --git a/tests/test_main/test_parameters/test_stochastic_hill_climbing.py b/tests/test_main/test_parameters/test_stochastic_hill_climbing.py index be67f611..249a087d 100644 --- a/tests/test_main/test_parameters/test_stochastic_hill_climbing.py +++ b/tests/test_main/test_parameters/test_stochastic_hill_climbing.py @@ -52,3 +52,21 @@ def test_p_accept(): higher_bound = n_iter assert lower_bound < n_transitions_low < n_transitions_high < higher_bound + + +def test_step_size_parameter(): + # ensure the new argument is accepted and stored + opt = StochasticHillClimbingOptimizer(search_space, step_size=0.5) + assert hasattr(opt, "step_size") + assert opt.step_size == 0.5 + + +def test_step_size_conversion(): + # check internal epsilon values for discrete and continuous dims + space_disc = {"x1": np.arange(0, 5, 1)} + opt = StochasticHillClimbingOptimizer(space_disc, step_size=2) + expected = np.array([2 / 
(len(space_disc["x1"]) - 1)]) + assert np.allclose(opt.epsilon_disc, expected) + space_cont = {"x1": (0, 10)} + opt2 = StochasticHillClimbingOptimizer(space_cont, step_size=2) + assert np.allclose(opt2.epsilon_cont, np.array([0.2]))