Skip to content

Commit 79b72b0

Browse files
committed
Fix callable evaluator failing in subprocess workers
_prepare_evaluator() stored callable evaluators in globals() of the api module, which is inaccessible from ProcessPoolExecutor worker processes. Workers import openevolve.api fresh and the dynamic attribute doesn't exist in their memory space, causing AttributeError on every evaluation. Fix: serialize the callable to disk via cloudpickle/pickle and have the evaluator wrapper load it from the file. Also document the if __name__ == '__main__' guard requirement for macOS/Windows multiprocessing. Verified with a local LLM endpoint (Ollama) to confirm cross-process evaluation works end-to-end.
1 parent 65cbbe8 commit 79b72b0

File tree

4 files changed

+110
-20
lines changed

4 files changed

+110
-20
lines changed

README.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,14 +119,17 @@ def bubble_sort(arr):
119119
arr[j], arr[j+1] = arr[j+1], arr[j]
120120
return arr
121121

122-
result = evolve_function(
123-
bubble_sort,
124-
test_cases=[([3,1,2], [1,2,3]), ([5,2,8], [2,5,8])],
125-
iterations=50
126-
)
127-
print(f"Evolved sorting algorithm: {result.best_code}")
122+
if __name__ == '__main__':
123+
result = evolve_function(
124+
bubble_sort,
125+
test_cases=[([3,1,2], [1,2,3]), ([5,2,8], [2,5,8])],
126+
iterations=50
127+
)
128+
print(f"Evolved sorting algorithm: {result.best_code}")
128129
```
129130

131+
> **Note:** On macOS and Windows, Python starts worker processes with the `spawn` method, which re-imports your script in each child process. You must wrap evolution calls in `if __name__ == '__main__':` so that child processes do not re-execute the evolution code when they import the module.
132+
130133
**Prefer Docker?** See the [Installation & Setup](#installation--setup) section for Docker options.
131134

132135
## See It In Action

openevolve/api.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import asyncio
6+
import pickle
67
import tempfile
78
import os
89
import uuid
@@ -239,20 +240,33 @@ def _prepare_evaluator(
239240

240241
# If it's a callable, create a wrapper module
241242
if callable(evaluator):
242-
# Create a unique global name for this evaluator
243-
evaluator_id = f"_openevolve_evaluator_{uuid.uuid4().hex[:8]}"
243+
try:
244+
import cloudpickle as _pickle_mod
245+
except ImportError:
246+
_pickle_mod = pickle
247+
248+
# Serialize the callable to a file so subprocess workers can load it
249+
if temp_dir is None:
250+
temp_dir = tempfile.gettempdir()
244251

245-
# Store in globals so the wrapper can find it
246-
globals()[evaluator_id] = evaluator
252+
pickle_path = os.path.join(temp_dir, f"evaluator_{uuid.uuid4().hex[:8]}.pkl")
253+
with open(pickle_path, "wb") as pf:
254+
_pickle_mod.dump(evaluator, pf)
255+
temp_files.append(pickle_path)
247256

248257
evaluator_code = f"""
249-
# Wrapper for user-provided evaluator function
250-
import {__name__} as api_module
258+
# Wrapper for user-provided evaluator function (serialized to disk for cross-process access)
259+
import pickle
260+
261+
_cached_evaluator = None
251262
252263
def evaluate(program_path):
253-
'''Wrapper for user-provided evaluator function'''
254-
user_evaluator = getattr(api_module, '{evaluator_id}')
255-
return user_evaluator(program_path)
264+
'''Wrapper that loads the evaluator from a pickle file'''
265+
global _cached_evaluator
266+
if _cached_evaluator is None:
267+
with open({pickle_path!r}, 'rb') as f:
268+
_cached_evaluator = pickle.load(f)
269+
return _cached_evaluator(program_path)
256270
"""
257271
else:
258272
# Treat as code string

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies = [
1717
"tqdm>=4.64.0",
1818
"flask",
1919
"dacite>=1.9.2",
20+
"cloudpickle>=2.0.0",
2021
]
2122

2223
[project.optional-dependencies]

tests/test_api.py

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,17 @@ def test_prepare_evaluator_from_callable(self):
116116
"""Test _prepare_evaluator with callable function"""
117117
def my_evaluator(program_path):
118118
return {"score": 0.8, "test": "passed"}
119-
119+
120120
temp_files = []
121121
result = _prepare_evaluator(my_evaluator, self.temp_dir, temp_files)
122-
122+
123123
self.assertTrue(os.path.exists(result))
124-
self.assertEqual(len(temp_files), 1)
125-
124+
# 2 temp files: the pickle file + the wrapper .py
125+
self.assertEqual(len(temp_files), 2)
126+
126127
with open(result, 'r') as f:
127128
content = f.read()
128129
self.assertIn("def evaluate(program_path)", content)
129-
self.assertIn("user_evaluator", content)
130130

131131
def test_prepare_evaluator_from_string(self):
132132
"""Test _prepare_evaluator with code string"""
@@ -278,5 +278,77 @@ def test_run_evolution_cleanup_false(self):
278278
mock_async.assert_called_once()
279279

280280

281+
class TestEvaluatorCrossProcess(unittest.TestCase):
282+
"""Test that callable evaluators work across process boundaries"""
283+
284+
def setUp(self):
285+
self.temp_dir = tempfile.mkdtemp()
286+
287+
def tearDown(self):
288+
import shutil
289+
shutil.rmtree(self.temp_dir, ignore_errors=True)
290+
291+
def test_callable_evaluator_works_in_subprocess(self):
292+
"""Test that a callable evaluator serialized by _prepare_evaluator
293+
can be loaded and executed in a separate process (simulating
294+
ProcessPoolExecutor workers)."""
295+
def my_evaluator(program_path):
296+
return {"score": 0.42, "passed": True}
297+
298+
temp_files = []
299+
eval_file = _prepare_evaluator(my_evaluator, self.temp_dir, temp_files)
300+
301+
# Load and run the evaluator in a subprocess — this is what
302+
# process_parallel.py workers do.
303+
import subprocess, sys, json
304+
result = subprocess.run(
305+
[
306+
sys.executable, "-c",
307+
f"""
308+
import importlib.util, json, sys
309+
spec = importlib.util.spec_from_file_location("eval_mod", {eval_file!r})
310+
mod = importlib.util.module_from_spec(spec)
311+
spec.loader.exec_module(mod)
312+
print(json.dumps(mod.evaluate("dummy_path.py")))
313+
"""
314+
],
315+
capture_output=True, text=True, timeout=10
316+
)
317+
self.assertEqual(result.returncode, 0, f"Subprocess failed: {result.stderr}")
318+
metrics = json.loads(result.stdout.strip())
319+
self.assertAlmostEqual(metrics["score"], 0.42)
320+
self.assertTrue(metrics["passed"])
321+
322+
def test_callable_evaluator_with_closure(self):
323+
"""Test that a closure (capturing local variables) works across processes."""
324+
threshold = 0.5
325+
func_name = "my_func"
326+
327+
def closure_evaluator(program_path):
328+
return {"score": threshold, "func": func_name}
329+
330+
temp_files = []
331+
eval_file = _prepare_evaluator(closure_evaluator, self.temp_dir, temp_files)
332+
333+
import subprocess, sys, json
334+
result = subprocess.run(
335+
[
336+
sys.executable, "-c",
337+
f"""
338+
import importlib.util, json
339+
spec = importlib.util.spec_from_file_location("eval_mod", {eval_file!r})
340+
mod = importlib.util.module_from_spec(spec)
341+
spec.loader.exec_module(mod)
342+
print(json.dumps(mod.evaluate("dummy.py")))
343+
"""
344+
],
345+
capture_output=True, text=True, timeout=10
346+
)
347+
self.assertEqual(result.returncode, 0, f"Subprocess failed: {result.stderr}")
348+
metrics = json.loads(result.stdout.strip())
349+
self.assertAlmostEqual(metrics["score"], 0.5)
350+
self.assertEqual(metrics["func"], "my_func")
351+
352+
281353
if __name__ == '__main__':
282354
unittest.main()

0 commit comments

Comments
 (0)