99
1010from abc import ABC , abstractmethod
1111from dataclasses import dataclass
12- import hashlib
1312from pathlib import Path
13+ import random
1414import time
1515from typing import TYPE_CHECKING , Any
1616
1717from loguru import logger
1818
1919from gigaevo .prompts import load_prompt
20+ from gigaevo .prompts .coevolution .stats import prompt_text_to_id
2021
2122if TYPE_CHECKING :
2223 from gigaevo .database .program_storage import ProgramStorage
@@ -218,10 +219,13 @@ def _is_cache_stale(self) -> bool:
218219 return (time .monotonic () - self ._cache_timestamp ) >= self ._cache_ttl
219220
220221 def _refresh_champion (self ) -> "_PromptPack | None" :
221- """Read the current champion from the prompt run's Redis archive.
222+ """Select a prompt from the prompt run's archive using fitness-proportional sampling.
223+
224+ Instead of always picking the single best, uses stochastic selection so
225+ that multiple prompts accumulate trial data from the main run.
222226
223227 Returns:
224- _PromptPack if a champion was found , None if archive is empty
228+ _PromptPack if a prompt was selected , None if archive is empty
225229 """
226230 try :
227231 r = self ._get_sync_redis ()
@@ -235,46 +239,47 @@ def _refresh_champion(self) -> "_PromptPack | None":
235239 )
236240 return None
237241
238- # Fetch all programs and find the champion
239- best_program_id : str | None = None
240- best_fitness : float = float ("-inf" )
241- best_code : str | None = None
242+ # Collect all candidates with their fitness and code
243+ import json
242244
245+ candidates : list [tuple [str , float , str ]] = [] # (pid, fitness, code)
243246 for pid in program_ids :
244247 program_key = f"{ self ._prompt_prefix } :program:{ pid } "
245248 raw = r .get (program_key )
246249 if not raw :
247250 continue
248251 try :
249- import json
250-
251252 data = json .loads (raw )
252253 metrics = data .get ("metrics" , {})
253- fitness = float (metrics .get (self ._fitness_key , float ( "-inf" ) ))
254+ fitness = float (metrics .get (self ._fitness_key , 0.0 ))
254255 code = data .get ("code" , "" )
255- if fitness > best_fitness and code :
256- best_fitness = fitness
257- best_program_id = pid
258- best_code = code
256+ if code :
257+ candidates .append ((pid , fitness , code ))
259258 except Exception as exc :
260259 logger .debug (
261260 f"[GigaEvoArchivePromptFetcher] Error parsing program { pid } : { exc } "
262261 )
263262 continue
264263
265- if best_code is None or best_program_id is None :
264+ if not candidates :
266265 return None
267266
268- # Execute the champion's entrypoint() to get the prompt pack
269- prompt_id = hashlib .sha256 (best_program_id .encode ()).hexdigest ()[:16 ]
270- pack = self ._execute_entrypoint (best_code , prompt_id )
267+ # Fitness-proportional sampling (weights are floored at epsilon, so prompts with zero or negative fitness can still be selected)
268+ epsilon = 0.01
269+ weights = [max (f , epsilon ) for _ , f , _ in candidates ]
270+ chosen_pid , chosen_fitness , chosen_code = random .choices (
271+ candidates , weights = weights , k = 1
272+ )[0 ]
273+
274+ pack = self ._execute_entrypoint (chosen_code )
271275 if pack is None :
272276 return None
273277
274278 logger .debug (
275- f"[GigaEvoArchivePromptFetcher] Champion: { best_program_id [:8 ]} "
276- f"fitness={ best_fitness :.4f} prompt_id={ prompt_id } "
277- f"has_user={ pack .user is not None } "
279+ f"[GigaEvoArchivePromptFetcher] Selected: { chosen_pid [:8 ]} "
280+ f"fitness={ chosen_fitness :.4f} prompt_id={ pack .prompt_id } "
281+ f"has_user={ pack .user is not None } "
282+ f"(from { len (candidates )} candidates)"
278283 )
279284 return pack
280285
@@ -285,17 +290,19 @@ def _refresh_champion(self) -> "_PromptPack | None":
285290 )
286291 return None
287292
288- def _execute_entrypoint (self , code : str , prompt_id : str ) -> "_PromptPack | None" :
293+ def _execute_entrypoint (self , code : str ) -> "_PromptPack | None" :
289294 """Execute a program's entrypoint() in a clean namespace.
290295
296+ Computes prompt_id from the system prompt TEXT (not the program UUID)
297+ so it matches the ID used by PromptFitnessStage on the read side.
298+
291299 Args:
292300 code: Python source code with entrypoint() function that returns
293301 either a str (system prompt only) or a dict with keys
294302 "system" (required) and "user" (optional).
295- prompt_id: Pre-computed prompt_id to attach to the resulting pack.
296303
297304 Returns:
298- _PromptPack with system/user texts, or None on error
305+ _PromptPack with system/user texts and text-derived prompt_id , or None on error
299306 """
300307 try :
301308 namespace : dict [str , Any ] = {}
@@ -313,7 +320,8 @@ def _execute_entrypoint(self, code: str, prompt_id: str) -> "_PromptPack | None"
313320 "[GigaEvoArchivePromptFetcher] entrypoint() returned empty string"
314321 )
315322 return None
316- return _PromptPack (system = result , user = None , prompt_id = prompt_id )
323+ pid = prompt_text_to_id (result )
324+ return _PromptPack (system = result , user = None , prompt_id = pid )
317325 elif isinstance (result , dict ):
318326 system = result .get ("system" , "" )
319327 if not isinstance (system , str ) or not system .strip ():
@@ -327,7 +335,8 @@ def _execute_entrypoint(self, code: str, prompt_id: str) -> "_PromptPack | None"
327335 "[GigaEvoArchivePromptFetcher] dict entrypoint() has invalid 'user' key — ignoring"
328336 )
329337 user = None
330- return _PromptPack (system = system , user = user , prompt_id = prompt_id )
338+ pid = prompt_text_to_id (system )
339+ return _PromptPack (system = system , user = user , prompt_id = pid )
331340 else :
332341 logger .warning (
333342 f"[GigaEvoArchivePromptFetcher] entrypoint() returned { type (result )} , "
0 commit comments