# Source: configs/default_config.yaml — 179 lines (150 loc), 8.83 KB
---
# OpenEvolve Default Configuration
# This file contains all available configuration options with sensible defaults
# You can use this as a template for your own configuration

# General settings
max_iterations: 100      # Maximum number of evolution iterations
checkpoint_interval: 10  # Save checkpoints every N iterations
log_level: "INFO"        # Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
log_dir: null            # Custom directory for logs (default: output_dir/logs)
random_seed: 42          # Random seed for reproducibility (null = random, 42 = default)

# Evolution settings
diff_based_evolution: true  # Use diff-based evolution (true) or full rewrites (false)
max_code_length: 10000      # Maximum allowed code length in characters

# Early stopping settings
early_stopping_patience: null            # Stop after N iterations without improvement (null = disabled)
convergence_threshold: 0.001             # Minimum improvement required to reset patience counter
early_stopping_metric: "combined_score"  # Metric to track for early stopping
# LLM configuration
llm:
  # Models for evolution
  models:
    # List of available models with their weights
    - name: "gemini-2.0-flash-lite"
      weight: 0.8
    - name: "gemini-2.0-flash"
      weight: 0.2

  # Models for LLM feedback
  evaluator_models:
    # List of available models with their weights
    - name: "gemini-2.0-flash-lite"
      weight: 0.8
    - name: "gemini-2.0-flash"
      weight: 0.2

  # API configuration
  api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"  # Base URL for API (change for non-OpenAI models)
  api_key: null  # API key (defaults to OPENAI_API_KEY env variable)
  # or use ${VAR} syntax to specify which environment variable to read from:
  # api_key: ${GEMINI_API_KEY}  # Reads API key from $GEMINI_API_KEY

  # Generation parameters
  temperature: 0.7  # Temperature for generation (higher = more creative)
  top_p: 0.95       # Top-p sampling parameter
  max_tokens: 4096  # Maximum tokens to generate

  # Request parameters
  timeout: 60     # Timeout for API requests in seconds
  retries: 3      # Number of retries for failed requests
  retry_delay: 5  # Delay between retries in seconds
# Prompt configuration
prompt:
  template_dir: null  # Custom directory for prompt templates
  system_message: "You are an expert coder helping to improve programs through evolution."
  evaluator_system_message: "You are an expert code reviewer."

  # Number of examples to include in the prompt
  num_top_programs: 3      # Number of top-performing programs to include
  num_diverse_programs: 2  # Number of diverse programs to include

  # Template stochasticity
  use_template_stochasticity: true  # Use random variations in templates for diversity
  template_variations:  # Different phrasings for parts of the template
    improvement_suggestion:
      - "Here's how we could improve this code:"
      - "I suggest the following improvements:"
      - "We can enhance this code by:"

  # Artifact rendering
  include_artifacts: true         # Include execution outputs/errors in prompt
  max_artifact_bytes: 20480       # Maximum artifact size in bytes (20KB default)
  artifact_security_filter: true  # Apply security filtering to artifacts

  # Feature extraction and program labeling thresholds
  # These control how the LLM perceives and categorizes programs
  suggest_simplification_after_chars: 500     # Suggest simplifying if program exceeds this many characters
  include_changes_under_chars: 100            # Include change descriptions in features if under this length
  concise_implementation_max_lines: 10        # Label as "concise" if program has this many lines or fewer
  comprehensive_implementation_min_lines: 50  # Label as "comprehensive" if program has this many lines or more

  # Diff summary formatting for "Previous Attempts" section
  # Controls how SEARCH/REPLACE blocks are displayed in prompts
  diff_summary_max_line_len: 100  # Truncate lines longer than this (with "...")
  diff_summary_max_lines: 30      # Max lines per SEARCH/REPLACE block

  # Note: meta-prompting features are not yet implemented
# Database configuration
database:
  # General settings
  db_path: null      # Path to persist database (null = in-memory only)
  in_memory: true    # Keep database in memory for faster access
  log_prompts: true  # If true, log all prompts and responses into the database

  # Evolutionary parameters
  population_size: 1000  # Maximum number of programs to keep in memory
  archive_size: 100      # Size of elite archive
  num_islands: 5         # Number of islands for island model (separate populations)

  # Island-based evolution parameters
  # Islands provide diversity by maintaining separate populations that evolve independently.
  # Migration periodically shares the best solutions between adjacent islands.
  migration_interval: 50  # Migrate between islands every N generations
  migration_rate: 0.1     # Fraction of top programs to migrate (0.1 = 10%)

  # Selection parameters
  elite_selection_ratio: 0.1  # Ratio of elite programs to select
  exploration_ratio: 0.2      # Ratio of exploration vs exploitation
  exploitation_ratio: 0.7     # Ratio of exploitation vs random selection
  # Note: diversity_metric is fixed to "edit_distance" (feature_based not implemented)

  # Feature map dimensions for MAP-Elites
  # Default if not specified: ["complexity", "diversity"]
  #
  # Built-in features (always available, computed by OpenEvolve):
  #   - "complexity": Code length
  #   - "diversity": Code structure diversity
  #
  # You can mix built-in features with custom metrics from your evaluator:
  feature_dimensions:  # Dimensions for MAP-Elites feature map (for diversity, NOT fitness)
    - "complexity"  # Code length (built-in)
    - "diversity"   # Code diversity (built-in)
  # Example with custom features:
  # feature_dimensions:
  #   - "performance"        # Must be returned by your evaluator
  #   - "correctness"        # Must be returned by your evaluator
  #   - "memory_efficiency"  # Must be returned by your evaluator

  # Number of bins per dimension
  # Can be a single integer (same for all dimensions) or a dict
  feature_bins: 10  # Number of bins per dimension
  # Example of per-dimension configuration:
  # feature_bins:
  #   complexity: 10   # 10 bins for complexity
  #   diversity: 15    # 15 bins for diversity
  #   performance: 20  # 20 bins for custom metric

  diversity_reference_size: 20  # Size of reference set for diversity calculation
# Evaluator configuration
evaluator:
  # Fitness calculation: Uses 'combined_score' if available, otherwise averages
  # all metrics EXCEPT those listed in database.feature_dimensions

  # General settings
  timeout: 300    # Maximum evaluation time in seconds
  max_retries: 3  # Maximum number of retries for evaluation
  # Note: resource limits (memory_limit_mb, cpu_limit) are not yet implemented

  # Evaluation strategies
  cascade_evaluation: true  # Use cascade evaluation to filter bad solutions early
  cascade_thresholds:  # Thresholds for advancing to next evaluation stage
    - 0.5   # First stage threshold
    - 0.75  # Second stage threshold
    - 0.9   # Third stage threshold

  # Parallel evaluation
  parallel_evaluations: 4  # Number of parallel evaluations
  # Note: distributed evaluation is not yet implemented

  # LLM-based feedback (experimental)
  use_llm_feedback: false   # Use LLM to evaluate code quality
  llm_feedback_weight: 0.1  # Weight for LLM feedback in final score
# Evolution trace configuration
# Logs detailed traces of program evolution for RL training and analysis
evolution_trace:
  enabled: false         # Enable evolution trace logging
  format: "jsonl"        # Output format: "jsonl", "json", or "hdf5"
  include_code: false    # Include full program code in traces
  include_prompts: true  # Include prompts and LLM responses
  output_path: null      # Path for trace output (defaults to output_dir/evolution_trace.{format})
  buffer_size: 10        # Number of traces to buffer before writing
  compress: false        # Compress output file (jsonl only)