|
| 1 | +""" |
| 2 | +CTRNN Signal Tracking with CPU vs GPU Performance Comparison |
| 3 | +
|
| 4 | +Evolves a Continuous-Time Recurrent Neural Network (CTRNN) to perform frequency |
| 5 | +doubling: given sin(2*pi*t) and cos(2*pi*t) as inputs, produce sin(4*pi*t) as |
| 6 | +output. This task requires nonlinear transformation of the inputs (since |
| 7 | +sin(2x) = 2*sin(x)*cos(x)) and is a natural fit for CTRNN dynamics. |
| 8 | +
|
| 9 | +The example runs evolution using both the pure-Python CPU evaluator and the |
| 10 | +GPU-accelerated evaluator (when available), printing a timing comparison. |
| 11 | +
|
| 12 | +Usage: |
| 13 | + python evolve.py # CPU, then GPU if available |
| 14 | + python evolve.py --cpu-only # Force CPU only |
| 15 | + python evolve.py --gpu-only # Force GPU only (requires CuPy) |
| 16 | + python evolve.py --generations 100 # Set number of generations |
| 17 | + python evolve.py --pop-size 500 # Override population size |
| 18 | +""" |
| 19 | + |
| 20 | +import argparse |
| 21 | +import math |
| 22 | +import os |
| 23 | +import time |
| 24 | + |
| 25 | +import neat |
| 26 | + |
| 27 | +# --------------------------------------------------------------------------- |
| 28 | +# Task definition: frequency doubling |
| 29 | +# |
| 30 | +# inputs: [sin(2*pi*t), cos(2*pi*t)] |
| 31 | +# target: sin(4*pi*t) = 2 * sin(2*pi*t) * cos(2*pi*t) |
| 32 | +# |
| 33 | +# The integration runs for t_max seconds at time step dt. Both CPU and GPU |
| 34 | +# evaluators use identical input signals and fitness computation for a fair |
| 35 | +# comparison. |
| 36 | +# --------------------------------------------------------------------------- |
| 37 | + |
# Simulation grid used by both the CPU loop and the GPU evaluator.
DT = 0.01                      # integration time step, in seconds
T_MAX = 1.0                    # simulated duration, in seconds
NUM_STEPS = int(T_MAX / DT)    # number of integration steps (100 here)
FREQ = 1.0                     # base input frequency, in Hz

# Desired output per step: a sinusoid at twice the input frequency,
# sampled at t = k * DT for k = 0 .. NUM_STEPS - 1.
TARGET = [
    math.sin(4.0 * math.pi * FREQ * (k * DT))
    for k in range(NUM_STEPS)
]
| 45 | + |
| 46 | + |
def input_fn(t, dt):
    """Sample the two network inputs at simulation time ``t``.

    Returns ``[sin(2*pi*FREQ*t), cos(2*pi*FREQ*t)]`` as a list.
    ``dt`` is accepted but not used by this function.
    """
    phase = 2.0 * math.pi * FREQ * t
    return [math.sin(phase), math.cos(phase)]
| 51 | + |
| 52 | + |
| 53 | +# --------------------------------------------------------------------------- |
| 54 | +# CPU evaluation |
| 55 | +# --------------------------------------------------------------------------- |
| 56 | + |
def eval_genomes_cpu(genomes, config):
    """Score every genome by stepping its CTRNN one at a time on the CPU.

    For each ``(id, genome)`` pair a ``neat.ctrnn.CTRNN`` is created, reset,
    and advanced ``NUM_STEPS`` times with step size ``DT``. The genome's
    fitness is the negative mean squared error between the first network
    output and ``TARGET``. A run whose output becomes NaN, infinite, or
    exceeds 1e10 in magnitude is abandoned and given a fitness of -10.0.
    """
    for _, genome in genomes:
        network = neat.ctrnn.CTRNN.create(genome, config)
        network.reset()

        squared_error = 0.0
        for step in range(NUM_STEPS):
            signals = input_fn(step * DT, DT)
            y = network.advance(signals, DT, DT)[0]

            # Numerically unstable run: assign the flat penalty and stop.
            if not math.isfinite(y) or abs(y) > 1e10:
                genome.fitness = -10.0
                break

            squared_error += (y - TARGET[step]) ** 2
        else:
            # Only reached when every step stayed within bounds.
            genome.fitness = -squared_error / NUM_STEPS
| 80 | + |
| 81 | + |
| 82 | +# --------------------------------------------------------------------------- |
| 83 | +# GPU evaluation |
| 84 | +# --------------------------------------------------------------------------- |
| 85 | + |
def make_gpu_evaluator():
    """Build a ``GPUCTRNNEvaluator`` for the frequency-doubling task.

    The evaluator is constructed with the module's ``DT``, ``T_MAX`` and
    ``input_fn``, plus a fitness function that mirrors ``eval_genomes_cpu``:
    negative mean squared error against ``TARGET``, or a flat -10.0 when the
    output trajectory is numerically unstable.

    NumPy and the GPU evaluator are imported inside the function, so
    importing this module does not require them.
    """
    import numpy as np
    from neat.gpu.evaluator import GPUCTRNNEvaluator

    target_np = np.array(TARGET, dtype=np.float32)

    def fitness_fn(trajectory):
        """Return the fitness for one genome's output trajectory.

        trajectory: ndarray of shape [num_steps, num_outputs].

        Returns -10.0 if any sample of the first output is NaN, infinite,
        or larger than 1e10 in magnitude; otherwise the negative mean
        squared error against the target.
        """
        output = trajectory[:, 0]

        # Apply the same instability penalty as the CPU path. Without it an
        # unstable network would get a NaN or -inf fitness here, which is
        # not comparable with the CPU run.
        if not np.all(np.isfinite(output)) or np.any(np.abs(output) > 1e10):
            return -10.0

        mse = float(np.mean((output - target_np) ** 2))
        return -mse

    return GPUCTRNNEvaluator(
        dt=DT,
        t_max=T_MAX,
        input_fn=input_fn,
        fitness_fn=fitness_fn,
    )
| 108 | + |
| 109 | + |
| 110 | +# --------------------------------------------------------------------------- |
| 111 | +# Run helper |
| 112 | +# --------------------------------------------------------------------------- |
| 113 | + |
def run_evolution(config, eval_fn, n_generations, label, seed=42):
    """Evolve a new population with ``eval_fn`` and time the run.

    A ``neat.Population`` is created from ``config`` and ``seed``, given a
    timing reporter and a ``neat.StdOutReporter``, and run for up to
    ``n_generations`` generations. ``label`` is accepted but not used here.

    Returns a tuple ``(winner, gen_times, total)``: the value returned by
    ``Population.run``, a list with one entry per generation holding the
    seconds elapsed between ``start_generation`` and ``post_evaluate``, and
    the wall-clock seconds spent inside ``Population.run``.
    """
    population = neat.Population(config, seed=seed)

    class TimingReporter(neat.reporting.BaseReporter):
        """Record the time from generation start to the end of evaluation."""

        def __init__(self):
            self.durations = []
            self._started_at = None

        def start_generation(self, generation):
            self._started_at = time.perf_counter()

        def post_evaluate(self, config, population, species, best_genome):
            self.durations.append(time.perf_counter() - self._started_at)

    timer = TimingReporter()
    population.add_reporter(timer)
    population.add_reporter(neat.StdOutReporter(False))

    started = time.perf_counter()
    best = population.run(eval_fn, n_generations)
    wall_time = time.perf_counter() - started

    return best, timer.durations, wall_time
| 140 | + |
| 141 | + |
| 142 | +# --------------------------------------------------------------------------- |
| 143 | +# Main |
| 144 | +# --------------------------------------------------------------------------- |
| 145 | + |
def _parse_args():
    """Parse and validate the command-line options for this example."""
    parser = argparse.ArgumentParser(
        description='CTRNN signal tracking with CPU vs GPU comparison')

    # The two "only" switches contradict each other; let argparse reject the
    # combination instead of reporting a misleading "GPU not available".
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('--cpu-only', action='store_true',
                      help='Run CPU evaluation only')
    mode.add_argument('--gpu-only', action='store_true',
                      help='Run GPU evaluation only (requires CuPy)')
    parser.add_argument('--generations', type=int, default=50,
                        help='Number of generations (default: 50)')
    parser.add_argument('--pop-size', type=int, default=None,
                        help='Override population size from config')
    parser.add_argument('--seed', type=int, default=42,
                        help='Random seed for reproducibility (default: 42)')
    args = parser.parse_args()

    # Zero generations would leave no timings to average and no winner to
    # report, so reject it up front with a clear message.
    if args.generations < 1:
        parser.error('--generations must be at least 1')
    if args.pop_size is not None and args.pop_size < 1:
        parser.error('--pop-size must be at least 1')
    return args


def _mean(values):
    """Return the arithmetic mean of ``values``, or 0.0 if it is empty."""
    return sum(values) / len(values) if values else 0.0


def _print_run_summary(label, winner, gen_times, total):
    """Print the one-line timing and fitness summary for a finished run."""
    print(f'\n{label}: {total:.2f}s total, '
          f'{_mean(gen_times):.4f}s/gen avg, '
          f'best fitness = {winner.fitness:.6f}')


def _print_comparison(cpu_result, gpu_result):
    """Print the CPU vs GPU table from two ``run_evolution`` results."""
    cpu_winner, cpu_times, cpu_total = cpu_result
    gpu_winner, gpu_times, gpu_total = gpu_result

    # The per-generation figures are the intervals recorded by
    # run_evolution (generation start to post-evaluate); the totals cover
    # the whole Population.run call.
    cpu_eval_avg = _mean(cpu_times)
    gpu_eval_avg = _mean(gpu_times)
    speedup = cpu_total / gpu_total if gpu_total > 0 else float('inf')
    eval_speedup = cpu_eval_avg / gpu_eval_avg if gpu_eval_avg > 0 else float('inf')

    print()
    print('=' * 65)
    print('Performance Comparison')
    print('=' * 65)
    print(f'{"":>20} {"CPU":>12} {"GPU":>12} {"Speedup":>12}')
    print(f'{"":>20} {"---":>12} {"---":>12} {"-------":>12}')
    print(f'{"Total time":>20} {cpu_total:>11.2f}s {gpu_total:>11.2f}s '
          f'{speedup:>10.1f}x')
    print(f'{"Avg per generation":>20} {cpu_eval_avg:>11.4f}s {gpu_eval_avg:>11.4f}s '
          f'{eval_speedup:>10.1f}x')
    print(f'{"Best fitness":>20} {cpu_winner.fitness:>12.6f} {gpu_winner.fitness:>12.6f}')
    print()
    print('Note: GPU speedup increases with larger populations. '
          'Try --pop-size 1000.')


def main():
    """Run the CPU and/or GPU evolution and print a timing comparison.

    Loads the ``config-ctrnn`` file that sits next to this script, applies
    the optional ``--pop-size`` override, probes for GPU support unless
    ``--cpu-only`` is given, then runs the selected evaluators and prints
    their results.

    Raises:
        SystemExit: on invalid command-line options, or when ``--gpu-only``
            is requested but GPU support is unavailable.
    """
    args = _parse_args()

    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config-ctrnn')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

    if args.pop_size is not None:
        config.pop_size = args.pop_size

    # Check GPU availability.
    gpu_ok = False
    if not args.cpu_only:
        try:
            from neat.gpu import gpu_available
            gpu_ok = gpu_available()
        except ImportError:
            # Best effort: GPU support is optional, so a missing module
            # simply means "no GPU run".
            pass

    if args.gpu_only and not gpu_ok:
        # Exit with a non-zero status so callers can detect the failure.
        raise SystemExit(
            "ERROR: --gpu-only requested but CuPy/CUDA is not available.\n"
            "Install with: pip install 'neat-python[gpu]'")

    if args.cpu_only:
        # The GPU was never probed, so do not claim it is unavailable.
        gpu_status = 'skipped (--cpu-only)'
    else:
        gpu_status = 'available' if gpu_ok else 'not available'

    # Banner.
    print('=' * 65)
    print('CTRNN Signal Tracking — Frequency Doubling')
    print('=' * 65)
    print('Task: sin(2*pi*t), cos(2*pi*t) -> sin(4*pi*t)')
    print(f'Simulation: dt={DT}s, t_max={T_MAX}s, {NUM_STEPS} steps')
    print(f'Population: {config.pop_size}')
    print(f'Generations: {args.generations}')
    print(f'Seed: {args.seed}')
    print(f'GPU: {gpu_status}')
    print()

    cpu_result = None
    gpu_result = None

    # --- CPU run ---
    if not args.gpu_only:
        print('-' * 65)
        print('Running CPU evaluation...')
        print('-' * 65)
        cpu_result = run_evolution(
            config, eval_genomes_cpu, args.generations, 'CPU', seed=args.seed)
        _print_run_summary('CPU', *cpu_result)

    # --- GPU run ---
    # gpu_ok can only be True when --cpu-only was not given.
    # NOTE(review): the same config object is reused for this second run;
    # presumably it carries no state from the CPU run that would skew the
    # comparison — confirm against the neat-python version in use.
    if gpu_ok:
        print()
        print('-' * 65)
        print('Running GPU evaluation...')
        print('-' * 65)
        gpu_eval = make_gpu_evaluator()
        gpu_result = run_evolution(
            config, gpu_eval.evaluate, args.generations, 'GPU', seed=args.seed)
        _print_run_summary('GPU', *gpu_result)

    # --- Comparison ---
    if cpu_result is not None and gpu_result is not None:
        _print_comparison(cpu_result, gpu_result)
| 251 | + |
| 252 | + |
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments