Skip to content

Commit aaca8af

Browse files
Add optional GPU-accelerated CTRNN and Izhikevich evaluation via CuPy
New neat/gpu/ package provides GPUCTRNNEvaluator and GPUIZNNEvaluator that batch-evaluate entire populations on GPU using padded tensor operations. Variable-topology genomes are packed into fixed-size [N, M, M] weight matrices with zero padding. CTRNN uses exponential Euler (matching the updated CPU integrator). Izhikevich uses the same half-step method as CPU for spike-exact equivalence. A custom CUDA kernel dispatches 11 activation functions matching neat-python formulas. Sum aggregation only (natural for matmul). Import neat never loads CuPy — all GPU imports are lazy. Install via: pip install 'neat-python[gpu]' Includes 20 tests (11 CPU packing, 9 GPU requiring CuPy) and a benchmark script comparing CPU vs GPU wall time. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7d58c15 commit aaca8af

File tree

9 files changed

+2287
-2
lines changed

9 files changed

+2287
-2
lines changed

GPU_DESIGN_NOTES.md

Lines changed: 436 additions & 0 deletions
Large diffs are not rendered by default.

benchmarks/gpu_benchmark.py

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Benchmark comparing CPU vs GPU evaluation for CTRNN and Izhikevich networks.
4+
5+
Usage:
6+
python benchmarks/gpu_benchmark.py
7+
8+
Requires CuPy and NumPy.
9+
"""
10+
11+
import math
12+
import os
13+
import sys
14+
import time
15+
16+
# Add project root to path.
17+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
18+
19+
import numpy as np
20+
21+
import neat
22+
from neat.genes import DefaultNodeGene, DefaultConnectionGene
23+
24+
try:
25+
import cupy as cp
26+
except ImportError:
27+
print("CuPy not installed. GPU benchmarks will be skipped.")
28+
print("Install with: pip install 'neat-python[gpu]'")
29+
sys.exit(1)
30+
31+
32+
# ---------------------------------------------------------------------------
33+
# Configuration and genome helpers
34+
# ---------------------------------------------------------------------------
35+
36+
def make_ctrnn_config():
    """Load the NEAT configuration used by the CTRNN benchmark.

    The settings file is ``tests/test_configuration_gpu_ctrnn``, located
    relative to the directory that contains this script.

    Returns:
        A ``neat.Config`` built with the default genome, reproduction,
        species-set and stagnation classes.
    """
    script_dir = os.path.dirname(__file__)
    config_path = os.path.join(script_dir, '..', 'tests',
                               'test_configuration_gpu_ctrnn')
    component_types = (
        neat.DefaultGenome,
        neat.DefaultReproduction,
        neat.DefaultSpeciesSet,
        neat.DefaultStagnation,
    )
    return neat.Config(*component_types, config_path)
46+
47+
48+
def make_iznn_config():
    """Load the NEAT configuration used by the Izhikevich benchmark.

    The settings file is ``tests/test_configuration_iznn``, located relative
    to the directory that contains this script.

    Returns:
        A ``neat.Config`` built with ``neat.iznn.IZGenome`` as the genome
        type and the default reproduction, species-set and stagnation classes.
    """
    script_dir = os.path.dirname(__file__)
    config_path = os.path.join(script_dir, '..', 'tests',
                               'test_configuration_iznn')
    component_types = (
        neat.iznn.IZGenome,
        neat.DefaultReproduction,
        neat.DefaultSpeciesSet,
        neat.DefaultStagnation,
    )
    return neat.Config(*component_types, config_path)
58+
59+
60+
def make_ctrnn_genome(config, genome_id, num_hidden=0):
    """Create a CTRNN genome with specified number of hidden nodes.

    One node is created for every key in ``config.genome_config.output_keys``
    followed by ``num_hidden`` hidden nodes. Every input is connected to every
    hidden node and every hidden node to every output; when there are no
    hidden nodes the inputs connect directly to the outputs. Node parameters
    and connection weights are drawn from ``np.random.uniform``, so seeding
    NumPy's global generator makes the result reproducible.

    Args:
        config: NEAT configuration; only ``config.genome_config`` (its
            ``input_keys`` and ``output_keys``) is read.
        genome_id: Key passed to ``neat.DefaultGenome``.
        num_hidden: Number of hidden nodes to add.

    Returns:
        The populated ``neat.DefaultGenome``.
    """
    gc = config.genome_config
    genome = neat.DefaultGenome(genome_id)

    def add_node(key):
        # Draw order (bias, response, time constant) is fixed so that a
        # seeded run always produces the same genome.
        node = DefaultNodeGene(key)
        node.bias = np.random.uniform(-1, 1)
        node.response = np.random.uniform(0.5, 2.0)
        node.activation = 'tanh'
        node.aggregation = 'sum'
        node.time_constant = np.random.uniform(0.01, 2.0)
        genome.nodes[key] = node

    # Output nodes come from the configuration rather than a hard-coded key 0,
    # mirroring make_iznn_genome.
    output_keys = list(gc.output_keys)
    for out_key in output_keys:
        add_node(out_key)

    # Hidden keys start just past the largest output key so they can never
    # collide with an output node (for a single output 0 this is 1, 2, ...).
    first_hidden = max(output_keys, default=-1) + 1
    hidden_keys = [first_hidden + h for h in range(num_hidden)]
    for key in hidden_keys:
        add_node(key)

    # Inputs feed the first layer (hidden or output), then hidden feeds output.
    targets = hidden_keys if hidden_keys else output_keys
    edges = [(in_key, t) for in_key in gc.input_keys for t in targets]
    edges += [(h, out_key) for h in hidden_keys for out_key in output_keys]

    # The position in ``edges`` doubles as the innovation number.
    for innov, edge in enumerate(edges):
        conn = DefaultConnectionGene(edge, innovation=innov)
        conn.weight = np.random.uniform(-2, 2)
        conn.enabled = True
        genome.connections[conn.key] = conn

    return genome
107+
108+
109+
def make_iznn_genome(config, genome_id, num_hidden=0):
    """Build an Izhikevich genome with ``num_hidden`` hidden nodes.

    One node is created per configured output key, then the hidden nodes
    (keyed just past the largest output key). Each node gets a random bias and
    the fixed parameters a=0.02, b=0.2, c=-65.0, d=8.0. Inputs connect to every
    hidden node and hidden nodes to every output; with no hidden nodes the
    inputs connect directly to the outputs. Weights are drawn from
    ``np.random.uniform(-10, 10)``.

    Args:
        config: NEAT configuration; only ``config.genome_config`` is read.
        genome_id: Key passed to ``neat.iznn.IZGenome``.
        num_hidden: Number of hidden nodes to add.

    Returns:
        The populated ``neat.iznn.IZGenome``.
    """
    gc = config.genome_config
    genome = neat.iznn.IZGenome(genome_id)

    def add_node(node_key):
        # The bias is the only random parameter of a node.
        gene = neat.iznn.IZNodeGene(node_key)
        gene.bias = np.random.uniform(-5, 5)
        gene.a = 0.02
        gene.b = 0.2
        gene.c = -65.0
        gene.d = 8.0
        genome.nodes[node_key] = gene

    for out_key in gc.output_keys:
        add_node(out_key)

    hidden_keys = []
    for offset in range(num_hidden):
        hidden_key = max(gc.output_keys) + 1 + offset
        add_node(hidden_key)
        hidden_keys.append(hidden_key)

    # Inputs feed the first layer; hidden nodes (if any) then feed the outputs.
    first_layer = hidden_keys or gc.output_keys
    edges = [(src, dst) for src in gc.input_keys for dst in first_layer]
    edges.extend((src, dst) for src in hidden_keys for dst in gc.output_keys)

    # The position in ``edges`` doubles as the innovation number.
    for innovation, edge in enumerate(edges):
        gene = DefaultConnectionGene(edge, innovation=innovation)
        gene.weight = np.random.uniform(-10, 10)
        gene.enabled = True
        genome.connections[gene.key] = gene

    return genome
155+
156+
157+
# ---------------------------------------------------------------------------
158+
# Benchmarks
159+
# ---------------------------------------------------------------------------
160+
161+
def benchmark_ctrnn(pop_sizes, num_hidden=3):
    """Time CTRNN evaluation on CPU and GPU for each population size.

    For every entry of ``pop_sizes`` a population is generated (NumPy seed
    reset to 42), each genome is advanced step by step on the CPU, and the
    same population is then packed and evaluated in one batch on the GPU.
    One table row is printed per population size.

    Args:
        pop_sizes: Iterable of population sizes to benchmark.
        num_hidden: Hidden nodes given to every generated genome.
    """
    # Imported lazily so the GPU backend is only loaded when the benchmark runs.
    from neat.gpu._padding import pack_ctrnn_population
    from neat.gpu._cupy_backend import evaluate_ctrnn_batch

    config = make_ctrnn_config()
    dt = 0.01
    t_max = 1.0
    num_steps = int(t_max / dt)
    input_vals = [0.5, -0.3]
    # The same input row is repeated for every simulation step.
    one_step = np.array(input_vals, dtype=np.float32)
    inputs_np = np.tile(one_step, (num_steps, 1))

    print(f"\n{'='*70}")
    print(f"CTRNN Benchmark: dt={dt}, t_max={t_max}, num_steps={num_steps}, "
          f"hidden_nodes={num_hidden}")
    print(f"{'='*70}")
    print(f"{'Pop Size':>10} {'Max Nodes':>10} {'CPU (s)':>10} {'GPU (s)':>10} {'Speedup':>10}")
    print(f"{'-'*10:>10} {'-'*10:>10} {'-'*10:>10} {'-'*10:>10} {'-'*10:>10}")

    for pop_size in pop_sizes:
        # Re-seed so every population size starts from the same random stream.
        np.random.seed(42)
        population = []
        for index in range(pop_size):
            population.append(
                (index, make_ctrnn_genome(config, index, num_hidden=num_hidden)))

        # CPU: build each network and advance it one step at a time.
        started = time.perf_counter()
        for _, genome in population:
            net = neat.ctrnn.CTRNN.create(genome, config)
            for _ in range(num_steps):
                net.advance(input_vals, dt, dt)
        cpu_time = time.perf_counter() - started

        # GPU warmup: one untimed pack + evaluate pass before measuring.
        packed = pack_ctrnn_population(population, config)
        _ = evaluate_ctrnn_batch(packed, inputs_np, dt)
        cp.cuda.Stream.null.synchronize()

        # GPU: the timed section includes packing + transfer + compute.
        started = time.perf_counter()
        packed = pack_ctrnn_population(population, config)
        result = evaluate_ctrnn_batch(packed, inputs_np, dt)
        cp.cuda.Stream.null.synchronize()
        gpu_time = time.perf_counter() - started

        max_nodes = packed['max_nodes']
        if gpu_time > 0:
            speedup = cpu_time / gpu_time
        else:
            speedup = float('inf')

        print(f"{pop_size:>10d} {max_nodes:>10d} {cpu_time:>10.3f} {gpu_time:>10.3f} "
              f"{speedup:>9.1f}x")
210+
211+
212+
def benchmark_iznn(pop_sizes, num_hidden=3):
    """Time Izhikevich evaluation on CPU and GPU for each population size.

    For every entry of ``pop_sizes`` a population is generated (NumPy seed
    reset to 42), each genome is advanced step by step on the CPU with fixed
    inputs, and the same population is then packed and evaluated in one batch
    on the GPU. One table row is printed per population size.

    Args:
        pop_sizes: Iterable of population sizes to benchmark.
        num_hidden: Hidden nodes given to every generated genome.
    """
    # Imported lazily so the GPU backend is only loaded when the benchmark runs.
    from neat.gpu._padding import pack_iznn_population
    from neat.gpu._cupy_backend import evaluate_iznn_batch

    config = make_iznn_config()
    dt = 0.05
    t_max = 50.0  # 50 ms
    num_steps = int(t_max / dt)
    input_vals = [1.0, 0.5]
    # The same input row is repeated for every simulation step.
    one_step = np.array(input_vals, dtype=np.float32)
    inputs_np = np.tile(one_step, (num_steps, 1))

    print(f"\n{'='*70}")
    print(f"Izhikevich Benchmark: dt={dt} ms, t_max={t_max} ms, "
          f"num_steps={num_steps}, hidden_nodes={num_hidden}")
    print(f"{'='*70}")
    print(f"{'Pop Size':>10} {'Max Nodes':>10} {'CPU (s)':>10} {'GPU (s)':>10} {'Speedup':>10}")
    print(f"{'-'*10:>10} {'-'*10:>10} {'-'*10:>10} {'-'*10:>10} {'-'*10:>10}")

    for pop_size in pop_sizes:
        # Re-seed so every population size starts from the same random stream.
        np.random.seed(42)
        population = []
        for index in range(pop_size):
            population.append(
                (index, make_iznn_genome(config, index, num_hidden=num_hidden)))

        # CPU: inputs are set once per network, then it is stepped num_steps times.
        started = time.perf_counter()
        for _, genome in population:
            net = neat.iznn.IZNN.create(genome, config)
            net.set_inputs(input_vals)
            for _ in range(num_steps):
                net.advance(dt)
        cpu_time = time.perf_counter() - started

        # GPU warmup: one untimed pack + evaluate pass before measuring.
        packed = pack_iznn_population(population, config)
        _ = evaluate_iznn_batch(packed, inputs_np, dt, num_steps)
        cp.cuda.Stream.null.synchronize()

        # GPU: the timed section covers packing, evaluation and the final sync.
        started = time.perf_counter()
        packed = pack_iznn_population(population, config)
        result = evaluate_iznn_batch(packed, inputs_np, dt, num_steps)
        cp.cuda.Stream.null.synchronize()
        gpu_time = time.perf_counter() - started

        max_nodes = packed['max_nodes']
        if gpu_time > 0:
            speedup = cpu_time / gpu_time
        else:
            speedup = float('inf')

        print(f"{pop_size:>10d} {max_nodes:>10d} {cpu_time:>10.3f} {gpu_time:>10.3f} "
              f"{speedup:>9.1f}x")
261+
262+
263+
if __name__ == '__main__':
    # Both benchmarks sweep the same population sizes, CTRNN first.
    pop_sizes = [100, 500, 1000]
    for run_benchmark in (benchmark_ctrnn, benchmark_iznn):
        run_benchmark(pop_sizes)

neat/gpu/__init__.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
Optional GPU-accelerated evaluation for CTRNN and Izhikevich spiking networks.
3+
4+
This module requires CuPy. Install via: pip install 'neat-python[gpu]'
5+
6+
All CuPy imports are lazy — ``import neat`` never triggers a CuPy import.
7+
"""
8+
9+
10+
def _import_cupy():
11+
"""Import and return the CuPy module, or raise an informative error."""
12+
try:
13+
import cupy
14+
return cupy
15+
except ImportError:
16+
raise ImportError(
17+
"CuPy is required for GPU evaluation but is not installed.\n"
18+
"Install it with: pip install 'neat-python[gpu]'\n"
19+
"Or install CuPy directly: pip install cupy-cuda12x"
20+
) from None
21+
22+
23+
def _import_numpy():
24+
"""Import and return NumPy, or raise an informative error."""
25+
try:
26+
import numpy
27+
return numpy
28+
except ImportError:
29+
raise ImportError(
30+
"NumPy is required for GPU evaluation but is not installed.\n"
31+
"Install it with: pip install numpy"
32+
) from None
33+
34+
35+
def gpu_available():
    """Report whether GPU evaluation can be used.

    Returns:
        True when CuPy imports successfully and the compute capability of
        CUDA device 0 can be read without an exception; False on any failure.
    """
    # Stage 1: CuPy itself must be importable.
    try:
        cupy = _import_cupy()
    except Exception:
        return False

    # Stage 2: reading an attribute of device 0 forces a query of the device,
    # which is expected to raise if no usable GPU is present.
    try:
        cupy.cuda.Device(0).compute_capability
    except Exception:
        return False
    return True

0 commit comments

Comments
 (0)