My initial investigation shows that re-creating a pyfftw.FFTW object can have a considerable overhead for shorter arrays. We can check that by using a simple script like this:
def timing_reuse_object(T):
pyfftw.forget_wisdom() # clean up planning
total_time = 0
cntr = 0
real_arr = pyfftw.empty_aligned(len(T), dtype=np.float64)
complex_arr = pyfftw.empty_aligned(1 + len(T) // 2, dtype=np.complex128)
rfft_obj = pyfftw.FFTW(
real_arr, complex_arr, flags=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"), direction='FFTW_FORWARD', threads=1
)
real_arr[:] = T
rfft_obj.execute()
while total_time < 5.0:
start = time.time()
real_arr[:] = T
rfft_obj.execute()
stop = time.time()
total_time += stop - start
cntr += 1
return total_time / cntr
def timing_recreate_object(T):
pyfftw.forget_wisdom() # clean up planning
total_time = 0
cntr = 0
real_arr = pyfftw.empty_aligned(len(T), dtype=np.float64)
complex_arr = pyfftw.empty_aligned(1 + len(T) // 2, dtype=np.complex128)
rfft_obj = pyfftw.FFTW(
real_arr, complex_arr, flags=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"), direction='FFTW_FORWARD', threads=1
)
real_arr[:] = T
rfft_obj.execute()
while total_time < 5.0:
start = time.time()
rfft_obj = pyfftw.FFTW(
real_arr, complex_arr, flags=("FFTW_WISDOM_ONLY", "FFTW_DESTROY_INPUT"), direction='FFTW_FORWARD', threads=1
)
real_arr[:] = T
rfft_obj.execute()
stop = time.time()
total_time += stop - start
cntr += 1
return total_time / cntr
############################
p_range = np.arange(6, 20 + 1)
storage_saving = np.empty(len(p_range), dtype=np.float64)
for i, p in enumerate(p_range):
n = 2 ** p
T = np.random.rand(n)
timing_1 = timing_reuse_object(T)
timing_2 = timing_recreate_object(T)
r = timing_2 / timing_1
storage_saving[i] = r
print(f'p: {p}, r: {r}', flush=True)
And this gives us the performance gain that comes from re-using the object rather than re-creating it.
# In Google Colab
# Values might change in different runs but the conclusion remains the same
p: 6, r: 51.39261003413654
p: 7, r: 41.41622832796392
p: 8, r: 30.116963999168277
p: 9, r: 14.474155208575292
p: 10, r: 12.442231341579324
p: 11, r: 9.363594660800752
p: 12, r: 6.694460620381835
p: 13, r: 3.7229326023701375
p: 14, r: 2.3063108061704725
p: 15, r: 1.5304997234037254
p: 16, r: 1.391239900963883
p: 17, r: 1.2167746295271427
p: 18, r: 1.1363106695322773
p: 19, r: 1.1602727241345845
p: 20, r: 1.0722179076354825
This is reported in pyFFTW/pyFFTW#428
This becomes important when computing (I)RFFTs on arrays of varying lengths. For fixed-length arrays, the same FFTW object can be reused. For varying lengths, however, a different object must be used, either newly created or pulled from a cache.
My initial investigation shows that re-creating a pyfftw.FFTW object can have a considerable overhead for shorter arrays. We can check that by using a simple script like the one above, and this gives us the performance gain that comes from re-using the object rather than re-creating it.
This is reported in pyFFTW/pyFFTW#428
This becomes important when computing (I)RFFTs on arrays of varying lengths. For fixed-length arrays, the same FFTW object can be reused. For varying lengths, however, a different object must be used, either newly created or pulled from a cache.