From 59b49e1dfedf2a519a01092b96d1e31b542131c7 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 10 Dec 2025 21:28:12 -0500 Subject: [PATCH 01/21] propose revised pyfftw in challenger --- sdp/challenger_sdp.py | 73 +++++++++++++++++++++++++++++++++++++++---- timing.py | 2 +- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index ce60e8d..c0df9c1 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -1,14 +1,75 @@ +import pyfftw import numpy as np +class SLIDING_DOT_PRODUCT: + # https://stackoverflow.com/a/30615425/2955541 + def __init__(self): + self.m = 0 + self.n = 0 + self.shape = 0 + self.threads = 1 + + def __call__(self, Q, T): + self.m = Q.shape[0] + + need_new_obj = False + if self.n != T.shape[0]: + # need to re-compute shape + self.n = T.shape[0] + shape = pyfftw.next_fast_len(self.n) + if self.shape != shape: + self.shape = shape + need_new_obj = True + + if need_new_obj: + # create input and output arrays and FFTW objects + self.real_arr = pyfftw.empty_aligned(self.shape, dtype="float64") + self.complex_arr = pyfftw.empty_aligned( + 1 + self.shape // 2, dtype="complex128" + ) + + self.rfft_obj = pyfftw.FFTW( + input_array=self.real_arr, + output_array=self.complex_arr, + flags=("FFTW_MEASURE",), + direction="FFTW_FORWARD", + threads=self.threads, + ) + + self.irfft_obj = pyfftw.FFTW( + input_array=self.complex_arr, + output_array=self.real_arr, + flags=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"), + direction="FFTW_BACKWARD", + threads=self.threads, + ) + + # RFFT(T) + self.real_arr[: self.n] = T + self.real_arr[self.n : self.shape] = 0.0 + self.rfft_obj.execute() # output is in self.complex_arr + complex_arr_T = self.complex_arr.copy() + + # RFFT(Q) + self.real_arr[: self.m] = Q[::-1] / self.shape # reversed Q and scale + self.real_arr[self.m : self.shape] = 0.0 + self.rfft_obj.execute() # output is in self.complex_arr + + # RFFT(T) * RFFT(Q) + np.multiply(self.complex_arr, complex_arr_T, out=self.complex_arr) + self.irfft_obj.execute() # output is in self.real_arr + + return self.real_arr[self.m - 1 : self.n] + + +_sliding_dot_product = SLIDING_DOT_PRODUCT() + + def setup(Q, T): + _sliding_dot_product(Q, T) return def sliding_dot_product(Q, T): - m = len(Q) - l = T.shape[0] - m + 1 - out = np.empty(l) - for i in range(l): - out[i] = np.dot(Q, T[i : i + m]) - return out + return _sliding_dot_product(Q, T) diff --git a/timing.py b/timing.py index 0bc4af1..79e7b07 100755 --- a/timing.py +++ b/timing.py @@ -59,7 +59,7 @@ p_max = args.pmax p_diff = args.pdiff - if not noheader: + if noheader: print("module,len_Q,len_T,n_iter,time", flush=True) start_timing = time.time() From c8074a285c7dba6fa1e49614a99928b602bf85cc Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 10 Dec 2025 22:00:49 -0500 Subject: [PATCH 02/21] minor fix --- timing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/timing.py b/timing.py index 79e7b07..7fc2a6c 100755 --- a/timing.py +++ b/timing.py @@ -9,7 +9,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( - description="./timing.py -noheader -pmin 6 -pmax 23 -pdiff 3 pyfftw challenger" + description="./timing.py -pmin 6 -pmax 23 -pdiff 3 pyfftw challenger" ) parser.add_argument("-noheader", default=False, action="store_true") parser.add_argument( @@ -59,7 +59,7 @@ p_max = args.pmax p_diff = args.pdiff - if noheader: + if not noheader: print("module,len_Q,len_T,n_iter,time", flush=True) start_timing = time.time() From bb8b9ea324383d35c5cfefaf771879ddd1f24a14 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 12 Dec 2025 11:07:09 -0500 Subject: [PATCH 03/21] update files --- sdp/challenger_sdp.py | 73 ++++--------------------------------------- sdp/pyfftw_sdp.py | 62 +++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 86 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index c0df9c1..452b197 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -1,75 +1,14 @@ -import pyfftw import numpy as np -class SLIDING_DOT_PRODUCT: - # https://stackoverflow.com/a/30615425/2955541 - def __init__(self): - self.m = 0 - self.n = 0 - self.shape = 0 - self.threads = 1 - - def __call__(self, Q, T): - self.m = Q.shape[0] - - need_new_obj = False - if self.n != T.shape[0]: - # need to re-compute shape - self.n = T.shape[0] - shape = pyfftw.next_fast_len(self.n) - if self.shape != shape: - self.shape = shape - need_new_obj = True - - if need_new_obj: - # create input and output arrays and FFTW objects - self.real_arr = pyfftw.empty_aligned(self.shape, dtype="float64") - self.complex_arr = pyfftw.empty_aligned( - 1 + self.shape // 2, dtype="complex128" - ) - - self.rfft_obj = pyfftw.FFTW( - input_array=self.real_arr, - output_array=self.complex_arr, - flags=("FFTW_MEASURE",), - direction="FFTW_FORWARD", - threads=self.threads, - ) - - self.irfft_obj = pyfftw.FFTW( - input_array=self.complex_arr, - output_array=self.real_arr, - flags=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"), - direction="FFTW_BACKWARD", - threads=self.threads, - ) - - # RFFT(T) - self.real_arr[: self.n] = T - self.real_arr[self.n : self.shape] = 0.0 - self.rfft_obj.execute() # output is in self.complex_arr - complex_arr_T = self.complex_arr.copy() - - # RFFT(Q) - self.real_arr[: self.m] = Q[::-1] / self.shape # reversed Q and scale - self.real_arr[self.m : self.shape] = 0.0 - self.rfft_obj.execute() # output is in self.complex_arr - - # RFFT(T) * RFFT(Q) - np.multiply(self.complex_arr, complex_arr_T, out=self.complex_arr) - self.irfft_obj.execute() # output is in self.real_arr - - return self.real_arr[self.m - 1 : self.n] - - -_sliding_dot_product = SLIDING_DOT_PRODUCT() - - def setup(Q, T): - _sliding_dot_product(Q, T) return def sliding_dot_product(Q, T): - return _sliding_dot_product(Q, T) + m = len(Q) + l = T.shape[0] - m + 1 + out = np.empty(l) + for i in range(l): + out[i] = np.dot(Q, T[i : i + m]) + return out \ No newline at end of file diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index bd0a0e9..c0df9c1 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -7,36 +7,60 @@ class SLIDING_DOT_PRODUCT: def __init__(self): self.m = 0 self.n = 0 + self.shape = 0 self.threads = 1 - self.rfft_Q_obj = None - self.rfft_T_obj = None - self.irfft_obj = None def __call__(self, Q, T): - if Q.shape[0] != self.m or T.shape[0] != self.n: - self.m = Q.shape[0] + self.m = Q.shape[0] + + need_new_obj = False + if self.n != T.shape[0]: + # need to re-compute shape self.n = T.shape[0] shape = pyfftw.next_fast_len(self.n) - self.rfft_Q_obj = pyfftw.builders.rfft( - np.empty(self.m), overwrite_input=True, n=shape, threads=self.threads + if self.shape != shape: + self.shape = shape + need_new_obj = True + + if need_new_obj: + # create input and output arrays and FFTW objects + self.real_arr = pyfftw.empty_aligned(self.shape, dtype="float64") + self.complex_arr = pyfftw.empty_aligned( + 1 + self.shape // 2, dtype="complex128" ) - self.rfft_T_obj = pyfftw.builders.rfft( - np.empty(self.n), overwrite_input=True, n=shape, threads=self.threads + + self.rfft_obj = pyfftw.FFTW( + input_array=self.real_arr, + output_array=self.complex_arr, + flags=("FFTW_MEASURE",), + direction="FFTW_FORWARD", + threads=self.threads, ) - self.irfft_obj = pyfftw.builders.irfft( - self.rfft_Q_obj.output_array, - overwrite_input=True, - n=shape, + + self.irfft_obj = pyfftw.FFTW( + input_array=self.complex_arr, + output_array=self.real_arr, + flags=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"), + direction="FFTW_BACKWARD", threads=self.threads, ) - Qr = Q[::-1] # Reverse/flip Q - rfft_padded_Q = self.rfft_Q_obj(Qr) - rfft_padded_T = self.rfft_T_obj(T) + # RFFT(T) + self.real_arr[: self.n] = T + self.real_arr[self.n : self.shape] = 0.0 + self.rfft_obj.execute() # output is in self.complex_arr + complex_arr_T = self.complex_arr.copy() + + # RFFT(Q) + self.real_arr[: self.m] = Q[::-1] / self.shape # reversed Q and scale + self.real_arr[self.m : self.shape] = 0.0 + self.rfft_obj.execute() # output is in self.complex_arr + + # RFFT(T) * RFFT(Q) + np.multiply(self.complex_arr, complex_arr_T, out=self.complex_arr) + self.irfft_obj.execute() # output is in self.real_arr - return self.irfft_obj(np.multiply(rfft_padded_Q, rfft_padded_T)).real[ - self.m - 1 : self.n - ] + return self.real_arr[self.m - 1 : self.n] _sliding_dot_product = SLIDING_DOT_PRODUCT() From aa592db8ed84c9f360cee00a2dd8b8f76fcc0081 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 12 Dec 2025 11:09:33 -0500 Subject: [PATCH 04/21] improve comments --- sdp/pyfftw_sdp.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index c0df9c1..61177dd 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -58,7 +58,11 @@ def __call__(self, Q, T): # RFFT(T) * RFFT(Q) np.multiply(self.complex_arr, complex_arr_T, out=self.complex_arr) - self.irfft_obj.execute() # output is in self.real_arr + + # IRFFT + # input is in self.complex_arr + # output is in self.real_arr + self.irfft_obj.execute() return self.real_arr[self.m - 1 : self.n] From d8f0423030b23b3860300a4ebeb2688908853e16 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 12 Dec 2025 11:10:07 -0500 Subject: [PATCH 05/21] fix black formating --- sdp/challenger_sdp.py | 2 +- sdp/pyfftw_sdp.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index 452b197..ce60e8d 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -11,4 +11,4 @@ def sliding_dot_product(Q, T): out = np.empty(l) for i in range(l): out[i] = np.dot(Q, T[i : i + m]) - return out \ No newline at end of file + return out diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index 61177dd..ff77360 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -58,8 +58,8 @@ def __call__(self, Q, T): # RFFT(T) * RFFT(Q) np.multiply(self.complex_arr, complex_arr_T, out=self.complex_arr) - - # IRFFT + + # IRFFT # input is in self.complex_arr # output is in self.real_arr self.irfft_obj.execute() From 73447a70fbda592a312009b6499c5b8fe58d7d4e Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 16 Dec 2025 23:46:14 -0500 Subject: [PATCH 06/21] addressed comments --- sdp/pyfftw_sdp.py | 93 ++++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index ff77360..ad5e278 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -1,79 +1,90 @@ import pyfftw import numpy as np +from . import config class SLIDING_DOT_PRODUCT: # https://stackoverflow.com/a/30615425/2955541 - def __init__(self): - self.m = 0 + def __init__(self, max_shape=2**10): + """ + Parameters + ---------- + max_shape : int + Maximum shape to preallocate arrays for. This will be the size of the + the real-valued array. A complex-valued array of size 1 + max_shape // 2 + will also be preallocated. + """ self.n = 0 self.shape = 0 - self.threads = 1 + self.real_arr = pyfftw.empty_aligned(max_shape, dtype="float64") + self.complex_arr = pyfftw.empty_aligned(1 + max_shape // 2, dtype="complex128") + self.rfft_irfft_objects = {} - def __call__(self, Q, T): - self.m = Q.shape[0] - - need_new_obj = False + def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): + m = Q.shape[0] if self.n != T.shape[0]: - # need to re-compute shape self.n = T.shape[0] - shape = pyfftw.next_fast_len(self.n) - if self.shape != shape: - self.shape = shape - need_new_obj = True + self.shape = pyfftw.next_fast_len(self.n) - if need_new_obj: - # create input and output arrays and FFTW objects + if self.shape > len(self.real_arr): self.real_arr = pyfftw.empty_aligned(self.shape, dtype="float64") self.complex_arr = pyfftw.empty_aligned( 1 + self.shape // 2, dtype="complex128" ) - - self.rfft_obj = pyfftw.FFTW( - input_array=self.real_arr, - output_array=self.complex_arr, - flags=("FFTW_MEASURE",), + real_arr = self.real_arr[: self.shape] + complex_arr = self.complex_arr[: 1 + self.shape // 2] + + rfft_irfft_obj = self.rfft_irfft_objects.get(self.shape, None) + if rfft_irfft_obj is None: + rfft_obj = pyfftw.FFTW( + input_array=real_arr, + output_array=complex_arr, direction="FFTW_FORWARD", - threads=self.threads, + flags=(planning_flag,), + threads=threads, ) - - self.irfft_obj = pyfftw.FFTW( - input_array=self.complex_arr, - output_array=self.real_arr, - flags=("FFTW_MEASURE", "FFTW_DESTROY_INPUT"), + irfft_obj = pyfftw.FFTW( + input_array=complex_arr, + output_array=real_arr, direction="FFTW_BACKWARD", - threads=self.threads, + flags=(planning_flag, "FFTW_DESTROY_INPUT"), + threads=threads, ) + self.rfft_irfft_objects[self.shape] = (rfft_obj, irfft_obj) + else: + rfft_obj, irfft_obj = rfft_irfft_obj + rfft_obj.update_arrays(real_arr, complex_arr) + irfft_obj.update_arrays(complex_arr, real_arr) # RFFT(T) - self.real_arr[: self.n] = T - self.real_arr[self.n : self.shape] = 0.0 - self.rfft_obj.execute() # output is in self.complex_arr - complex_arr_T = self.complex_arr.copy() + real_arr[: self.n] = T + real_arr[self.n :] = 0.0 + rfft_obj.execute() # output is in self.complex_arr + complex_arr_T = complex_arr.copy() # RFFT(Q) - self.real_arr[: self.m] = Q[::-1] / self.shape # reversed Q and scale - self.real_arr[self.m : self.shape] = 0.0 - self.rfft_obj.execute() # output is in self.complex_arr + real_arr[:m] = Q[::-1] / self.shape # reversed Q and scale + real_arr[m:] = 0.0 + rfft_obj.execute() # output is in self.complex_arr # RFFT(T) * RFFT(Q) - np.multiply(self.complex_arr, complex_arr_T, out=self.complex_arr) + np.multiply(complex_arr, complex_arr_T, out=complex_arr) # IRFFT # input is in self.complex_arr # output is in self.real_arr - self.irfft_obj.execute() + irfft_obj.execute() - return self.real_arr[self.m - 1 : self.n] + return real_arr[m - 1 : self.n] -_sliding_dot_product = SLIDING_DOT_PRODUCT() +_sliding_dot_product = SLIDING_DOT_PRODUCT(max_shape=2**10) -def setup(Q, T): - _sliding_dot_product(Q, T) +def setup(Q, T, threads=1, planning_flag="FFTW_MEASURE"): + _sliding_dot_product(Q, T, threads=threads, planning_flag=planning_flag) return -def sliding_dot_product(Q, T): - return _sliding_dot_product(Q, T) +def sliding_dot_product(Q, T, threads=1, planning_flag="FFTW_MEASURE"): + return _sliding_dot_product(Q, T, threads=threads, planning_flag=planning_flag) From 47ec8b7590f358b33c7b467c77dfbf9843bcf3a6 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 16 Dec 2025 23:51:17 -0500 Subject: [PATCH 07/21] fixed unnecessary import --- sdp/pyfftw_sdp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index ad5e278..edb79e9 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -1,6 +1,5 @@ import pyfftw import numpy as np -from . import config class SLIDING_DOT_PRODUCT: From c2d524792d7cc521f0b58e8f341850bd818ca91c Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 21:35:50 -0500 Subject: [PATCH 08/21] add comment to improve clarity --- sdp/pyfftw_sdp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index edb79e9..964ad33 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -62,7 +62,9 @@ def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): complex_arr_T = complex_arr.copy() # RFFT(Q) - real_arr[:m] = Q[::-1] / self.shape # reversed Q and scale + # Scale by 1/shape to account for + # FFTW's unnormalized inverse FFT via execute() + real_arr[:m] = Q[::-1] / self.shape real_arr[m:] = 0.0 rfft_obj.execute() # output is in self.complex_arr From 4014bf8dc85e38b2df5f787ac9e38180ae86e3a0 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 22:07:18 -0500 Subject: [PATCH 09/21] addressed comments --- sdp/pyfftw_sdp.py | 64 ++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index 964ad33..fff71e0 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -4,37 +4,45 @@ class SLIDING_DOT_PRODUCT: # https://stackoverflow.com/a/30615425/2955541 - def __init__(self, max_shape=2**10): + def __init__(self, max_n=2**10): """ Parameters ---------- - max_shape : int - Maximum shape to preallocate arrays for. This will be the size of the - the real-valued array. A complex-valued array of size 1 + max_shape // 2 + max_n : int + Maximum length to preallocate arrays for. This will be the size of the + the real-valued array. A complex-valued array of size `1 + (max_n // 2)` will also be preallocated. """ self.n = 0 - self.shape = 0 - self.real_arr = pyfftw.empty_aligned(max_shape, dtype="float64") - self.complex_arr = pyfftw.empty_aligned(1 + max_shape // 2, dtype="complex128") - self.rfft_irfft_objects = {} + self.next_fast_n = 0 + + # Preallocate arrays + self.real_arr = pyfftw.empty_aligned(max_n, dtype="float64") + self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype="complex128") + + # Store FFTW objects in a dict keyed by `next_fast_n`, where n=len(T) + self.rfft_objects = {} + self.irfft_objects = {} def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): m = Q.shape[0] if self.n != T.shape[0]: self.n = T.shape[0] - self.shape = pyfftw.next_fast_len(self.n) + self.next_fast_n = pyfftw.next_fast_len(self.n) - if self.shape > len(self.real_arr): - self.real_arr = pyfftw.empty_aligned(self.shape, dtype="float64") + # Update preallocated arrays if needed + if self.next_fast_n > len(self.real_arr): + self.real_arr = pyfftw.empty_aligned(self.next_fast_n, dtype="float64") self.complex_arr = pyfftw.empty_aligned( - 1 + self.shape // 2, dtype="complex128" + 1 + (self.next_fast_n // 2), dtype="complex128" ) - real_arr = self.real_arr[: self.shape] - complex_arr = self.complex_arr[: 1 + self.shape // 2] - rfft_irfft_obj = self.rfft_irfft_objects.get(self.shape, None) - if rfft_irfft_obj is None: + real_arr = self.real_arr[: self.next_fast_n] + complex_arr = self.complex_arr[: 1 + (self.next_fast_n // 2)] + + # Get or create FFTW objects + rfft_obj = self.rfft_objects.get(self.next_fast_n, None) + if rfft_obj is None: rfft_obj = pyfftw.FFTW( input_array=real_arr, output_array=complex_arr, @@ -42,6 +50,12 @@ def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): flags=(planning_flag,), threads=threads, ) + self.rfft_objects[self.next_fast_n] = rfft_obj + else: + rfft_obj.update_arrays(real_arr, complex_arr) + + irfft_obj = self.irfft_objects.get(self.next_fast_n, None) + if irfft_obj is None: irfft_obj = pyfftw.FFTW( input_array=complex_arr, output_array=real_arr, @@ -49,37 +63,35 @@ def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): flags=(planning_flag, "FFTW_DESTROY_INPUT"), threads=threads, ) - self.rfft_irfft_objects[self.shape] = (rfft_obj, irfft_obj) + self.irfft_objects[self.next_fast_n] = irfft_obj else: - rfft_obj, irfft_obj = rfft_irfft_obj - rfft_obj.update_arrays(real_arr, complex_arr) irfft_obj.update_arrays(complex_arr, real_arr) # RFFT(T) real_arr[: self.n] = T real_arr[self.n :] = 0.0 - rfft_obj.execute() # output is in self.complex_arr + rfft_obj.execute() # output is in complex_arr complex_arr_T = complex_arr.copy() # RFFT(Q) - # Scale by 1/shape to account for + # Scale by 1/next_fast_n to account for # FFTW's unnormalized inverse FFT via execute() - real_arr[:m] = Q[::-1] / self.shape + real_arr[:m] = Q[::-1] / self.next_fast_n real_arr[m:] = 0.0 - rfft_obj.execute() # output is in self.complex_arr + rfft_obj.execute() # output is in complex_arr # RFFT(T) * RFFT(Q) np.multiply(complex_arr, complex_arr_T, out=complex_arr) # IRFFT - # input is in self.complex_arr - # output is in self.real_arr + # input is in complex_arr + # output will be in real_arr irfft_obj.execute() return real_arr[m - 1 : self.n] -_sliding_dot_product = SLIDING_DOT_PRODUCT(max_shape=2**10) +_sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**10) def setup(Q, T, threads=1, planning_flag="FFTW_MEASURE"): From f9d4bb39f7fc4c56f7121f542d3bf0460c18de11 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 22:07:50 -0500 Subject: [PATCH 10/21] increase the default value of max_n parameter --- sdp/pyfftw_sdp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index fff71e0..e4a6b77 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -4,7 +4,7 @@ class SLIDING_DOT_PRODUCT: # https://stackoverflow.com/a/30615425/2955541 - def __init__(self, max_n=2**10): + def __init__(self, max_n=2**20): """ Parameters ---------- @@ -91,7 +91,7 @@ def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): return real_arr[m - 1 : self.n] -_sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**10) +_sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**20) def setup(Q, T, threads=1, planning_flag="FFTW_MEASURE"): From 1f1bae60cd57dfcdb434494a5a227f00b95a435d Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 22:20:58 -0500 Subject: [PATCH 11/21] added test function --- test.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test.py b/test.py index c72a6b3..565b2a0 100644 --- a/test.py +++ b/test.py @@ -192,3 +192,19 @@ def test_setup(): raise e return + + +def test_pyfftw_max_n_needs_update(): + from sdp.pyfftw_sdp import SLIDING_DOT_PRODUCT + + sdp_obj = SLIDING_DOT_PRODUCT(max_n=2**10) + n_T = 2**12 # larger than max_n + n_Q = 2**8 + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) + + sdp_comp = sdp_obj(Q, T) + sdf_ref = naive_sliding_dot_product(Q, T) + np.testing.assert_allclose(sdp_comp, sdf_ref) + + return From 6954b318c465b9d4baa97acba094c85864ed605a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 22:21:33 -0500 Subject: [PATCH 12/21] renamed test function --- test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test.py b/test.py index 565b2a0..b6ab1f1 100644 --- a/test.py +++ b/test.py @@ -194,7 +194,7 @@ def test_setup(): return -def test_pyfftw_max_n_needs_update(): +def test_pyfftw_sdp_max_n(): from sdp.pyfftw_sdp import SLIDING_DOT_PRODUCT sdp_obj = SLIDING_DOT_PRODUCT(max_n=2**10) @@ -203,8 +203,8 @@ def test_pyfftw_max_n_needs_update(): Q = np.random.rand(n_Q) T = np.random.rand(n_T) - sdp_comp = sdp_obj(Q, T) - sdf_ref = naive_sliding_dot_product(Q, T) + QT_comp = sdp_obj(Q, T) + QT_ref = naive_sliding_dot_product(Q, T) np.testing.assert_allclose(sdp_comp, sdf_ref) return From 1acd7619e312374eea2cb12c37d47823211c2725 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 22:40:29 -0500 Subject: [PATCH 13/21] fixed minor issue --- test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.py b/test.py index b6ab1f1..b9baa63 100644 --- a/test.py +++ b/test.py @@ -205,6 +205,6 @@ def test_pyfftw_sdp_max_n(): QT_comp = sdp_obj(Q, T) QT_ref = naive_sliding_dot_product(Q, T) - np.testing.assert_allclose(sdp_comp, sdf_ref) + np.testing.assert_allclose(QT_ref, QT_comp) return From f013dd753b3c2013177464bd807cf0ba3ff4bda6 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 22:49:47 -0500 Subject: [PATCH 14/21] minor change --- test.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/test.py b/test.py index b9baa63..b41ed18 100644 --- a/test.py +++ b/test.py @@ -197,14 +197,12 @@ def test_setup(): def test_pyfftw_sdp_max_n(): from sdp.pyfftw_sdp import SLIDING_DOT_PRODUCT - sdp_obj = SLIDING_DOT_PRODUCT(max_n=2**10) - n_T = 2**12 # larger than max_n - n_Q = 2**8 - Q = np.random.rand(n_Q) - T = np.random.rand(n_T) - - QT_comp = sdp_obj(Q, T) - QT_ref = naive_sliding_dot_product(Q, T) - np.testing.assert_allclose(QT_ref, QT_comp) + sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**10) + T = np.random.rand(2**12) # len(T) is larger than max_n + Q = np.random.rand(2**8) + + comp = sliding_dot_product(Q, T) + ref = naive_sliding_dot_product(Q, T) + np.testing.assert_allclose(comp, ref) return From 20318d9eb6328f3fd027bf59f1f8ed78b343296e Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 17 Dec 2025 22:53:39 -0500 Subject: [PATCH 15/21] Revised comment --- sdp/pyfftw_sdp.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index e4a6b77..70abfbf 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -85,8 +85,7 @@ def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): # IRFFT # input is in complex_arr - # output will be in real_arr - irfft_obj.execute() + irfft_obj.execute() # output is in real_arr return real_arr[m - 1 : self.n] From c79ea81ac2acefb1c4b13b4102537995d321e082 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 19 Dec 2025 15:55:49 -0500 Subject: [PATCH 16/21] addressed comments --- sdp/pyfftw_sdp.py | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index 70abfbf..88de349 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -20,15 +20,38 @@ def __init__(self, max_n=2**20): self.real_arr = pyfftw.empty_aligned(max_n, dtype="float64") self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype="complex128") - # Store FFTW objects in a dict keyed by `next_fast_n`, where n=len(T) + # Store FFTW objects, keyed by (next_fast_n, n_threads, planning_flag) self.rfft_objects = {} self.irfft_objects = {} - def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): + def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): + """ + Parameters + ---------- + Q : numpy.ndarray + Query array or subsequence + + T : numpy.ndarray + Time series or sequence + + n_threads : int, default=1 + Number of threads to use for FFTW computations. + + planning_flag : str, default="FFTW_MEASURE" + The planning flag that will be used in FFTW for planning. + See pyfftw documentation for details. Current options include: + "FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", and "FFTW_EXHAUSTIVE". + + Returns + ------- + out : numpy.ndarray + Sliding dot product between `Q` and `T`. + """ m = Q.shape[0] if self.n != T.shape[0]: self.n = T.shape[0] self.next_fast_n = pyfftw.next_fast_len(self.n) + key = (self.next_fast_n, n_threads, planning_flag) # Update preallocated arrays if needed if self.next_fast_n > len(self.real_arr): @@ -41,29 +64,29 @@ def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): complex_arr = self.complex_arr[: 1 + (self.next_fast_n // 2)] # Get or create FFTW objects - rfft_obj = self.rfft_objects.get(self.next_fast_n, None) + rfft_obj = self.rfft_objects.get(key, None) if rfft_obj is None: rfft_obj = pyfftw.FFTW( input_array=real_arr, output_array=complex_arr, direction="FFTW_FORWARD", flags=(planning_flag,), - threads=threads, + threads=n_threads, ) - self.rfft_objects[self.next_fast_n] = rfft_obj + self.rfft_objects[key] = rfft_obj else: rfft_obj.update_arrays(real_arr, complex_arr) - irfft_obj = self.irfft_objects.get(self.next_fast_n, None) + irfft_obj = self.irfft_objects.get(key, None) if irfft_obj is None: irfft_obj = pyfftw.FFTW( input_array=complex_arr, output_array=real_arr, direction="FFTW_BACKWARD", flags=(planning_flag, "FFTW_DESTROY_INPUT"), - threads=threads, + threads=n_threads, ) - self.irfft_objects[self.next_fast_n] = irfft_obj + self.irfft_objects[key] = irfft_obj else: irfft_obj.update_arrays(complex_arr, real_arr) @@ -93,10 +116,10 @@ def __call__(self, Q, T, threads=1, planning_flag="FFTW_MEASURE"): _sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**20) -def setup(Q, T, threads=1, planning_flag="FFTW_MEASURE"): - _sliding_dot_product(Q, T, threads=threads, planning_flag=planning_flag) +def setup(Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): + _sliding_dot_product(Q, T, n_threads=n_threads, planning_flag=planning_flag) return -def sliding_dot_product(Q, T, threads=1, planning_flag="FFTW_MEASURE"): - return _sliding_dot_product(Q, T, threads=threads, planning_flag=planning_flag) +def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): + return _sliding_dot_product(Q, T, n_threads=n_threads, planning_flag=planning_flag) From 70bf879401127c511a2fd3e7a5387ff2e0e7c62c Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 19 Dec 2025 20:33:45 -0500 Subject: [PATCH 17/21] minor changes --- sdp/pyfftw_sdp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index 88de349..e031d30 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -51,7 +51,6 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): if self.n != T.shape[0]: self.n = T.shape[0] self.next_fast_n = pyfftw.next_fast_len(self.n) - key = (self.next_fast_n, n_threads, planning_flag) # Update preallocated arrays if needed if self.next_fast_n > len(self.real_arr): @@ -64,6 +63,8 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): complex_arr = self.complex_arr[: 1 + (self.next_fast_n // 2)] # Get or create FFTW objects + key = (self.next_fast_n, n_threads, planning_flag) + rfft_obj = self.rfft_objects.get(key, None) if rfft_obj is None: rfft_obj = pyfftw.FFTW( @@ -113,7 +114,7 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): return real_arr[m - 1 : self.n] -_sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**20) +_sliding_dot_product = SLIDING_DOT_PRODUCT() def setup(Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): From b259eae12b72962cc424d37cea1bc7552e9dcf38 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 19 Dec 2025 20:34:51 -0500 Subject: [PATCH 18/21] minor change in docstring --- sdp/pyfftw_sdp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index e031d30..ea5a7a3 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -29,10 +29,10 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): Parameters ---------- Q : numpy.ndarray - Query array or subsequence + Query array or subsequence. T : numpy.ndarray - Time series or sequence + Time series or sequence. n_threads : int, default=1 Number of threads to use for FFTW computations. From a419895938a3985b81d1e2898097bffec40cd9d7 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 19 Dec 2025 20:35:51 -0500 Subject: [PATCH 19/21] minor change in docstring --- sdp/pyfftw_sdp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index ea5a7a3..3affb2f 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -26,6 +26,8 @@ def __init__(self, max_n=2**20): def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): """ + Compute the sliding dot product between `Q` and `T` using FFTW via pyfftw. + Parameters ---------- Q : numpy.ndarray From c358de90de8bb8cc764b89fa155bc5974e59abd0 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 19 Dec 2025 20:42:08 -0500 Subject: [PATCH 20/21] add comment to test function to show its purpose --- test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index b41ed18..d1b80e8 100644 --- a/test.py +++ b/test.py @@ -195,14 +195,18 @@ def test_setup(): def test_pyfftw_sdp_max_n(): + # When `len(T)` larger than `max_n` in pyfftw_sdp, + # the internal preallocated arrays should be resized. + # This test checks that functionality. from sdp.pyfftw_sdp import SLIDING_DOT_PRODUCT - sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**10) - T = np.random.rand(2**12) # len(T) is larger than max_n + T = np.random.rand(2**12) Q = np.random.rand(2**8) + sliding_dot_product = SLIDING_DOT_PRODUCT(max_n=2**10) comp = sliding_dot_product(Q, T) ref = naive_sliding_dot_product(Q, T) + np.testing.assert_allclose(comp, ref) return From 28b1b75fd1ca5936fa720c7b4bb06bcc23596b00 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 20 Dec 2025 01:31:20 -0500 Subject: [PATCH 21/21] avoid tracking len(T) via attribute --- sdp/pyfftw_sdp.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/sdp/pyfftw_sdp.py b/sdp/pyfftw_sdp.py index 3affb2f..55e9454 100644 --- a/sdp/pyfftw_sdp.py +++ b/sdp/pyfftw_sdp.py @@ -13,9 +13,6 @@ def __init__(self, max_n=2**20): the real-valued array. A complex-valued array of size `1 + (max_n // 2)` will also be preallocated. """ - self.n = 0 - self.next_fast_n = 0 - # Preallocate arrays self.real_arr = pyfftw.empty_aligned(max_n, dtype="float64") self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype="complex128") @@ -50,22 +47,21 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): Sliding dot product between `Q` and `T`. """ m = Q.shape[0] - if self.n != T.shape[0]: - self.n = T.shape[0] - self.next_fast_n = pyfftw.next_fast_len(self.n) + n = T.shape[0] + next_fast_n = pyfftw.next_fast_len(n) # Update preallocated arrays if needed - if self.next_fast_n > len(self.real_arr): - self.real_arr = pyfftw.empty_aligned(self.next_fast_n, dtype="float64") + if next_fast_n > len(self.real_arr): + self.real_arr = pyfftw.empty_aligned(next_fast_n, dtype="float64") self.complex_arr = pyfftw.empty_aligned( - 1 + (self.next_fast_n // 2), dtype="complex128" + 1 + (next_fast_n // 2), dtype="complex128" ) - real_arr = self.real_arr[: self.next_fast_n] - complex_arr = self.complex_arr[: 1 + (self.next_fast_n // 2)] + real_arr = self.real_arr[:next_fast_n] + complex_arr = self.complex_arr[: 1 + (next_fast_n // 2)] # Get or create FFTW objects - key = (self.next_fast_n, n_threads, planning_flag) + key = (next_fast_n, n_threads, planning_flag) rfft_obj = self.rfft_objects.get(key, None) if rfft_obj is None: @@ -94,26 +90,25 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_MEASURE"): irfft_obj.update_arrays(complex_arr, real_arr) # RFFT(T) - real_arr[: self.n] = T - real_arr[self.n :] = 0.0 + real_arr[:n] = T + real_arr[n:] = 0.0 rfft_obj.execute() # output is in complex_arr complex_arr_T = complex_arr.copy() # RFFT(Q) # Scale by 1/next_fast_n to account for # FFTW's unnormalized inverse FFT via execute() - real_arr[:m] = Q[::-1] / self.next_fast_n + real_arr[:m] = Q[::-1] / next_fast_n real_arr[m:] = 0.0 rfft_obj.execute() # output is in complex_arr # RFFT(T) * RFFT(Q) np.multiply(complex_arr, complex_arr_T, out=complex_arr) - # IRFFT - # input is in complex_arr + # IRFFT (input is in complex_arr) irfft_obj.execute() # output is in real_arr - return real_arr[m - 1 : self.n] + return real_arr[m - 1 : n] _sliding_dot_product = SLIDING_DOT_PRODUCT()