33from scipy .linalg import eigh
44from .base import BasisGenerator
55
6+ try :
7+ import cupy as cp
8+ from cupy .linalg import eigh as cp_eigh
9+ HAS_CUPY = True
10+
11+ # Pre-computed gamma values for Bessel function
12+ GAMMA_1_6 = 5.56631600178
13+ GAMMA_11_6 = 0.94065585824
14+
15+ # Custom GPU kernel for K_{5/6} Bessel function (optimized for float64)
16+ _kv56_kernel_float64 = cp .ElementwiseKernel (
17+ 'float64 z' ,
18+ 'float64 K' ,
19+ '''
20+ double v = 5.0 / 6.0;
21+ double z_abs = fabs(z);
22+ if (z_abs < 2.0) {
23+ // Series approximation for small z
24+ if (z_abs < 1e-12) {
25+ K = 1.89718990814 * pow(z_abs, -5.0/6.0);
26+ return;
27+ }
28+
29+ double half_z = 0.5 * z;
30+ double half_z_sq = half_z * half_z;
31+ double z_pow_v = pow(half_z, v);
32+ double z_pow_neg_v = pow(half_z, -v);
33+
34+ double sum_a = z_pow_v / gamma_11_6;
35+ double sum_b = z_pow_neg_v / gamma_1_6;
36+ double term_a = sum_a;
37+ double term_b = sum_b;
38+
39+ double prev_sum_a = 0.0;
40+ double prev_sum_b = 0.0;
41+ int k = 1;
42+ double tol = 1e-15;
43+
44+ for (int i = 0; i < 100; ++i) {
45+ double k_plus_v = k + v;
46+ double k_minus_v = k - v;
47+
48+ double factor_a = half_z_sq / (k * k_plus_v);
49+ double factor_b = half_z_sq / (k * k_minus_v);
50+
51+ term_a *= factor_a;
52+ term_b *= factor_b;
53+ sum_a += term_a;
54+ sum_b += term_b;
55+
56+ if ((i & 1) == 1) {
57+ double rel_change_a = fabs(sum_a - prev_sum_a) / fabs(sum_a);
58+ double rel_change_b = fabs(sum_b - prev_sum_b) / fabs(sum_b);
59+
60+ if (rel_change_a < tol && rel_change_b < tol) {
61+ break;
62+ }
63+ prev_sum_a = sum_a;
64+ prev_sum_b = sum_b;
65+ }
66+ k += 1;
67+ }
68+ K = M_PI * (sum_b - sum_a);
69+ } else {
70+ // Asymptotic approximation for larger z
71+ double z_inv = 1.0 / z;
72+
73+ double sum_terms = 1.0 + z_inv * (2.0/9.0 + z_inv * (
74+ -7.0/81.0 + z_inv * (175.0/2187.0 + z_inv * (
75+ -2275.0/19683.0 + z_inv * 5005.0/177147.0
76+ ))));
77+
78+ double sqrt_term = sqrt(M_PI / (2.0 * z));
79+ double exp_term = exp(-z);
80+ K = sqrt_term * exp_term * sum_terms;
81+ }
82+ ''' ,
83+ name = 'kv56_kernel_float64' ,
84+ preamble = f'''
85+ const double gamma_1_6 = { GAMMA_1_6 } ;
86+ const double gamma_11_6 = { GAMMA_11_6 } ;
87+ '''
88+ )
89+
90+ except ImportError :
91+ cp = None
92+ cp_eigh = None
93+ HAS_CUPY = False
94+
695class KLBasisGenerator (BasisGenerator ):
796 """
897 Generates Karhunen-Loève modes based on Von Kármán statistics.
998 """
1099
11- def __init__ (self , positions : np .ndarray , fried_parameter : float = 0.16 , outer_scale : float = 30.0 ):
100+ def __init__ (self , positions : np .ndarray , fried_parameter : float = 0.16 , outer_scale : float = 30.0 , use_gpu : bool = False ):
12101 super ().__init__ (positions )
13102 self .fried_parameter = fried_parameter
14103 self .outer_scale = outer_scale
15104 self .eigenvalues = None
105+ self .use_gpu = use_gpu
106+
107+ if self .use_gpu and not HAS_CUPY :
108+ print ("Warning: CuPy not found. Falling back to CPU." )
109+ self .use_gpu = False
16110
17111 def _von_karman_covariance (self ) -> np .ndarray :
18112 """Compute the Von Karman phase covariance matrix."""
113+ if self .use_gpu :
114+ return self ._von_karman_covariance_gpu ()
115+ else :
116+ return self ._von_karman_covariance_cpu ()
117+
118+ def _von_karman_covariance_cpu (self ) -> np .ndarray :
119+ """Compute the Von Karman phase covariance matrix on CPU."""
19120 diffs = self .positions [:, None , :] - self .positions [None , :, :]
20121 r = np .linalg .norm (diffs , axis = - 1 )
21122
22123 L0 = self .outer_scale
23124 r0 = self .fried_parameter
24125
25126 # Variance sigma^2 calculation to match structure function limit
26- # D(r) = 6.88 * (r/r0)^(5/3) for small r
27- # sigma^2 = A * (L0/r0)^(5/3)
28127 A = (5.0 / 6.0 ) * (6.88 / 2.0 ) * gamma (5.0 / 6.0 ) / (gamma (1.0 / 6.0 ) * np .pi ** (5.0 / 3.0 ))
29128 sigma2 = A * (L0 / r0 )** (5.0 / 3.0 )
30129
@@ -40,21 +139,74 @@ def _von_karman_covariance(self) -> np.ndarray:
40139
41140 cov [~ mask ] = sigma2
42141 return cov
43-
44- def generate (self , n_modes : int , ignore_piston : bool = False , ** kwargs ) -> np .ndarray :
45- cov = self ._von_karman_covariance ()
46- eigenvalues , eigenvectors = eigh (cov )
142+
143+ def _von_karman_covariance_gpu (self ):
144+ """Compute the Von Karman phase covariance matrix on GPU."""
145+ # Transfer positions to GPU
146+ positions_gpu = cp .asarray (self .positions , dtype = cp .float64 )
147+
148+ # Compute pairwise distances on GPU
149+ diffs = positions_gpu [:, None , :] - positions_gpu [None , :, :]
150+ r = cp .linalg .norm (diffs , axis = - 1 )
47151
48- # Sort descending
49- sorter = np .argsort (eigenvalues )[::- 1 ]
152+ L0 = self .outer_scale
153+ r0 = self .fried_parameter
154+
155+ # Compute sigma^2 using GPU operations
156+ nu = 5.0 / 6.0
157+ gamma_5_6 = float (gamma (5.0 / 6.0 ))
158+ gamma_1_6 = float (gamma (1.0 / 6.0 ))
159+ A = (5.0 / 6.0 ) * (6.88 / 2.0 ) * gamma_5_6 / (gamma_1_6 * cp .pi ** (5.0 / 3.0 ))
160+ sigma2 = A * (L0 / r0 )** (5.0 / 3.0 )
161+
162+ # Compute covariance for all distances
163+ u = 2 * cp .pi * r / L0
164+ norm_factor = 2 ** (1 - nu ) / gamma (nu )
50165
51- sorted_eigenvalues = eigenvalues [sorter ]
52- sorted_eigenvectors = eigenvectors [:, sorter ]
166+ # Use custom GPU kernel for Bessel function K_{5/6}
167+ kv_values = cp .zeros_like (u , dtype = cp .float64 )
168+ _kv56_kernel_float64 (u , kv_values )
53169
54- start_idx = 1 if ignore_piston else 0
55- end_idx = start_idx + n_modes
170+ # Compute covariance matrix
171+ cov = sigma2 * norm_factor * ( u ** nu ) * kv_values
56172
57- self .eigenvalues = sorted_eigenvalues [start_idx :end_idx ]
58- self .modes = sorted_eigenvectors [:, start_idx :end_idx ]
173+ # Handle zero/very small distances (diagonal or very close points)
174+ mask = r <= 1e-9
175+ cov = cp .where (mask , sigma2 , cov )
176+
177+ return cov
178+
179+ def generate (self , n_modes : int , ignore_piston : bool = False , ** kwargs ) -> np .ndarray :
180+ cov = self ._von_karman_covariance ()
181+
182+ if self .use_gpu :
183+ # Covariance is already on GPU, compute eigendecomposition
184+ eigenvalues , eigenvectors = cp_eigh (cov )
185+
186+ # Sort descending on GPU
187+ sorter = cp .argsort (eigenvalues )[::- 1 ]
188+ sorted_eigenvalues = eigenvalues [sorter ]
189+ sorted_eigenvectors = eigenvectors [:, sorter ]
190+
191+ start_idx = 1 if ignore_piston else 0
192+ end_idx = start_idx + n_modes
193+
194+ # Extract modes and eigenvalues, then transfer to CPU
195+ self .eigenvalues = cp .asnumpy (sorted_eigenvalues [start_idx :end_idx ])
196+ self .modes = cp .asnumpy (sorted_eigenvectors [:, start_idx :end_idx ])
197+ else :
198+ # CPU path - cov is already numpy array
199+ eigenvalues , eigenvectors = eigh (cov )
200+
201+ # Sort descending
202+ sorter = np .argsort (eigenvalues )[::- 1 ]
203+ sorted_eigenvalues = eigenvalues [sorter ]
204+ sorted_eigenvectors = eigenvectors [:, sorter ]
205+
206+ start_idx = 1 if ignore_piston else 0
207+ end_idx = start_idx + n_modes
208+
209+ self .eigenvalues = sorted_eigenvalues [start_idx :end_idx ]
210+ self .modes = sorted_eigenvectors [:, start_idx :end_idx ]
59211
60212 return self .modes
0 commit comments