@@ -67,15 +67,17 @@ static inline void launch_pre_den_sparse(const int* data_col_ind,
6767 data_col_ind, data_values, nnz, mean_array, den, counter);
6868}
6969
70- NB_MODULE (_autocorr_cuda, m) {
70+ template <typename Device>
71+ void register_bindings (nb::module_& m) {
7172 // morans_dense - float32
7273 m.def (
7374 " morans_dense" ,
74- [](cuda_array_c<const float > data_centered,
75- cuda_array_c<const int > adj_row_ptr,
76- cuda_array_c<const int > adj_col_ind,
77- cuda_array_c<const float > adj_data, cuda_array_c<float > num,
78- int n_samples, int n_features, std::uintptr_t stream) {
75+ [](gpu_array_c<const float , Device> data_centered,
76+ gpu_array_c<const int , Device> adj_row_ptr,
77+ gpu_array_c<const int , Device> adj_col_ind,
78+ gpu_array_c<const float , Device> adj_data,
79+ gpu_array_c<float , Device> num, int n_samples, int n_features,
80+ std::uintptr_t stream) {
7981 launch_morans_dense (data_centered.data (), adj_row_ptr.data (),
8082 adj_col_ind.data (), adj_data.data (), num.data (),
8183 n_samples, n_features, (cudaStream_t)stream);
@@ -85,11 +87,12 @@ NB_MODULE(_autocorr_cuda, m) {
8587 // morans_dense - float64
8688 m.def (
8789 " morans_dense" ,
88- [](cuda_array_c<const double > data_centered,
89- cuda_array_c<const int > adj_row_ptr,
90- cuda_array_c<const int > adj_col_ind,
91- cuda_array_c<const double > adj_data, cuda_array_c<double > num,
92- int n_samples, int n_features, std::uintptr_t stream) {
90+ [](gpu_array_c<const double , Device> data_centered,
91+ gpu_array_c<const int , Device> adj_row_ptr,
92+ gpu_array_c<const int , Device> adj_col_ind,
93+ gpu_array_c<const double , Device> adj_data,
94+ gpu_array_c<double , Device> num, int n_samples, int n_features,
95+ std::uintptr_t stream) {
9396 launch_morans_dense (data_centered.data (), adj_row_ptr.data (),
9497 adj_col_ind.data (), adj_data.data (), num.data (),
9598 n_samples, n_features, (cudaStream_t)stream);
@@ -100,14 +103,14 @@ NB_MODULE(_autocorr_cuda, m) {
100103 // morans_sparse - float32
101104 m.def (
102105 " morans_sparse" ,
103- [](cuda_array_c <const int > adj_row_ptr,
104- cuda_array_c <const int > adj_col_ind,
105- cuda_array_c <const float > adj_data,
106- cuda_array_c <const int > data_row_ptr,
107- cuda_array_c <const int > data_col_ind,
108- cuda_array_c <const float > data_values, int n_samples, int n_features ,
109- cuda_array_c <const float > mean_array, cuda_array_c< float > num ,
110- std::uintptr_t stream) {
106+ [](gpu_array_c <const int , Device > adj_row_ptr,
107+ gpu_array_c <const int , Device > adj_col_ind,
108+ gpu_array_c <const float , Device > adj_data,
109+ gpu_array_c <const int , Device > data_row_ptr,
110+ gpu_array_c <const int , Device > data_col_ind,
111+ gpu_array_c <const float , Device > data_values, int n_samples,
112+ int n_features, gpu_array_c <const float , Device> mean_array ,
113+ gpu_array_c< float , Device> num, std::uintptr_t stream) {
111114 launch_morans_sparse (adj_row_ptr.data (), adj_col_ind.data (),
112115 adj_data.data (), data_row_ptr.data (),
113116 data_col_ind.data (), data_values.data (),
@@ -120,14 +123,14 @@ NB_MODULE(_autocorr_cuda, m) {
120123 // morans_sparse - float64
121124 m.def (
122125 " morans_sparse" ,
123- [](cuda_array_c <const int > adj_row_ptr,
124- cuda_array_c <const int > adj_col_ind,
125- cuda_array_c <const double > adj_data,
126- cuda_array_c <const int > data_row_ptr,
127- cuda_array_c <const int > data_col_ind,
128- cuda_array_c <const double > data_values, int n_samples,
129- int n_features, cuda_array_c <const double > mean_array,
130- cuda_array_c <double > num, std::uintptr_t stream) {
126+ [](gpu_array_c <const int , Device > adj_row_ptr,
127+ gpu_array_c <const int , Device > adj_col_ind,
128+ gpu_array_c <const double , Device > adj_data,
129+ gpu_array_c <const int , Device > data_row_ptr,
130+ gpu_array_c <const int , Device > data_col_ind,
131+ gpu_array_c <const double , Device > data_values, int n_samples,
132+ int n_features, gpu_array_c <const double , Device > mean_array,
133+ gpu_array_c <double , Device > num, std::uintptr_t stream) {
131134 launch_morans_sparse (adj_row_ptr.data (), adj_col_ind.data (),
132135 adj_data.data (), data_row_ptr.data (),
133136 data_col_ind.data (), data_values.data (),
@@ -141,10 +144,12 @@ NB_MODULE(_autocorr_cuda, m) {
141144 // gearys_dense - float32
142145 m.def (
143146 " gearys_dense" ,
144- [](cuda_array_c<const float > data, cuda_array_c<const int > adj_row_ptr,
145- cuda_array_c<const int > adj_col_ind,
146- cuda_array_c<const float > adj_data, cuda_array_c<float > num,
147- int n_samples, int n_features, std::uintptr_t stream) {
147+ [](gpu_array_c<const float , Device> data,
148+ gpu_array_c<const int , Device> adj_row_ptr,
149+ gpu_array_c<const int , Device> adj_col_ind,
150+ gpu_array_c<const float , Device> adj_data,
151+ gpu_array_c<float , Device> num, int n_samples, int n_features,
152+ std::uintptr_t stream) {
148153 launch_gearys_dense (data.data (), adj_row_ptr.data (),
149154 adj_col_ind.data (), adj_data.data (), num.data (),
150155 n_samples, n_features, (cudaStream_t)stream);
@@ -154,10 +159,12 @@ NB_MODULE(_autocorr_cuda, m) {
154159 // gearys_dense - float64
155160 m.def (
156161 " gearys_dense" ,
157- [](cuda_array_c<const double > data, cuda_array_c<const int > adj_row_ptr,
158- cuda_array_c<const int > adj_col_ind,
159- cuda_array_c<const double > adj_data, cuda_array_c<double > num,
160- int n_samples, int n_features, std::uintptr_t stream) {
162+ [](gpu_array_c<const double , Device> data,
163+ gpu_array_c<const int , Device> adj_row_ptr,
164+ gpu_array_c<const int , Device> adj_col_ind,
165+ gpu_array_c<const double , Device> adj_data,
166+ gpu_array_c<double , Device> num, int n_samples, int n_features,
167+ std::uintptr_t stream) {
161168 launch_gearys_dense (data.data (), adj_row_ptr.data (),
162169 adj_col_ind.data (), adj_data.data (), num.data (),
163170 n_samples, n_features, (cudaStream_t)stream);
@@ -168,13 +175,14 @@ NB_MODULE(_autocorr_cuda, m) {
168175 // gearys_sparse - float32
169176 m.def (
170177 " gearys_sparse" ,
171- [](cuda_array_c<const int > adj_row_ptr,
172- cuda_array_c<const int > adj_col_ind,
173- cuda_array_c<const float > adj_data,
174- cuda_array_c<const int > data_row_ptr,
175- cuda_array_c<const int > data_col_ind,
176- cuda_array_c<const float > data_values, int n_samples, int n_features,
177- cuda_array_c<float > num, std::uintptr_t stream) {
178+ [](gpu_array_c<const int , Device> adj_row_ptr,
179+ gpu_array_c<const int , Device> adj_col_ind,
180+ gpu_array_c<const float , Device> adj_data,
181+ gpu_array_c<const int , Device> data_row_ptr,
182+ gpu_array_c<const int , Device> data_col_ind,
183+ gpu_array_c<const float , Device> data_values, int n_samples,
184+ int n_features, gpu_array_c<float , Device> num,
185+ std::uintptr_t stream) {
178186 launch_gearys_sparse (
179187 adj_row_ptr.data (), adj_col_ind.data (), adj_data.data (),
180188 data_row_ptr.data (), data_col_ind.data (), data_values.data (),
@@ -186,13 +194,14 @@ NB_MODULE(_autocorr_cuda, m) {
186194 // gearys_sparse - float64
187195 m.def (
188196 " gearys_sparse" ,
189- [](cuda_array_c<const int > adj_row_ptr,
190- cuda_array_c<const int > adj_col_ind,
191- cuda_array_c<const double > adj_data,
192- cuda_array_c<const int > data_row_ptr,
193- cuda_array_c<const int > data_col_ind,
194- cuda_array_c<const double > data_values, int n_samples,
195- int n_features, cuda_array_c<double > num, std::uintptr_t stream) {
197+ [](gpu_array_c<const int , Device> adj_row_ptr,
198+ gpu_array_c<const int , Device> adj_col_ind,
199+ gpu_array_c<const double , Device> adj_data,
200+ gpu_array_c<const int , Device> data_row_ptr,
201+ gpu_array_c<const int , Device> data_col_ind,
202+ gpu_array_c<const double , Device> data_values, int n_samples,
203+ int n_features, gpu_array_c<double , Device> num,
204+ std::uintptr_t stream) {
196205 launch_gearys_sparse (
197206 adj_row_ptr.data (), adj_col_ind.data (), adj_data.data (),
198207 data_row_ptr.data (), data_col_ind.data (), data_values.data (),
@@ -205,10 +214,11 @@ NB_MODULE(_autocorr_cuda, m) {
205214 // pre_den_sparse - float32
206215 m.def (
207216 " pre_den_sparse" ,
208- [](cuda_array_c<const int > data_col_ind,
209- cuda_array_c<const float > data_values, int nnz,
210- cuda_array_c<const float > mean_array, cuda_array_c<float > den,
211- cuda_array_c<int > counter, std::uintptr_t stream) {
217+ [](gpu_array_c<const int , Device> data_col_ind,
218+ gpu_array_c<const float , Device> data_values, int nnz,
219+ gpu_array_c<const float , Device> mean_array,
220+ gpu_array_c<float , Device> den, gpu_array_c<int , Device> counter,
221+ std::uintptr_t stream) {
212222 launch_pre_den_sparse (data_col_ind.data (), data_values.data (), nnz,
213223 mean_array.data (), den.data (), counter.data (),
214224 (cudaStream_t)stream);
@@ -218,14 +228,19 @@ NB_MODULE(_autocorr_cuda, m) {
218228 // pre_den_sparse - float64
219229 m.def (
220230 " pre_den_sparse" ,
221- [](cuda_array_c<const int > data_col_ind,
222- cuda_array_c<const double > data_values, int nnz,
223- cuda_array_c<const double > mean_array, cuda_array_c<double > den,
224- cuda_array_c<int > counter, std::uintptr_t stream) {
231+ [](gpu_array_c<const int , Device> data_col_ind,
232+ gpu_array_c<const double , Device> data_values, int nnz,
233+ gpu_array_c<const double , Device> mean_array,
234+ gpu_array_c<double , Device> den, gpu_array_c<int , Device> counter,
235+ std::uintptr_t stream) {
225236 launch_pre_den_sparse (data_col_ind.data (), data_values.data (), nnz,
226237 mean_array.data (), den.data (), counter.data (),
227238 (cudaStream_t)stream);
228239 },
229240 " data_col_ind" _a, " data_values" _a, nb::kw_only (), " nnz" _a,
230241 " mean_array" _a, " den" _a, " counter" _a, " stream" _a = 0 );
231242}
243+
244+ NB_MODULE (_autocorr_cuda, m) {  // nanobind entry point: defines the `_autocorr_cuda` extension module `m`
245+ REGISTER_GPU_BINDINGS (register_bindings, m);  // hands `register_bindings` and `m` to the macro; NOTE(review): presumably instantiates register_bindings<Device> per supported device type - confirm against the macro definition
246+ }
0 commit comments