Skip to content

Commit c0a17ef

Browse files
committed
Line length, comments, remove unused mu_0
1 parent c7be3ef commit c0a17ef

2 files changed

Lines changed: 45 additions & 22 deletions

File tree

WrightSim/experiment/_scan.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,13 @@ def _gen_efp(self, indices=None):
8080
return efp
8181

8282
kernel_cuda_source = """
83-
__global__ void kernel(double time_start, double time_end, double dt, int nEFields, double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham, pycuda::complex<double>* out)
83+
__global__ void kernel(double time_start, double time_end, double dt, int nEFields,
84+
double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham,
85+
pycuda::complex<double>* out)
8486
{
8587
int idx = threadIdx.x + blockIdx.x * blockDim.x;
86-
runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx*5*nEFields), *(efparams + 2), phase_matching, n_recorded, *ham, out + (idx*ham->nRecorded*n_recorded));
88+
runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx * 5 * nEFields),
89+
phase_matching, n_recorded, *ham, out + (idx * ham->nRecorded * n_recorded));
8790
}
8891
"""
8992

@@ -95,7 +98,7 @@ def run(self, mp='cpu', chunk=False):
9598
mp : {False, 'cpu', 'gpu'} (optional)
9699
Select multiprocessing: False (or '' or None) means single-threaded.
97100
'gpu' indicates to use the CUDA implementation
98-
Any other value which evaluates to `True` indicates cpu multiprocessed.
101+
Any other value which evaluates to ``True`` indicates cpu multiprocessed.
99102
Default is 'cpu'.
100103
101104
Returns
@@ -124,10 +127,15 @@ def run(self, mp='cpu', chunk=False):
124127
start = np.min(self.efp[..., d_ind]) - self.early_buffer
125128
stop = np.max(self.efp[..., d_ind]) + self.late_buffer
126129

127-
mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source + propagate.muladd_cuda_source + propagate.dot_cuda_source + propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source + Scan.kernel_cuda_source)
130+
mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source +
131+
propagate.muladd_cuda_source + propagate.dot_cuda_source +
132+
propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source +
133+
Scan.kernel_cuda_source)
128134

129135
kernel = mod.get_function('kernel')
130-
kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr, pmPtr, np.intp(self.iprime), hamPtr, sigPtr, grid=(self.array.size//256,1), block=(256,1,1))
136+
kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr,
137+
pmPtr, np.intp(self.iprime), hamPtr, sigPtr,
138+
grid=(self.array.size//256,1), block=(256,1,1))
131139

132140
cuda.memcpy_dtoh(self.sig, sigPtr)
133141
elif mp:

WrightSim/mixed/propagate.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
4848
return rho_emitted
4949

5050
muladd_cuda_source = """
51-
__device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d, int len, pycuda::complex<double>* out)
51+
__device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d,
52+
int len, pycuda::complex<double>* out)
5253
{
5354
for (int i=0; i<len; i++)
5455
{
@@ -58,7 +59,8 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
5859
"""
5960

6061
dot_cuda_source = """
61-
__device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len, pycuda::complex<double>* out)
62+
__device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len,
63+
pycuda::complex<double>* out)
6264
{
6365
for(int i=0; i<len; i++)
6466
{
@@ -75,40 +77,45 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
7577
pulse_cuda_source = """
7678
#include <math.h>
7779
78-
__device__ void calc_efield_params(double* params, double mu_0, int n)
80+
__device__ void calc_efield_params(double* params, int n)
7981
{
8082
for(int i=0; i < n; i++)
8183
{
82-
//sigma
84+
// FWHM to sigma
8385
params[1 + i*5] /= (2. * sqrt(log(2.)));
84-
//mu
85-
//params[2 + i*5] -= mu_0;
86-
//freq
86+
// Frequency to rotating frame
8787
params[3 + i*5] *= 2 * M_PI * 3e-5;
88-
//area -> y
88+
// area -> y
8989
params[0 + i*5] /= params[1 + i*5] * sqrt(2 * M_PI);
9090
}
9191
}
9292
93-
__device__ void calc_efield(double* params, int* phase_matching, double t, int n, pycuda::complex<double>* out)
93+
__device__ void calc_efield(double* params, int* phase_matching, double t, int n,
94+
pycuda::complex<double>* out)
9495
{
9596
for(int i=0; i < n; i++)
9697
{
97-
out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) * (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
98-
out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5])*(t-params[2 + i*5])/2./params[1 + i*5]/params[1 + i*5]);
98+
// Complex phase and magnitude
99+
out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) *
100+
(params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
101+
// Gaussian envelope
102+
out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5]) * (t-params[2 + i*5])
103+
/ 2. / params[1 + i*5] / params[1 + i*5]);
99104
}
100105
}
101106
"""
102107

103108

104109
runge_kutta_cuda_source = """
105-
__device__ pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt,
106-
const int nEFields, double* efparams, double mu_0, int* phase_matching,
107-
const int n_recorded, Hamiltonian ham,
108-
pycuda::complex<double> *out)
110+
__device__
111+
pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt,
112+
const int nEFields, double* efparams, int* phase_matching,
113+
const int n_recorded, Hamiltonian ham,
114+
pycuda::complex<double> *out)
109115
{
110116
//pycuda::complex<double> *H_cur = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
111-
// pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
117+
//pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
118+
//TODO: either figure out why dynamically allocated arrays weren't working, or use a #define to statically allocate
112119
pycuda::complex<double> buf1[81];
113120
pycuda::complex<double> buf2[81];
114121
@@ -126,28 +133,35 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
126133
pycuda::complex<double>* delta_rho = (pycuda::complex<double>*)malloc(ham.nStates * sizeof(pycuda::complex<double>));
127134
pycuda::complex<double>* efields = (pycuda::complex<double>*)malloc(nEFields * sizeof(pycuda::complex<double>));
128135
136+
// Inital rho vector
137+
//TODO: Use the inital condition from the hamiltonian
129138
rho_i[0] = 1.;
130139
for(int i=1; i<ham.nStates; i++) rho_i[i] = 0.;
131140
132-
calc_efield_params(efparams, mu_0, nEFields);
141+
calc_efield_params(efparams, nEFields);
133142
134143
calc_efield(efparams, phase_matching, time_start, nEFields, efields);
135144
136145
137146
Hamiltonian_matrix(ham, efields, time_start, H_next);
138147
for(double t = time_start; t < time_end; t += dt)
139148
{
149+
// Swap pointers to current and next hamiltonians
140150
pycuda::complex<double>* temp = H_cur;
141151
H_cur = H_next;
142152
H_next = temp;
153+
154+
// First order
143155
calc_efield(efparams, phase_matching, t+dt, nEFields, efields);
144156
Hamiltonian_matrix(ham, efields, t+dt, H_next);
145157
dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
146158
muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, temp_rho_i);
159+
// Second order
147160
dot(H_next, temp_rho_i, ham.nStates, delta_rho);
148161
muladd(temp_delta_rho, 1., delta_rho, 1., ham.nStates, delta_rho);
149162
muladd(rho_i, 1., delta_rho, dt/2., ham.nStates, rho_i);
150163
164+
// Record results if close enough to the end
151165
if(index > npoints - n_recorded)
152166
{
153167
for(int i=0; i < ham.nRecorded; i++)
@@ -159,6 +173,7 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
159173
index++;
160174
}
161175
176+
// Last point, only first order, recorded
162177
dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
163178
muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, rho_i);
164179
for(int i=0; i < ham.nRecorded; i++)

0 commit comments

Comments
 (0)