@@ -48,7 +48,8 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
4848 return rho_emitted
4949
5050muladd_cuda_source = """
51- __device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d, int len, pycuda::complex<double>* out)
51+ __device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d,
52+ int len, pycuda::complex<double>* out)
5253{
5354 for (int i=0; i<len; i++)
5455 {
@@ -58,7 +59,8 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
5859"""
5960
6061dot_cuda_source = """
61- __device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len, pycuda::complex<double>* out)
62+ __device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len,
63+ pycuda::complex<double>* out)
6264{
6365 for(int i=0; i<len; i++)
6466 {
@@ -75,40 +77,45 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
7577pulse_cuda_source = """
7678#include <math.h>
7779
78- __device__ void calc_efield_params(double* params, double mu_0, int n)
80+ __device__ void calc_efield_params(double* params, int n)
7981{
8082 for(int i=0; i < n; i++)
8183 {
82- //sigma
84+ // FWHM to sigma
8385 params[1 + i*5] /= (2. * sqrt(log(2.)));
84- //mu
85- //params[2 + i*5] -= mu_0;
86- //freq
86+ // Frequency to rotating frame
8787 params[3 + i*5] *= 2 * M_PI * 3e-5;
88- //area -> y
88+ // area -> y
8989 params[0 + i*5] /= params[1 + i*5] * sqrt(2 * M_PI);
9090 }
9191}
9292
93- __device__ void calc_efield(double* params, int* phase_matching, double t, int n, pycuda::complex<double>* out)
93+ __device__ void calc_efield(double* params, int* phase_matching, double t, int n,
94+ pycuda::complex<double>* out)
9495{
9596 for(int i=0; i < n; i++)
9697 {
97- out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) * (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
98- out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5])*(t-params[2 + i*5])/2./params[1 + i*5]/params[1 + i*5]);
98+ // Complex phase and magnitude
99+ out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) *
100+ (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
101+ // Gaussian envelope
102+ out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5]) * (t-params[2 + i*5])
103+ / 2. / params[1 + i*5] / params[1 + i*5]);
99104 }
100105}
101106"""
102107
103108
104109runge_kutta_cuda_source = """
105- __device__ pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt,
106- const int nEFields, double* efparams, double mu_0, int* phase_matching,
107- const int n_recorded, Hamiltonian ham,
108- pycuda::complex<double> *out)
110+ __device__
111+ pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt,
112+ const int nEFields, double* efparams, int* phase_matching,
113+ const int n_recorded, Hamiltonian ham,
114+ pycuda::complex<double> *out)
109115{
110116 //pycuda::complex<double> *H_cur = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
111- // pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
117+ //pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
118+ //TODO: either figure out why dynamically allocated arrays weren't working, or use a #define to statically allocate
112119 pycuda::complex<double> buf1[81];
113120 pycuda::complex<double> buf2[81];
114121
@@ -126,28 +133,35 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
126133 pycuda::complex<double>* delta_rho = (pycuda::complex<double>*)malloc(ham.nStates * sizeof(pycuda::complex<double>));
127134 pycuda::complex<double>* efields = (pycuda::complex<double>*)malloc(nEFields * sizeof(pycuda::complex<double>));
128135
136+ // Inital rho vector
137+ //TODO: Use the inital condition from the hamiltonian
129138 rho_i[0] = 1.;
130139 for(int i=1; i<ham.nStates; i++) rho_i[i] = 0.;
131140
132- calc_efield_params(efparams, mu_0, nEFields);
141+ calc_efield_params(efparams, nEFields);
133142
134143 calc_efield(efparams, phase_matching, time_start, nEFields, efields);
135144
136145
137146 Hamiltonian_matrix(ham, efields, time_start, H_next);
138147 for(double t = time_start; t < time_end; t += dt)
139148 {
149+ // Swap pointers to current and next hamiltonians
140150 pycuda::complex<double>* temp = H_cur;
141151 H_cur = H_next;
142152 H_next = temp;
153+
154+ // First order
143155 calc_efield(efparams, phase_matching, t+dt, nEFields, efields);
144156 Hamiltonian_matrix(ham, efields, t+dt, H_next);
145157 dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
146158 muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, temp_rho_i);
159+ // Second order
147160 dot(H_next, temp_rho_i, ham.nStates, delta_rho);
148161 muladd(temp_delta_rho, 1., delta_rho, 1., ham.nStates, delta_rho);
149162 muladd(rho_i, 1., delta_rho, dt/2., ham.nStates, rho_i);
150163
164+ // Record results if close enough to the end
151165 if(index > npoints - n_recorded)
152166 {
153167 for(int i=0; i < ham.nRecorded; i++)
@@ -159,6 +173,7 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
159173 index++;
160174 }
161175
176+ // Last point, only first order, recorded
162177 dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
163178 muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, rho_i);
164179 for(int i=0; i < ham.nRecorded; i++)
0 commit comments