Line length, comments, remove unused mu_0

ksunden · ksunden · commit c0a17ef1de26 · 2017-12-31T00:02:13.000-06:00
diff --git a/WrightSim/experiment/_scan.py b/WrightSim/experiment/_scan.py
@@ -80,10 +80,13 @@ def _gen_efp(self, indices=None):
         return efp
 
     kernel_cuda_source = """
-    __global__ void kernel(double time_start, double time_end, double dt, int nEFields, double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham, pycuda::complex<double>* out)
+    __global__ void kernel(double time_start, double time_end, double dt, int nEFields,
+                           double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham,
+                           pycuda::complex<double>* out)
     {
         int idx = threadIdx.x + blockIdx.x * blockDim.x;
-        runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx*5*nEFields), *(efparams + 2), phase_matching, n_recorded, *ham, out + (idx*ham->nRecorded*n_recorded));
+        runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx * 5 * nEFields),
+                    phase_matching, n_recorded, *ham, out + (idx * ham->nRecorded * n_recorded));
     }
     """
 
@@ -95,7 +98,7 @@ def run(self, mp='cpu', chunk=False):
         mp : {False, 'cpu', 'gpu'} (optional)
             Select multiprocessing: False (or '' or None) means single-threaded.
                                     'gpu' indicates to use the CUDA implementation
-                                    Any other value which evaluates to `True` indicates cpu multiprocessed.
+                                    Any other value which evaluates to ``True`` indicates CPU multiprocessing.
                                     Default is 'cpu'.
 
         Returns
@@ -124,10 +127,15 @@ def run(self, mp='cpu', chunk=False):
             start = np.min(self.efp[..., d_ind]) - self.early_buffer
             stop = np.max(self.efp[..., d_ind]) + self.late_buffer
 
-            mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source + propagate.muladd_cuda_source + propagate.dot_cuda_source + propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source + Scan.kernel_cuda_source)
+            mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source +
+                               propagate.muladd_cuda_source + propagate.dot_cuda_source +
+                               propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source +
+                               Scan.kernel_cuda_source)
 
             kernel = mod.get_function('kernel')
-            kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr, pmPtr, np.intp(self.iprime), hamPtr, sigPtr, grid=(self.array.size//256,1), block=(256,1,1))
+            kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr,
+                   pmPtr, np.intp(self.iprime), hamPtr, sigPtr,
+                   grid=(self.array.size//256,1), block=(256,1,1))
 
             cuda.memcpy_dtoh(self.sig, sigPtr)
         elif mp:
diff --git a/WrightSim/mixed/propagate.py b/WrightSim/mixed/propagate.py
@@ -48,7 +48,8 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
     return rho_emitted
 
 muladd_cuda_source = """
-__device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d, int len, pycuda::complex<double>* out)
+__device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d,
+                       int len, pycuda::complex<double>* out)
 {
     for (int i=0; i<len; i++)
     {
@@ -58,7 +59,8 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
 """
 
 dot_cuda_source = """
-__device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len, pycuda::complex<double>* out)
+__device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len,
+                    pycuda::complex<double>* out)
 {
     for(int i=0; i<len; i++)
     {
@@ -75,40 +77,45 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
 pulse_cuda_source = """
 #include <math.h>
 
-__device__ void calc_efield_params(double* params, double mu_0, int n)
+__device__ void calc_efield_params(double* params, int n)
 {
     for(int i=0; i < n; i++)
     {
-        //sigma
+        // FWHM to sigma
         params[1 + i*5] /= (2. * sqrt(log(2.)));
-        //mu
-        //params[2 + i*5] -= mu_0;
-        //freq
+        // Frequency to rotating frame
         params[3 + i*5] *= 2 * M_PI * 3e-5;
-        //area -> y
+        // area -> y
         params[0 + i*5] /= params[1 + i*5] * sqrt(2 * M_PI);
     }
 }
 
-__device__ void calc_efield(double* params, int* phase_matching,  double t, int n, pycuda::complex<double>* out)
+__device__ void calc_efield(double* params, int* phase_matching,  double t, int n,
+                            pycuda::complex<double>* out)
 {
     for(int i=0; i < n; i++)
     {
-        out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) * (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
-        out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5])*(t-params[2 + i*5])/2./params[1 + i*5]/params[1 + i*5]);
+        // Complex phase and magnitude
+        out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) *
+                                        (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
+        // Gaussian envelope
+        out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5]) * (t-params[2 + i*5])
+                                        / 2. / params[1 + i*5] / params[1 + i*5]);
     }
 }
 """
 
 
 runge_kutta_cuda_source = """
-__device__ pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt, 
-                                               const int nEFields, double* efparams, double mu_0, int* phase_matching,
-                                               const int n_recorded, Hamiltonian ham,
-                                               pycuda::complex<double> *out)
+__device__
+pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt, 
+                                     const int nEFields, double* efparams, int* phase_matching,
+                                     const int n_recorded, Hamiltonian ham,
+                                     pycuda::complex<double> *out)
 {
     //pycuda::complex<double> *H_cur = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
-   // pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
+    //pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
+    //TODO: either figure out why dynamically allocated arrays weren't working, or use a #define to statically allocate
     pycuda::complex<double> buf1[81];
     pycuda::complex<double> buf2[81];
 
@@ -126,28 +133,35 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
     pycuda::complex<double>* delta_rho = (pycuda::complex<double>*)malloc(ham.nStates * sizeof(pycuda::complex<double>)); 
     pycuda::complex<double>* efields = (pycuda::complex<double>*)malloc(nEFields * sizeof(pycuda::complex<double>)); 
 
+    // Initial rho vector
+    //TODO: Use the initial condition from the hamiltonian
     rho_i[0] = 1.;
     for(int i=1; i<ham.nStates; i++) rho_i[i] = 0.;
 
-    calc_efield_params(efparams, mu_0, nEFields);
+    calc_efield_params(efparams, nEFields);
 
     calc_efield(efparams, phase_matching, time_start, nEFields, efields);
 
 
     Hamiltonian_matrix(ham, efields, time_start, H_next);
     for(double t = time_start; t < time_end; t += dt)
     {   
+        // Swap pointers to current and next hamiltonians
         pycuda::complex<double>* temp = H_cur;
         H_cur = H_next;
         H_next = temp;
+        
+        // First order
         calc_efield(efparams, phase_matching, t+dt, nEFields, efields);
         Hamiltonian_matrix(ham, efields, t+dt, H_next);
         dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
         muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, temp_rho_i);
+        // Second order
         dot(H_next, temp_rho_i, ham.nStates, delta_rho);
         muladd(temp_delta_rho, 1., delta_rho, 1., ham.nStates, delta_rho);
         muladd(rho_i, 1., delta_rho, dt/2., ham.nStates, rho_i);
 
+        // Record results if close enough to the end
         if(index > npoints - n_recorded)
         {
             for(int i=0; i < ham.nRecorded; i++)
@@ -159,6 +173,7 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
         index++;
     }
     
+    // Last point, only first order, recorded
     dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
     muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, rho_i);
     for(int i=0; i < ham.nRecorded; i++)