Skip to content

Commit 8e9c2c3

Browse files
authored
Merge pull request #1278 from MengYuanzhuo/devel
2 parents 24593fa + 5cd6c66 commit 8e9c2c3

3 files changed

Lines changed: 96 additions & 2 deletions

File tree

src/gpu/specfem2D_gpu_cuda_method_stubs.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,3 +731,10 @@ void FC_FUNC_(compute_seismograms_cuda,
731731
int* itf,
732732
int* it_endf) {}
733733

734+
void FC_FUNC_(flush_seismograms_cuda,
735+
FLUSH_SEISMOGRAMS_CUDA)(long* Mesh_pointer_f,
736+
int* i_sigf,
737+
double* sisux, double* sisuz,
738+
int* seismo_currentf,
739+
int* nlength_seismogramf) {}
740+

src/gpu/write_seismograms_cuda.cu

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,30 @@ void FC_FUNC_(compute_seismograms_cuda,
243243

244244
// seismogram buffers are 1D and components appended; size for one single component record
245245
int size = mp->nrec_local * nlength_seismogram;
246+
int valid_length = seismo_current + 1;
246247

247-
// copies from GPU to CPU (note: could use async mem copy in future...)
248-
print_CUDA_error_if_any(cudaMemcpy(h_seismo, d_seismo, sizeof(realw) * 2 * size, cudaMemcpyDeviceToHost),72001);
248+
if (valid_length > nlength_seismogram) valid_length = nlength_seismogram;
249+
250+
// clears host buffers first, then copies only the samples that were actually written.
251+
// This prevents stale tail values from a previous chunk from leaking into the final trace.
252+
for (int i = 0; i < 2 * size; i++) {
253+
h_seismo[i] = 0.0f;
254+
}
255+
256+
// copies only the valid part from GPU to CPU (note: could use async mem copy in future...)
257+
for (int irec = 0; irec < mp->nrec_local; irec++) {
258+
int device_offset = irec * nlength_seismogram;
259+
int host_offset = irec * nlength_seismogram;
260+
261+
print_CUDA_error_if_any(cudaMemcpy(h_seismo + host_offset,
262+
d_seismo + device_offset,
263+
sizeof(realw) * valid_length,
264+
cudaMemcpyDeviceToHost),72001);
265+
print_CUDA_error_if_any(cudaMemcpy(h_seismo + size + host_offset,
266+
d_seismo + size + device_offset,
267+
sizeof(realw) * valid_length,
268+
cudaMemcpyDeviceToHost),72002);
269+
}
249270

250271
// copies values into host array
251272
for (int irec=0; irec < mp->nrec_local; irec++){
@@ -258,3 +279,62 @@ void FC_FUNC_(compute_seismograms_cuda,
258279

259280
GPU_ERROR_CHECKING ("compute_seismograms_cuda");
260281
}
282+
283+
extern "C"
284+
void FC_FUNC_(flush_seismograms_cuda,
285+
FLUSH_SEISMOGRAMS_CUDA)(long* Mesh_pointer_f,
286+
int* i_sigf,
287+
double* sisux, double* sisuz,
288+
int* seismo_currentf,
289+
int* nlength_seismogramf) {
290+
291+
TRACE("flush_seismograms_cuda");
292+
293+
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
294+
295+
synchronize_cuda();
296+
297+
if (mp->nrec_local == 0) return;
298+
299+
int i_sig = *i_sigf - 1;
300+
int seismo_current = *seismo_currentf;
301+
int nlength_seismogram = *nlength_seismogramf;
302+
303+
if (seismo_current <= 0) return;
304+
305+
if (seismo_current > nlength_seismogram) seismo_current = nlength_seismogram;
306+
307+
realw* h_seismo = mp->h_seismograms[i_sig];
308+
realw* d_seismo = mp->d_seismograms[i_sig];
309+
310+
cudaStreamSynchronize(mp->compute_stream);
311+
312+
int size = mp->nrec_local * nlength_seismogram;
313+
314+
for (int i = 0; i < 2 * size; i++) {
315+
h_seismo[i] = 0.0f;
316+
}
317+
318+
for (int irec = 0; irec < mp->nrec_local; irec++) {
319+
int device_offset = irec * nlength_seismogram;
320+
int host_offset = irec * nlength_seismogram;
321+
322+
print_CUDA_error_if_any(cudaMemcpy(h_seismo + host_offset,
323+
d_seismo + device_offset,
324+
sizeof(realw) * seismo_current,
325+
cudaMemcpyDeviceToHost),73001);
326+
print_CUDA_error_if_any(cudaMemcpy(h_seismo + size + host_offset,
327+
d_seismo + size + device_offset,
328+
sizeof(realw) * seismo_current,
329+
cudaMemcpyDeviceToHost),73002);
330+
}
331+
332+
for (int irec = 0; irec < mp->nrec_local; irec++){
333+
for (int j = 0; j < nlength_seismogram; j++){
334+
sisux[j + nlength_seismogram * irec] = (double) h_seismo[j + nlength_seismogram * irec];
335+
sisuz[j + nlength_seismogram * irec] = (double) h_seismo[j + nlength_seismogram * irec + size];
336+
}
337+
}
338+
339+
GPU_ERROR_CHECKING ("flush_seismograms_cuda");
340+
}

src/specfem2D/write_seismograms.F90

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,13 @@ subroutine write_seismograms()
231231
do i_sig = 1,NSIGTYPE
232232
seismotype_l = seismotypeVec(i_sig)
233233

234+
! only flush partially filled GPU seismogram buffers here; when the buffer is
235+
! full, compute_seismograms_cuda has already copied it back to the host
236+
if (GPU_MODE .and. seismo_current > 0 .and. seismo_current < nlength_seismogram) then
237+
call flush_seismograms_cuda(Mesh_pointer,i_sig,sisux(:,:,i_sig),sisuz(:,:,i_sig), &
238+
seismo_current,nlength_seismogram)
239+
endif
240+
234241
call write_seismograms_to_file(sisux(:,:,i_sig),sisuz(:,:,i_sig),siscurl(:,:,i_sig),seismotype_l,seismo_current, &
235242
seismo_offset)
236243

0 commit comments

Comments
 (0)