Skip to content

Commit 27a4e30

Browse files
committed
fix: remove unnecessary table_info device memory allocation in GPU tests, since tabulate_fusion_se_t_tebd_grad_gpu reads table_info on the CPU (host) side
1 parent 00ca8b9 commit 27a4e30

2 files changed

Lines changed: 7 additions & 10 deletions

File tree

source/lib/src/gpu/tabulate.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,7 @@ void tabulate_fusion_se_t_tebd_grad_gpu(FPTYPE* dy_dem_x,
10941094
DPErrcheck(gpuGetLastError());
10951095
DPErrcheck(gpuDeviceSynchronize());
10961096
DPErrcheck(gpuMemset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
1097+
// table_info must be a host (CPU) pointer: its elements are read here on the host and passed to the kernel as arguments
10971098
tabulate_fusion_se_t_tebd_grad_fifth_order_polynomial<FPTYPE, MM, KK>
10981099
<<<nloc, KK * WARP_SIZE>>>(dy_dem_x, table, em_x, em, dy, table_info[0],
10991100
table_info[1], table_info[2], table_info[3],

source/lib/tests/test_tabulate_se_t_tebd.cc

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -706,25 +706,23 @@ TEST_F(TestTabulateSeTTebd, tabulate_fusion_se_t_tebd_grad_cpu) {
706706
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
707707
TEST_F(TestTabulateSeTTebd, tabulate_fusion_se_t_tebd_gpu) {
708708
std::vector<double> xyz_scatter(nloc * nnei_i * nnei_j * last_layer_size, 0);
709-
double *xyz_scatter_dev = NULL, *table_dev = NULL, *table_info_dev = NULL,
710-
*em_x_dev = NULL, *em_dev = NULL;
709+
double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
710+
*em_dev = NULL;
711711

712712
deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
713713
deepmd::malloc_device_memory_sync(table_dev, table);
714-
deepmd::malloc_device_memory_sync(table_info_dev, table_info);
715714
deepmd::malloc_device_memory_sync(em_x_dev, em_x);
716715
deepmd::malloc_device_memory_sync(em_dev, em);
717716

718717
deepmd::tabulate_fusion_se_t_tebd_gpu<double>(
719-
xyz_scatter_dev, table_dev, table_info_dev, em_x_dev, em_dev, nloc,
718+
xyz_scatter_dev, table_dev, &table_info[0], em_x_dev, em_dev, nloc,
720719
nnei_i, nnei_j, last_layer_size);
721720

722721
deepmd::memcpy_device_to_host(&xyz_scatter[0], xyz_scatter_dev,
723722
xyz_scatter.size() * sizeof(double));
724723

725724
deepmd::delete_device_memory(xyz_scatter_dev);
726725
deepmd::delete_device_memory(table_dev);
727-
deepmd::delete_device_memory(table_info_dev);
728726
deepmd::delete_device_memory(em_x_dev);
729727
deepmd::delete_device_memory(em_dev);
730728

@@ -739,26 +737,24 @@ TEST_F(TestTabulateSeTTebd, tabulate_fusion_se_t_tebd_grad_gpu) {
739737
std::vector<double> dy_dem_x(em_x.size(), 0.0);
740738
std::vector<double> dy(nloc * nnei_i * nnei_j * last_layer_size, 1.0);
741739

742-
double *dy_dem_x_dev = NULL, *table_dev = NULL, *table_info_dev = NULL,
743-
*em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL;
740+
double *dy_dem_x_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
741+
*em_dev = NULL, *dy_dev = NULL;
744742

745743
deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x);
746744
deepmd::malloc_device_memory_sync(table_dev, table);
747-
deepmd::malloc_device_memory_sync(table_info_dev, table_info);
748745
deepmd::malloc_device_memory_sync(em_x_dev, em_x);
749746
deepmd::malloc_device_memory_sync(em_dev, em);
750747
deepmd::malloc_device_memory_sync(dy_dev, dy);
751748

752749
deepmd::tabulate_fusion_se_t_tebd_grad_gpu<double>(
753-
dy_dem_x_dev, table_dev, table_info_dev, em_x_dev, em_dev, dy_dev, nloc,
750+
dy_dem_x_dev, table_dev, &table_info[0], em_x_dev, em_dev, dy_dev, nloc,
754751
nnei_i, nnei_j, last_layer_size);
755752

756753
deepmd::memcpy_device_to_host(&dy_dem_x[0], dy_dem_x_dev,
757754
dy_dem_x.size() * sizeof(double));
758755

759756
deepmd::delete_device_memory(dy_dem_x_dev);
760757
deepmd::delete_device_memory(table_dev);
761-
deepmd::delete_device_memory(table_info_dev);
762758
deepmd::delete_device_memory(em_x_dev);
763759
deepmd::delete_device_memory(em_dev);
764760
deepmd::delete_device_memory(dy_dev);

0 commit comments

Comments
 (0)