Skip to content

Commit 996d192

Browse files
Copilot and njzjz authored
fix(pd): change numel function return type from int to size_t to prevent overflow (#4924)
The `numel` function in the Paddle backend was using `int` for computing tensor element counts, which can overflow for large tensors. This fix changes the return type and intermediate calculations to `size_t` to handle larger tensor sizes safely. ## Problem The original implementation multiplied tensor dimensions as `int` values: ```cpp int numel(const paddle_infer::Tensor& x) const { // TODO: There might be a overflow problem here for multiply int numbers. int ret = 1; std::vector<int> x_shape = x.shape(); for (std::size_t i = 0, n = x_shape.size(); i < n; ++i) { ret *= x_shape[i]; // Can overflow for large tensors } return ret; } ``` For large tensors (e.g., shape `[50000, 50000, 10]` = 25 billion elements), this causes integer overflow and returns negative values. ## Solution - Changed return type from `int` to `size_t` - Changed intermediate calculations to use `size_t` with explicit casting - Updated all calling sites to use `size_t` variables - Removed the TODO comment since the overflow issue is now resolved ```cpp size_t numel(const paddle_infer::Tensor& x) const { size_t ret = 1; std::vector<int> x_shape = x.shape(); for (std::size_t i = 0, n = x_shape.size(); i < n; ++i) { ret *= static_cast<size_t>(x_shape[i]); // Safe from overflow } return ret; } ``` The `size_t` type can handle up to 2^64 elements on 64-bit systems (vs 2^31 for `int`), making it appropriate for tensor element counts. This change is backward compatible since `std::vector::resize()` and other consumers already accept `size_t`. Fixes #4551. 
<!-- START COPILOT CODING AGENT TIPS --> --- ✨ Let Copilot coding agent [set things up for you](https://github.com/deepmodeling/deepmd-kit/issues/new?title=✨+Set+up+Copilot+instructions&body=Configure%20instructions%20for%20this%20repository%20as%20documented%20in%20%5BBest%20practices%20for%20Copilot%20coding%20agent%20in%20your%20repository%5D%28https://gh.io/copilot-coding-agent-tips%29%2E%0A%0A%3COnboard%20this%20repo%3E&assignees=copilot) — coding agent works faster and does higher quality work when set up for your repo. --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com>
1 parent 64e108f commit 996d192

File tree

2 files changed

+13
-14
lines changed

2 files changed

+13
-14
lines changed

source/api_cc/include/DeepPotPD.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -282,12 +282,11 @@ class DeepPotPD : public DeepPotBackend {
282282
* @brief Compute the number of elements in a tensor.
283283
* @param[in] x Tensor x.
284284
**/
285-
int numel(const paddle_infer::Tensor& x) const {
286-
// TODO: There might be a overflow problem here for multiply int numbers.
287-
int ret = 1;
285+
size_t numel(const paddle_infer::Tensor& x) const {
286+
size_t ret = 1;
288287
std::vector<int> x_shape = x.shape();
289288
for (std::size_t i = 0, n = x_shape.size(); i < n; ++i) {
290-
ret *= x_shape[i];
289+
ret *= static_cast<size_t>(x_shape[i]);
291290
}
292291
return ret;
293292
};

source/api_cc/src/DeepPotPD.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,9 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
474474
auto energy_ = predictor_fl->GetOutputHandle(output_names.at(1));
475475
auto force_ = predictor_fl->GetOutputHandle(output_names.at(2));
476476
auto virial_ = predictor_fl->GetOutputHandle(output_names.at(4));
477-
int output_energy_size = numel(*energy_);
478-
int output_force_size = numel(*force_);
479-
int output_virial_size = numel(*virial_);
477+
size_t output_energy_size = numel(*energy_);
478+
size_t output_force_size = numel(*force_);
479+
size_t output_virial_size = numel(*virial_);
480480
// output energy
481481
ener.resize(output_energy_size);
482482
energy_->CopyToCpu(ener.data());
@@ -597,26 +597,26 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
597597
auto force_ = predictor->GetOutputHandle(output_names.at(3));
598598
auto virial_ = predictor->GetOutputHandle(output_names.at(5));
599599

600-
int enery_numel = numel(*energy_);
600+
size_t enery_numel = numel(*energy_);
601601
assert(enery_numel > 0);
602602
ener.resize(enery_numel);
603603
energy_->CopyToCpu(ener.data());
604604

605-
int force_numel = numel(*force_);
605+
size_t force_numel = numel(*force_);
606606
assert(force_numel > 0);
607607
force.resize(force_numel);
608608
force_->CopyToCpu(force.data());
609609

610-
int virial_numel = numel(*virial_);
610+
size_t virial_numel = numel(*virial_);
611611
assert(virial_numel > 0);
612612
virial.resize(virial_numel);
613613
virial_->CopyToCpu(virial.data());
614614

615615
if (atomic) {
616616
auto atom_energy_ = predictor->GetOutputHandle(output_names.at(0));
617617
auto atom_virial_ = predictor->GetOutputHandle(output_names.at(1));
618-
int atom_energy_numel = numel(*atom_energy_);
619-
int atom_virial_numel = numel(*atom_virial_);
618+
size_t atom_energy_numel = numel(*atom_energy_);
619+
size_t atom_virial_numel = numel(*atom_virial_);
620620
assert(atom_energy_numel > 0);
621621
assert(atom_virial_numel > 0);
622622
atom_energy.resize(atom_energy_numel);
@@ -656,7 +656,7 @@ template void DeepPotPD::compute<float, std::vector<ENERGYTYPE>>(
656656
that need to be postprocessed */
657657
void DeepPotPD::get_type_map(std::string& type_map) {
658658
auto type_map_tensor = predictor->GetOutputHandle("buffer_type_map");
659-
int type_map_size = numel(*type_map_tensor);
659+
size_t type_map_size = numel(*type_map_tensor);
660660

661661
std::vector<int> type_map_arr(type_map_size, 0);
662662
type_map_tensor->CopyToCpu(type_map_arr.data());
@@ -670,7 +670,7 @@ template <typename BUFFERTYPE>
670670
void DeepPotPD::get_buffer(const std::string& buffer_name,
671671
std::vector<BUFFERTYPE>& buffer_array) {
672672
auto buffer_tensor = predictor->GetOutputHandle(buffer_name);
673-
int buffer_size = numel(*buffer_tensor);
673+
size_t buffer_size = numel(*buffer_tensor);
674674
buffer_array.resize(buffer_size);
675675
buffer_tensor->CopyToCpu(buffer_array.data());
676676
}

0 commit comments

Comments
 (0)