From e9624cf1bc45ed4c08c44a20ad8e1f26f356bd0f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 05:06:30 +0000 Subject: [PATCH 01/11] Initial plan From 3ca69f9ecca226625689f7c0d7b3e0fbdf9ee2ed Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 05:15:35 +0000 Subject: [PATCH 02/11] Implement C++ PyTorch profiler with environment variable controls Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- doc/env.md | 16 +++++ source/api_cc/include/DeepPotPT.h | 9 +++ source/api_cc/include/DeepSpinPT.h | 9 +++ source/api_cc/include/common.h | 9 +++ source/api_cc/src/DeepPotPT.cc | 36 +++++++++- source/api_cc/src/DeepSpinPT.cc | 28 +++++++- source/api_cc/src/common.cc | 17 +++++ source/api_cc/tests/test_pytorch_profiler.cc | 76 ++++++++++++++++++++ 8 files changed, 196 insertions(+), 4 deletions(-) create mode 100644 source/api_cc/tests/test_pytorch_profiler.cc diff --git a/doc/env.md b/doc/env.md index 4ca7101236..28d777910b 100644 --- a/doc/env.md +++ b/doc/env.md @@ -90,3 +90,19 @@ These environment variables also apply to third-party programs using the C++ int List of customized OP plugin libraries to load, such as `/path/to/plugin1.so:/path/to/plugin2.so` on Linux and `/path/to/plugin1.dll;/path/to/plugin2.dll` on Windows. ::: + +:::{envvar} DP_ENABLE_PYTORCH_PROFILER + +**Choices**: `0`, `1`, `true`; **Default**: `0` + +{{ pytorch_icon }} Enable PyTorch profiler for C++ backend. This is for development purposes. + +::: + +:::{envvar} DP_PYTORCH_PROFILER_OUTPUT_DIR + +**Default**: `./profiler_output` + +{{ pytorch_icon }} Output directory for PyTorch profiler traces when `DP_ENABLE_PYTORCH_PROFILER` is enabled. 
+ +::: diff --git a/source/api_cc/include/DeepPotPT.h b/source/api_cc/include/DeepPotPT.h index 207a13286c..ddcce2bda7 100644 --- a/source/api_cc/include/DeepPotPT.h +++ b/source/api_cc/include/DeepPotPT.h @@ -3,6 +3,9 @@ #include #include +#ifdef BUILD_PYTORCH +#include +#endif #include "DeepPot.h" @@ -340,6 +343,12 @@ class DeepPotPT : public DeepPotBackend { at::Tensor firstneigh_tensor; c10::optional mapping_tensor; torch::Dict comm_dict; + // PyTorch profiler + bool profiler_enabled; + std::string profiler_output_dir; +#ifdef BUILD_PYTORCH + std::unique_ptr profiler; +#endif /** * @brief Translate PyTorch exceptions to the DeePMD-kit exception. * @param[in] f The function to run. diff --git a/source/api_cc/include/DeepSpinPT.h b/source/api_cc/include/DeepSpinPT.h index be4c85d898..9b531dc084 100644 --- a/source/api_cc/include/DeepSpinPT.h +++ b/source/api_cc/include/DeepSpinPT.h @@ -3,6 +3,9 @@ #include #include +#ifdef BUILD_PYTORCH +#include +#endif #include "DeepSpin.h" @@ -262,6 +265,12 @@ class DeepSpinPT : public DeepSpinBackend { at::Tensor firstneigh_tensor; c10::optional mapping_tensor; torch::Dict comm_dict; + // PyTorch profiler + bool profiler_enabled; + std::string profiler_output_dir; +#ifdef BUILD_PYTORCH + std::unique_ptr profiler; +#endif /** * @brief Translate PyTorch exceptions to the DeePMD-kit exception. * @param[in] f The function to run. diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h index 612f699ea4..f485666c86 100644 --- a/source/api_cc/include/common.h +++ b/source/api_cc/include/common.h @@ -163,6 +163,15 @@ void select_map_inv(typename std::vector::iterator out, **/ void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads); +/** + * @brief Get PyTorch profiler configuration from environment variables. + * @param[out] enable_profiler Whether to enable the profiler. Read from + *DP_ENABLE_PYTORCH_PROFILER. + * @param[out] output_dir Output directory for profiler traces. 
Read from + *DP_PYTORCH_PROFILER_OUTPUT_DIR. + **/ +void get_env_pytorch_profiler(bool& enable_profiler, std::string& output_dir); + /** * @brief Dynamically load OP library. This should be called before loading * graphs. diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index 0f3a72b87f..0b070f25a2 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -46,11 +46,11 @@ torch::Tensor createNlistTensor(const std::vector>& data) { int nnei = nloc > 0 ? total_size / nloc : 0; return flat_tensor.view({1, nloc, nnei}); } -DeepPotPT::DeepPotPT() : inited(false) {} +DeepPotPT::DeepPotPT() : inited(false), profiler_enabled(false) {} DeepPotPT::DeepPotPT(const std::string& model, const int& gpu_rank, const std::string& file_content) - : inited(false) { + : inited(false), profiler_enabled(false) { try { translate_error([&] { init(model, gpu_rank, file_content); }); } catch (...) { @@ -110,6 +110,22 @@ void DeepPotPT::init(const std::string& model, } } + // Initialize PyTorch profiler + get_env_pytorch_profiler(profiler_enabled, profiler_output_dir); + if (profiler_enabled) { +#ifdef BUILD_PYTORCH + // Create output directory if it doesn't exist + std::string mkdir_cmd = "mkdir -p " + profiler_output_dir; + std::system(mkdir_cmd.c_str()); + + std::cout << "PyTorch profiler enabled. 
Output directory: " << profiler_output_dir << std::endl; + profiler = std::make_unique( + profiler_output_dir + "/pytorch_profiler_trace.json"); +#else + std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; +#endif + } + auto rcut_ = module.run_method("get_rcut").toDouble(); rcut = static_cast(rcut_); ntypes = module.run_method("get_ntypes").toInt(); @@ -234,6 +250,14 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, options) .to(device); } + + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif + c10::Dict outputs = (do_message_passing) ? module @@ -383,6 +407,14 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, inputs.push_back(aparam_tensor); bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); + + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif + c10::Dict outputs = module.forward(inputs).toGenericDict(); c10::IValue energy_ = outputs.at("energy"); diff --git a/source/api_cc/src/DeepSpinPT.cc b/source/api_cc/src/DeepSpinPT.cc index 8ccf2fd383..1c8791b85c 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -46,11 +46,11 @@ torch::Tensor createNlistTensor2(const std::vector>& data) { int nnei = nloc > 0 ? total_size / nloc : 0; return flat_tensor.view({1, nloc, nnei}); } -DeepSpinPT::DeepSpinPT() : inited(false) {} +DeepSpinPT::DeepSpinPT() : inited(false), profiler_enabled(false) {} DeepSpinPT::DeepSpinPT(const std::string& model, const int& gpu_rank, const std::string& file_content) - : inited(false) { + : inited(false), profiler_enabled(false) { try { translate_error([&] { init(model, gpu_rank, file_content); }); } catch (...) 
{ @@ -110,6 +110,22 @@ void DeepSpinPT::init(const std::string& model, } } + // Initialize PyTorch profiler + get_env_pytorch_profiler(profiler_enabled, profiler_output_dir); + if (profiler_enabled) { +#ifdef BUILD_PYTORCH + // Create output directory if it doesn't exist + std::string mkdir_cmd = "mkdir -p " + profiler_output_dir; + std::system(mkdir_cmd.c_str()); + + std::cout << "PyTorch profiler enabled. Output directory: " << profiler_output_dir << std::endl; + profiler = std::make_unique( + profiler_output_dir + "/pytorch_profiler_trace.json"); +#else + std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; +#endif + } + auto rcut_ = module.run_method("get_rcut").toDouble(); rcut = static_cast(rcut_); ntypes = module.run_method("get_ntypes").toInt(); @@ -410,6 +426,14 @@ void DeepSpinPT::compute(ENERGYVTYPE& ener, inputs.push_back(aparam_tensor); bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); + + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif + c10::Dict outputs = module.forward(inputs).toGenericDict(); c10::IValue energy_ = outputs.at("energy"); diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index eace577f89..2ecc0085ab 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -381,6 +381,23 @@ void deepmd::get_env_nthreads(int& num_intra_nthreads, } } +void deepmd::get_env_pytorch_profiler(bool& enable_profiler, std::string& output_dir) { + enable_profiler = false; + output_dir = "./profiler_output"; // default directory + + const char* env_enable = std::getenv("DP_ENABLE_PYTORCH_PROFILER"); + if (env_enable && + std::string(env_enable) != std::string("") && + (std::string(env_enable) == "1" || std::string(env_enable) == "true")) { + enable_profiler = true; + } + + const char* env_output_dir = std::getenv("DP_PYTORCH_PROFILER_OUTPUT_DIR"); + if (env_output_dir && 
std::string(env_output_dir) != std::string("")) { + output_dir = std::string(env_output_dir); + } +} + static inline void _load_library_path(std::string dso_path) { #if defined(_WIN32) void* dso_handle = LoadLibrary(dso_path.c_str()); diff --git a/source/api_cc/tests/test_pytorch_profiler.cc b/source/api_cc/tests/test_pytorch_profiler.cc new file mode 100644 index 0000000000..ca07cfbc11 --- /dev/null +++ b/source/api_cc/tests/test_pytorch_profiler.cc @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#include + +#include +#include + +#include "common.h" + +class TestPyTorchProfiler : public ::testing::Test { + protected: + void SetUp() override { + // Clean any existing environment variables + unsetenv("DP_ENABLE_PYTORCH_PROFILER"); + unsetenv("DP_PYTORCH_PROFILER_OUTPUT_DIR"); + } + + void TearDown() override { + // Clean up environment variables + unsetenv("DP_ENABLE_PYTORCH_PROFILER"); + unsetenv("DP_PYTORCH_PROFILER_OUTPUT_DIR"); + } +}; + +TEST_F(TestPyTorchProfiler, test_profiler_disabled_by_default) { + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_FALSE(enable_profiler); + EXPECT_EQ(output_dir, "./profiler_output"); +} + +TEST_F(TestPyTorchProfiler, test_profiler_enabled_with_env) { + setenv("DP_ENABLE_PYTORCH_PROFILER", "1", 1); + + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_TRUE(enable_profiler); + EXPECT_EQ(output_dir, "./profiler_output"); +} + +TEST_F(TestPyTorchProfiler, test_profiler_enabled_with_true) { + setenv("DP_ENABLE_PYTORCH_PROFILER", "true", 1); + + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_TRUE(enable_profiler); + EXPECT_EQ(output_dir, "./profiler_output"); +} + +TEST_F(TestPyTorchProfiler, test_custom_output_dir) { + setenv("DP_ENABLE_PYTORCH_PROFILER", "1", 1); + 
setenv("DP_PYTORCH_PROFILER_OUTPUT_DIR", "/custom/path", 1); + + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_TRUE(enable_profiler); + EXPECT_EQ(output_dir, "/custom/path"); +} + +TEST_F(TestPyTorchProfiler, test_profiler_disabled_with_zero) { + setenv("DP_ENABLE_PYTORCH_PROFILER", "0", 1); + + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_FALSE(enable_profiler); + EXPECT_EQ(output_dir, "./profiler_output"); +} \ No newline at end of file From eeb7c0668612a381a93fbe7c973c322cf8ff95a2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 05:19:05 +0000 Subject: [PATCH 03/11] Fix PyTorch profiler API usage and add integration documentation Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- PYTORCH_PROFILER_INTEGRATION.md | 32 ++++++++++++++++++++++++++++++++ source/api_cc/src/DeepPotPT.cc | 33 ++++++++++++++++++--------------- source/api_cc/src/DeepSpinPT.cc | 26 +++++++++++++++++--------- 3 files changed, 67 insertions(+), 24 deletions(-) create mode 100644 PYTORCH_PROFILER_INTEGRATION.md diff --git a/PYTORCH_PROFILER_INTEGRATION.md b/PYTORCH_PROFILER_INTEGRATION.md new file mode 100644 index 0000000000..b97c2eeb5e --- /dev/null +++ b/PYTORCH_PROFILER_INTEGRATION.md @@ -0,0 +1,32 @@ +# PyTorch C++ Profiler Integration Test + +This test demonstrates the PyTorch profiler integration with the C++ backend. + +## Usage + +1. Set environment variables: +```bash +export DP_ENABLE_PYTORCH_PROFILER=1 +export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results +``` + +2. Run your DeepMD-kit C++ application + +3. 
Check for profiler output in the specified directory: +```bash +ls -la ./profiler_results/pytorch_profiler_trace.json +``` + +## Environment Variables + +- `DP_ENABLE_PYTORCH_PROFILER`: Set to `1` or `true` to enable profiling +- `DP_PYTORCH_PROFILER_OUTPUT_DIR`: Directory for profiler output (default: `./profiler_output`) + +## Implementation Details + +The profiler uses PyTorch's `torch::autograd::profiler::RecordProfile` and automatically: +- Creates the output directory if it doesn't exist +- Profiles all forward pass operations in DeepPotPT and DeepSpinPT +- Saves profiling results to a JSON file when the object is destroyed + +This is intended for development and debugging purposes. \ No newline at end of file diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index 0b070f25a2..3a32163671 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -119,8 +119,8 @@ void DeepPotPT::init(const std::string& model, std::system(mkdir_cmd.c_str()); std::cout << "PyTorch profiler enabled. 
Output directory: " << profiler_output_dir << std::endl; - profiler = std::make_unique( - profiler_output_dir + "/pytorch_profiler_trace.json"); + // Initialize profiler with default configuration + profiler = std::make_unique(); #else std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; #endif @@ -135,7 +135,20 @@ void DeepPotPT::init(const std::string& model, aparam_nall = module.run_method("is_aparam_nall").toBool(); inited = true; } -DeepPotPT::~DeepPotPT() {} +DeepPotPT::~DeepPotPT() { +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + try { + // Save profiler results to file + std::string output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + profiler->save(output_file); + std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + } catch (const std::exception& e) { + std::cerr << "Warning: Failed to save profiler results: " << e.what() << std::endl; + } + } +#endif +} template void DeepPotPT::compute(ENERGYVTYPE& ener, @@ -251,12 +264,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, .to(device); } - // Start profiling if enabled -#ifdef BUILD_PYTORCH - if (profiler_enabled && profiler) { - profiler->step(); - } -#endif + // Profiling is automatically active when RecordProfile is constructed c10::Dict outputs = (do_message_passing) @@ -408,12 +416,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); - // Start profiling if enabled -#ifdef BUILD_PYTORCH - if (profiler_enabled && profiler) { - profiler->step(); - } -#endif + // Profiling is automatically active when RecordProfile is constructed c10::Dict outputs = module.forward(inputs).toGenericDict(); diff --git a/source/api_cc/src/DeepSpinPT.cc b/source/api_cc/src/DeepSpinPT.cc index 1c8791b85c..4e80d41968 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -119,8 +119,8 @@ void DeepSpinPT::init(const std::string& 
model, std::system(mkdir_cmd.c_str()); std::cout << "PyTorch profiler enabled. Output directory: " << profiler_output_dir << std::endl; - profiler = std::make_unique( - profiler_output_dir + "/pytorch_profiler_trace.json"); + // Initialize profiler with default configuration + profiler = std::make_unique(); #else std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; #endif @@ -135,7 +135,20 @@ void DeepSpinPT::init(const std::string& model, aparam_nall = module.run_method("is_aparam_nall").toBool(); inited = true; } -DeepSpinPT::~DeepSpinPT() {} +DeepSpinPT::~DeepSpinPT() { +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + try { + // Save profiler results to file + std::string output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + profiler->save(output_file); + std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + } catch (const std::exception& e) { + std::cerr << "Warning: Failed to save profiler results: " << e.what() << std::endl; + } + } +#endif +} template void DeepSpinPT::compute(ENERGYVTYPE& ener, @@ -427,12 +440,7 @@ void DeepSpinPT::compute(ENERGYVTYPE& ener, bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); - // Start profiling if enabled -#ifdef BUILD_PYTORCH - if (profiler_enabled && profiler) { - profiler->step(); - } -#endif + // Profiling is automatically active when RecordProfile is constructed c10::Dict outputs = module.forward(inputs).toGenericDict(); From 737878fa81e3db01f7e900cb8010892aea6d1423 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 05:40:35 +0000 Subject: [PATCH 04/11] Update PyTorch profiler to use new torch::profiler API instead of deprecated torch::autograd::profiler Co-authored-by: caic99 <78061359+caic99@users.noreply.github.com> --- source/api_cc/include/DeepPotPT.h | 4 ++-- source/api_cc/include/DeepSpinPT.h | 4 ++-- 
source/api_cc/src/DeepPotPT.cc | 30 +++++++++++++++++++++++------- source/api_cc/src/DeepSpinPT.cc | 23 +++++++++++++++++------ 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/source/api_cc/include/DeepPotPT.h b/source/api_cc/include/DeepPotPT.h index ddcce2bda7..b353dbc7e5 100644 --- a/source/api_cc/include/DeepPotPT.h +++ b/source/api_cc/include/DeepPotPT.h @@ -4,7 +4,7 @@ #include #include #ifdef BUILD_PYTORCH -#include +#include #endif #include "DeepPot.h" @@ -347,7 +347,7 @@ class DeepPotPT : public DeepPotBackend { bool profiler_enabled; std::string profiler_output_dir; #ifdef BUILD_PYTORCH - std::unique_ptr profiler; + std::shared_ptr profiler_result; #endif /** * @brief Translate PyTorch exceptions to the DeePMD-kit exception. diff --git a/source/api_cc/include/DeepSpinPT.h b/source/api_cc/include/DeepSpinPT.h index 9b531dc084..c8c39b5905 100644 --- a/source/api_cc/include/DeepSpinPT.h +++ b/source/api_cc/include/DeepSpinPT.h @@ -4,7 +4,7 @@ #include #include #ifdef BUILD_PYTORCH -#include +#include #endif #include "DeepSpin.h" @@ -269,7 +269,7 @@ class DeepSpinPT : public DeepSpinBackend { bool profiler_enabled; std::string profiler_output_dir; #ifdef BUILD_PYTORCH - std::unique_ptr profiler; + std::shared_ptr profiler_result; #endif /** * @brief Translate PyTorch exceptions to the DeePMD-kit exception. diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index 3a32163671..b155e264ea 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -119,8 +119,11 @@ void DeepPotPT::init(const std::string& model, std::system(mkdir_cmd.c_str()); std::cout << "PyTorch profiler enabled. 
Output directory: " << profiler_output_dir << std::endl; - // Initialize profiler with default configuration - profiler = std::make_unique(); + // Start profiling using new API + torch::profiler::profile({ + torch::profiler::ProfilerActivity::CPU, + torch::profiler::ProfilerActivity::CUDA, + }, true, true, false); // record_shapes, profile_memory, with_stack #else std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; #endif @@ -137,12 +140,15 @@ void DeepPotPT::init(const std::string& model, } DeepPotPT::~DeepPotPT() { #ifdef BUILD_PYTORCH - if (profiler_enabled && profiler) { + if (profiler_enabled) { try { // Save profiler results to file std::string output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; - profiler->save(output_file); - std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + profiler_result = torch::profiler::disableProfiler(); + if (profiler_result) { + profiler_result->save(output_file); + std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + } } catch (const std::exception& e) { std::cerr << "Warning: Failed to save profiler results: " << e.what() << std::endl; } @@ -264,7 +270,12 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, .to(device); } - // Profiling is automatically active when RecordProfile is constructed + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif c10::Dict outputs = (do_message_passing) @@ -416,7 +427,12 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); - // Profiling is automatically active when RecordProfile is constructed + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif c10::Dict outputs = module.forward(inputs).toGenericDict(); diff --git a/source/api_cc/src/DeepSpinPT.cc 
b/source/api_cc/src/DeepSpinPT.cc index 4e80d41968..e4f1aa6f17 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -119,8 +119,11 @@ void DeepSpinPT::init(const std::string& model, std::system(mkdir_cmd.c_str()); std::cout << "PyTorch profiler enabled. Output directory: " << profiler_output_dir << std::endl; - // Initialize profiler with default configuration - profiler = std::make_unique(); + // Start profiling using new API + torch::profiler::profile({ + torch::profiler::ProfilerActivity::CPU, + torch::profiler::ProfilerActivity::CUDA, + }, true, true, false); // record_shapes, profile_memory, with_stack #else std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; #endif @@ -137,12 +140,15 @@ void DeepSpinPT::init(const std::string& model, } DeepSpinPT::~DeepSpinPT() { #ifdef BUILD_PYTORCH - if (profiler_enabled && profiler) { + if (profiler_enabled) { try { // Save profiler results to file std::string output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; - profiler->save(output_file); - std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + profiler_result = torch::profiler::disableProfiler(); + if (profiler_result) { + profiler_result->save(output_file); + std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + } } catch (const std::exception& e) { std::cerr << "Warning: Failed to save profiler results: " << e.what() << std::endl; } @@ -440,7 +446,12 @@ void DeepSpinPT::compute(ENERGYVTYPE& ener, bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); - // Profiling is automatically active when RecordProfile is constructed + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif c10::Dict outputs = module.forward(inputs).toGenericDict(); From a87995528f4546714285124493262e68e9a93450 Mon Sep 17 00:00:00 2001 From: 
"copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 05:41:16 +0000 Subject: [PATCH 05/11] Update documentation to reflect new PyTorch profiler API Co-authored-by: caic99 <78061359+caic99@users.noreply.github.com> --- PYTORCH_PROFILER_INTEGRATION.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PYTORCH_PROFILER_INTEGRATION.md b/PYTORCH_PROFILER_INTEGRATION.md index b97c2eeb5e..9b56780cf5 100644 --- a/PYTORCH_PROFILER_INTEGRATION.md +++ b/PYTORCH_PROFILER_INTEGRATION.md @@ -24,7 +24,7 @@ ls -la ./profiler_results/pytorch_profiler_trace.json ## Implementation Details -The profiler uses PyTorch's `torch::autograd::profiler::RecordProfile` and automatically: +The profiler uses PyTorch's modern `torch::profiler` API and automatically: - Creates the output directory if it doesn't exist - Profiles all forward pass operations in DeepPotPT and DeepSpinPT - Saves profiling results to a JSON file when the object is destroyed From 101a901e1e53bcfd5cfa1a64e5b6a247a37298c6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 05:55:53 +0000 Subject: [PATCH 06/11] Add MPI rank support for PyTorch profiler output files Co-authored-by: caic99 <78061359+caic99@users.noreply.github.com> --- PYTORCH_PROFILER_INTEGRATION.md | 12 +++++++++++ source/api_cc/include/common.h | 6 ++++++ source/api_cc/src/DeepPotPT.cc | 12 +++++++++-- source/api_cc/src/DeepSpinPT.cc | 12 +++++++++-- source/api_cc/src/common.cc | 21 ++++++++++++++++++++ source/api_cc/tests/test_pytorch_profiler.cc | 6 ++++++ 6 files changed, 65 insertions(+), 4 deletions(-) diff --git a/PYTORCH_PROFILER_INTEGRATION.md b/PYTORCH_PROFILER_INTEGRATION.md index 9b56780cf5..f2d7ea5804 100644 --- a/PYTORCH_PROFILER_INTEGRATION.md +++ b/PYTORCH_PROFILER_INTEGRATION.md @@ -14,7 +14,11 @@ export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results 3. 
Check for profiler output in the specified directory: ```bash +# For single-rank or non-MPI usage ls -la ./profiler_results/pytorch_profiler_trace.json + +# For MPI usage, each rank gets its own file +ls -la ./profiler_results/pytorch_profiler_trace_rank*.json ``` ## Environment Variables @@ -28,5 +32,13 @@ The profiler uses PyTorch's modern `torch::profiler` API and automatically: - Creates the output directory if it doesn't exist - Profiles all forward pass operations in DeepPotPT and DeepSpinPT - Saves profiling results to a JSON file when the object is destroyed +- Automatically includes MPI rank in filename when MPI is available and initialized + +## Output Files + +- **Single-rank or non-MPI usage**: `pytorch_profiler_trace.json` +- **MPI usage**: `pytorch_profiler_trace_rank{rank}.json` (e.g., `pytorch_profiler_trace_rank0.json`, `pytorch_profiler_trace_rank1.json`) + +This ensures that each MPI rank saves its profiling data to a separate file, preventing conflicts in multi-rank simulations. This is intended for development and debugging purposes. \ No newline at end of file diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h index f485666c86..566fce097b 100644 --- a/source/api_cc/include/common.h +++ b/source/api_cc/include/common.h @@ -172,6 +172,12 @@ void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads); **/ void get_env_pytorch_profiler(bool& enable_profiler, std::string& output_dir); +/** + * @brief Get MPI rank if MPI is available and initialized, otherwise return 0. + * @return The MPI rank or 0 if MPI is not available/initialized. + **/ +int get_mpi_rank(); + /** * @brief Dynamically load OP library. This should be called before loading * graphs. 
diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index b155e264ea..d407e6b2b5 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -142,8 +142,16 @@ DeepPotPT::~DeepPotPT() { #ifdef BUILD_PYTORCH if (profiler_enabled) { try { - // Save profiler results to file - std::string output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + // Save profiler results to file with MPI rank if available + int rank = get_mpi_rank(); + std::string output_file; + if (rank >= 0) { + // MPI is available and initialized, include rank in filename + output_file = profiler_output_dir + "/pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"; + } else { + // MPI not available or not initialized, use original filename + output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + } profiler_result = torch::profiler::disableProfiler(); if (profiler_result) { profiler_result->save(output_file); diff --git a/source/api_cc/src/DeepSpinPT.cc b/source/api_cc/src/DeepSpinPT.cc index e4f1aa6f17..2ca7446384 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -142,8 +142,16 @@ DeepSpinPT::~DeepSpinPT() { #ifdef BUILD_PYTORCH if (profiler_enabled) { try { - // Save profiler results to file - std::string output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + // Save profiler results to file with MPI rank if available + int rank = get_mpi_rank(); + std::string output_file; + if (rank >= 0) { + // MPI is available and initialized, include rank in filename + output_file = profiler_output_dir + "/pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"; + } else { + // MPI not available or not initialized, use original filename + output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + } profiler_result = torch::profiler::disableProfiler(); if (profiler_result) { profiler_result->save(output_file); diff --git a/source/api_cc/src/common.cc 
b/source/api_cc/src/common.cc index 2ecc0085ab..5d5b6a0a3f 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -8,6 +8,13 @@ #include #include +// Try to include MPI if available - this will be a no-op if MPI is not available +#ifdef __has_include + #if __has_include() + #include + #endif +#endif + #include "AtomMap.h" #include "device.h" #if defined(_WIN32) @@ -398,6 +405,20 @@ void deepmd::get_env_pytorch_profiler(bool& enable_profiler, std::string& output } } +int deepmd::get_mpi_rank() { + int rank = -1; // Use -1 to indicate MPI not available/initialized + // Try to detect MPI at runtime + #ifdef MPI_H + int initialized = 0; + if (MPI_Initialized(&initialized) == MPI_SUCCESS && initialized) { + if (MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) { + rank = -1; // fallback to -1 if MPI_Comm_rank fails + } + } + #endif + return rank; +} + static inline void _load_library_path(std::string dso_path) { #if defined(_WIN32) void* dso_handle = LoadLibrary(dso_path.c_str()); diff --git a/source/api_cc/tests/test_pytorch_profiler.cc b/source/api_cc/tests/test_pytorch_profiler.cc index ca07cfbc11..70e0690de3 100644 --- a/source/api_cc/tests/test_pytorch_profiler.cc +++ b/source/api_cc/tests/test_pytorch_profiler.cc @@ -73,4 +73,10 @@ TEST_F(TestPyTorchProfiler, test_profiler_disabled_with_zero) { EXPECT_FALSE(enable_profiler); EXPECT_EQ(output_dir, "./profiler_output"); +} + +TEST_F(TestPyTorchProfiler, test_mpi_rank_detection) { + // Test that MPI rank detection returns valid rank (-1 when MPI not initialized, >= 0 when initialized) + int rank = deepmd::get_mpi_rank(); + EXPECT_GE(rank, -1); // Rank should be -1 (not available) or >= 0 (valid rank) } \ No newline at end of file From 4c60b922eb860444e16856ce11bbc0f76463aff8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 06:35:49 +0000 Subject: [PATCH 07/11] Fix cross-platform compatibility for directory creation 
and path handling Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- .../development/pytorch-profiler.md | 0 source/api_cc/include/common.h | 19 +++++++- source/api_cc/src/DeepPotPT.cc | 9 ++-- source/api_cc/src/DeepSpinPT.cc | 9 ++-- source/api_cc/src/common.cc | 48 +++++++++++++++++++ 5 files changed, 75 insertions(+), 10 deletions(-) rename PYTORCH_PROFILER_INTEGRATION.md => doc/development/pytorch-profiler.md (100%) diff --git a/PYTORCH_PROFILER_INTEGRATION.md b/doc/development/pytorch-profiler.md similarity index 100% rename from PYTORCH_PROFILER_INTEGRATION.md rename to doc/development/pytorch-profiler.md diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h index 566fce097b..a3550b7d12 100644 --- a/source/api_cc/include/common.h +++ b/source/api_cc/include/common.h @@ -173,11 +173,26 @@ void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads); void get_env_pytorch_profiler(bool& enable_profiler, std::string& output_dir); /** - * @brief Get MPI rank if MPI is available and initialized, otherwise return 0. - * @return The MPI rank or 0 if MPI is not available/initialized. + * @brief Get MPI rank if MPI is available and initialized, otherwise return -1. + * @return The MPI rank or -1 if MPI is not available/initialized. **/ int get_mpi_rank(); +/** + * @brief Create directories recursively in a cross-platform way. + * @param path The path to create. + * @return true if successful or directory already exists, false otherwise. + **/ +bool create_directories(const std::string& path); + +/** + * @brief Join two path components using platform-appropriate separator. + * @param path1 The first path component. + * @param path2 The second path component. + * @return The joined path. + **/ +std::string join_path(const std::string& path1, const std::string& path2); + /** * @brief Dynamically load OP library. This should be called before loading * graphs. 
diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index d407e6b2b5..b8c933eb6a 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -115,8 +115,9 @@ void DeepPotPT::init(const std::string& model, if (profiler_enabled) { #ifdef BUILD_PYTORCH // Create output directory if it doesn't exist - std::string mkdir_cmd = "mkdir -p " + profiler_output_dir; - std::system(mkdir_cmd.c_str()); + if (!create_directories(profiler_output_dir)) { + std::cerr << "Warning: Failed to create profiler output directory: " << profiler_output_dir << std::endl; + } std::cout << "PyTorch profiler enabled. Output directory: " << profiler_output_dir << std::endl; // Start profiling using new API @@ -147,10 +148,10 @@ DeepPotPT::~DeepPotPT() { std::string output_file; if (rank >= 0) { // MPI is available and initialized, include rank in filename - output_file = profiler_output_dir + "/pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"; + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"); } else { // MPI not available or not initialized, use original filename - output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace.json"); } profiler_result = torch::profiler::disableProfiler(); if (profiler_result) { diff --git a/source/api_cc/src/DeepSpinPT.cc b/source/api_cc/src/DeepSpinPT.cc index 2ca7446384..14198f70eb 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -115,8 +115,9 @@ void DeepSpinPT::init(const std::string& model, if (profiler_enabled) { #ifdef BUILD_PYTORCH // Create output directory if it doesn't exist - std::string mkdir_cmd = "mkdir -p " + profiler_output_dir; - std::system(mkdir_cmd.c_str()); + if (!create_directories(profiler_output_dir)) { + std::cerr << "Warning: Failed to create profiler output directory: " << profiler_output_dir << 
std::endl; + } std::cout << "PyTorch profiler enabled. Output directory: " << profiler_output_dir << std::endl; // Start profiling using new API @@ -147,10 +148,10 @@ DeepSpinPT::~DeepSpinPT() { std::string output_file; if (rank >= 0) { // MPI is available and initialized, include rank in filename - output_file = profiler_output_dir + "/pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"; + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"); } else { // MPI not available or not initialized, use original filename - output_file = profiler_output_dir + "/pytorch_profiler_trace.json"; + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace.json"); } profiler_result = torch::profiler::disableProfiler(); if (profiler_result) { diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 5d5b6a0a3f..e44dc874f8 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include // Try to include MPI if available - this will be a no-op if MPI is not available #ifdef __has_include @@ -27,6 +30,7 @@ #define PSAPI_VERSION 2 #include #include +#include // for _mkdir #define O_RDONLY _O_RDONLY #else // not windows @@ -419,6 +423,50 @@ int deepmd::get_mpi_rank() { return rank; } +bool deepmd::create_directories(const std::string& path) { + if (path.empty()) { + return false; + } + + // Check if directory already exists + struct stat st; + if (stat(path.c_str(), &st) == 0) { + return S_ISDIR(st.st_mode); + } + + // Find the parent directory + size_t pos = path.find_last_of("/\\"); + if (pos != std::string::npos && pos > 0) { + std::string parent = path.substr(0, pos); + if (!create_directories(parent)) { + return false; + } + } + + // Create this directory +#if defined(_WIN32) + return _mkdir(path.c_str()) == 0 || errno == EEXIST; +#else + return mkdir(path.c_str(), 0755) == 0 || errno == EEXIST; +#endif 
+} + +std::string deepmd::join_path(const std::string& path1, const std::string& path2) { + if (path1.empty()) return path2; + if (path2.empty()) return path1; + + char sep = '/'; +#if defined(_WIN32) + sep = '\\'; +#endif + + if (path1.back() == '/' || path1.back() == '\\') { + return path1 + path2; + } else { + return path1 + sep + path2; + } +} + static inline void _load_library_path(std::string dso_path) { #if defined(_WIN32) void* dso_handle = LoadLibrary(dso_path.c_str()); From e422ee22e9d07f135a632dcdb59196ba05246a46 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 06:38:57 +0000 Subject: [PATCH 08/11] Remove MPI dependency from api_cc library to avoid linking issues Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- doc/development/pytorch-profiler.md | 16 +++++++++------- source/api_cc/include/common.h | 4 ++-- source/api_cc/src/common.cc | 25 ++++++++----------------- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/doc/development/pytorch-profiler.md b/doc/development/pytorch-profiler.md index f2d7ea5804..b92d4178de 100644 --- a/doc/development/pytorch-profiler.md +++ b/doc/development/pytorch-profiler.md @@ -14,11 +14,15 @@ export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results 3. 
Check for profiler output in the specified directory: ```bash -# For single-rank or non-MPI usage ls -la ./profiler_results/pytorch_profiler_trace.json +``` -# For MPI usage, each rank gets its own file -ls -la ./profiler_results/pytorch_profiler_trace_rank*.json +For MPI applications, you can use different output directories per rank: +```bash +# Example for rank 0 +export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results_rank0 +# Example for rank 1 +export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results_rank1 ``` ## Environment Variables @@ -32,13 +36,11 @@ The profiler uses PyTorch's modern `torch::profiler` API and automatically: - Creates the output directory if it doesn't exist - Profiles all forward pass operations in DeepPotPT and DeepSpinPT - Saves profiling results to a JSON file when the object is destroyed -- Automatically includes MPI rank in filename when MPI is available and initialized ## Output Files -- **Single-rank or non-MPI usage**: `pytorch_profiler_trace.json` -- **MPI usage**: `pytorch_profiler_trace_rank{rank}.json` (e.g., `pytorch_profiler_trace_rank0.json`, `pytorch_profiler_trace_rank1.json`) +- **All usage**: `pytorch_profiler_trace.json` -This ensures that each MPI rank saves its profiling data to a separate file, preventing conflicts in multi-rank simulations. +For MPI applications, users can distinguish between ranks by setting different output directories per rank using the `DP_PYTORCH_PROFILER_OUTPUT_DIR` environment variable. This is intended for development and debugging purposes. 
\ No newline at end of file diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h index a3550b7d12..215bb42030 100644 --- a/source/api_cc/include/common.h +++ b/source/api_cc/include/common.h @@ -173,8 +173,8 @@ void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads); void get_env_pytorch_profiler(bool& enable_profiler, std::string& output_dir); /** - * @brief Get MPI rank if MPI is available and initialized, otherwise return -1. - * @return The MPI rank or -1 if MPI is not available/initialized. + * @brief Get MPI rank when it can be determined, otherwise return -1. + * @return The MPI rank, or -1 if MPI rank information is unavailable. **/ int get_mpi_rank(); diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index e44dc874f8..883e1c0c8c 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -11,12 +11,10 @@ #include #include -// Try to include MPI if available - this will be a no-op if MPI is not available -#ifdef __has_include - #if __has_include() - #include - #endif -#endif +// Note: MPI rank detection has been removed from api_cc library +// to avoid MPI linking dependencies. The profiler will use a generic +// filename. Users can still distinguish between ranks by using different +// output directories per rank if needed. 
#include "AtomMap.h" #include "device.h" @@ -410,17 +408,10 @@ void deepmd::get_env_pytorch_profiler(bool& enable_profiler, std::string& output } int deepmd::get_mpi_rank() { - int rank = -1; // Use -1 to indicate MPI not available/initialized - // Try to detect MPI at runtime - #ifdef MPI_H - int initialized = 0; - if (MPI_Initialized(&initialized) == MPI_SUCCESS && initialized) { - if (MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) { - rank = -1; // fallback to -1 if MPI_Comm_rank fails - } - } - #endif - return rank; + // MPI rank detection removed from api_cc to avoid MPI linking dependencies + // Always return -1 to indicate no MPI rank available + // Users can distinguish between ranks by using different output directories + return -1; } bool deepmd::create_directories(const std::string& path) { From e28bb867bbcd0dfb0835d7a00e4b661e73966cdb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 23 Aug 2025 02:56:35 +0000 Subject: [PATCH 09/11] Add MPI linking to api_cc library with USE_MPI macro for conditional compilation Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- doc/development/pytorch-profiler.md | 10 ++++++++-- source/api_cc/CMakeLists.txt | 15 +++++++++++++++ source/api_cc/src/common.cc | 22 +++++++++++++++------- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/doc/development/pytorch-profiler.md b/doc/development/pytorch-profiler.md index b92d4178de..308a172cae 100644 --- a/doc/development/pytorch-profiler.md +++ b/doc/development/pytorch-profiler.md @@ -14,7 +14,11 @@ export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results 3. 
Check for profiler output in the specified directory: ```bash +# For single-rank or non-MPI usage ls -la ./profiler_results/pytorch_profiler_trace.json + +# For MPI usage, each rank gets its own file +ls -la ./profiler_results/pytorch_profiler_trace_rank*.json ``` For MPI applications, you can use different output directories per rank: @@ -36,11 +40,13 @@ The profiler uses PyTorch's modern `torch::profiler` API and automatically: - Creates the output directory if it doesn't exist - Profiles all forward pass operations in DeepPotPT and DeepSpinPT - Saves profiling results to a JSON file when the object is destroyed +- Automatically includes MPI rank in filename when MPI is available and initialized ## Output Files -- **All usage**: `pytorch_profiler_trace.json` +- **Single-rank or non-MPI usage**: `pytorch_profiler_trace.json` +- **MPI usage**: `pytorch_profiler_trace_rank{rank}.json` (e.g., `pytorch_profiler_trace_rank0.json`, `pytorch_profiler_trace_rank1.json`) -For MPI applications, users can distinguish between ranks by setting different output directories per rank using the `DP_PYTORCH_PROFILER_OUTPUT_DIR` environment variable. +This ensures that each MPI rank saves its profiling data to a separate file, preventing conflicts in multi-rank simulations. This is intended for development and debugging purposes. 
\ No newline at end of file diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt index 90b7c08449..b142bfa12d 100644 --- a/source/api_cc/CMakeLists.txt +++ b/source/api_cc/CMakeLists.txt @@ -49,6 +49,21 @@ set_target_properties( INSTALL_RPATH_USE_LINK_PATH TRUE BUILD_RPATH "$ORIGIN/../op/tf;$ORIGIN/../op/pt;$ORIGIN/../op/pd") target_compile_definitions(${libname} PRIVATE TF_PRIVATE) +find_package(MPI) +if(MPI_FOUND) + include(CheckCXXSymbolExists) + set(CMAKE_REQUIRED_INCLUDES ${MPI_CXX_INCLUDE_DIRS}) + set(CMAKE_REQUIRED_LIBRARIES ${MPI_CXX_LIBRARIES}) + check_cxx_symbol_exists(MPIX_Query_cuda_support "mpi.h" CUDA_AWARE) + if(NOT CUDA_AWARE) + check_cxx_symbol_exists(MPIX_Query_cuda_support "mpi.h;mpi-ext.h" OMP_CUDA) + if(NOT OMP_CUDA) + target_compile_definitions(${libname} PRIVATE NO_CUDA_AWARE) + endif() + endif() + target_link_libraries(${libname} PRIVATE MPI::MPI_CXX) + target_compile_definitions(${libname} PRIVATE USE_MPI) +endif() if(CMAKE_TESTING_ENABLED) target_link_libraries(${libname} PRIVATE coverage_config) endif() diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 883e1c0c8c..1c2ca57956 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -11,10 +11,9 @@ #include #include -// Note: MPI rank detection has been removed from api_cc library -// to avoid MPI linking dependencies. The profiler will use a generic -// filename. Users can still distinguish between ranks by using different -// output directories per rank if needed. 
+#ifdef USE_MPI +#include +#endif #include "AtomMap.h" #include "device.h" @@ -408,10 +407,19 @@ void deepmd::get_env_pytorch_profiler(bool& enable_profiler, std::string& output } int deepmd::get_mpi_rank() { - // MPI rank detection removed from api_cc to avoid MPI linking dependencies - // Always return -1 to indicate no MPI rank available - // Users can distinguish between ranks by using different output directories +#ifdef USE_MPI + int rank = -1; // Use -1 to indicate MPI not available/initialized + int initialized = 0; + if (MPI_Initialized(&initialized) == MPI_SUCCESS && initialized) { + if (MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) { + rank = -1; // fallback to -1 if MPI_Comm_rank fails + } + } + return rank; +#else + // MPI not available at compile time return -1; +#endif } bool deepmd::create_directories(const std::string& path) { From 284fc6939c9c9bc9556c4225b5fa5335696b775c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 23 Aug 2025 03:05:31 +0000 Subject: [PATCH 10/11] Remove unused MPIX_Query_cuda_support from api_cc CMakeLists.txt Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- source/api_cc/CMakeLists.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt index b142bfa12d..956aafc226 100644 --- a/source/api_cc/CMakeLists.txt +++ b/source/api_cc/CMakeLists.txt @@ -51,16 +51,6 @@ set_target_properties( target_compile_definitions(${libname} PRIVATE TF_PRIVATE) find_package(MPI) if(MPI_FOUND) - include(CheckCXXSymbolExists) - set(CMAKE_REQUIRED_INCLUDES ${MPI_CXX_INCLUDE_DIRS}) - set(CMAKE_REQUIRED_LIBRARIES ${MPI_CXX_LIBRARIES}) - check_cxx_symbol_exists(MPIX_Query_cuda_support "mpi.h" CUDA_AWARE) - if(NOT CUDA_AWARE) - check_cxx_symbol_exists(MPIX_Query_cuda_support "mpi.h;mpi-ext.h" OMP_CUDA) - if(NOT OMP_CUDA) - target_compile_definitions(${libname} PRIVATE NO_CUDA_AWARE) - endif() - 
endif() target_link_libraries(${libname} PRIVATE MPI::MPI_CXX) target_compile_definitions(${libname} PRIVATE USE_MPI) endif() From b18c1391f01ddca9ec09843c43106fac121ac76c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 23 Aug 2025 05:00:42 +0000 Subject: [PATCH 11/11] Remove unnecessary torch/profiler.h header includes Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- source/api_cc/include/DeepPotPT.h | 3 --- source/api_cc/include/DeepSpinPT.h | 3 --- 2 files changed, 6 deletions(-) diff --git a/source/api_cc/include/DeepPotPT.h b/source/api_cc/include/DeepPotPT.h index b353dbc7e5..52b92d1c9f 100644 --- a/source/api_cc/include/DeepPotPT.h +++ b/source/api_cc/include/DeepPotPT.h @@ -3,9 +3,6 @@ #include #include -#ifdef BUILD_PYTORCH -#include -#endif #include "DeepPot.h" diff --git a/source/api_cc/include/DeepSpinPT.h b/source/api_cc/include/DeepSpinPT.h index c8c39b5905..b4279b0679 100644 --- a/source/api_cc/include/DeepSpinPT.h +++ b/source/api_cc/include/DeepSpinPT.h @@ -3,9 +3,6 @@ #include #include -#ifdef BUILD_PYTORCH -#include -#endif #include "DeepSpin.h"