diff --git a/doc/development/pytorch-profiler.md b/doc/development/pytorch-profiler.md new file mode 100644 index 0000000000..308a172cae --- /dev/null +++ b/doc/development/pytorch-profiler.md @@ -0,0 +1,52 @@ +# PyTorch C++ Profiler Integration Test + +This test demonstrates the PyTorch profiler integration with the C++ backend. + +## Usage + +1. Set environment variables: +```bash +export DP_ENABLE_PYTORCH_PROFILER=1 +export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results +``` + +2. Run your DeepMD-kit C++ application + +3. Check for profiler output in the specified directory: +```bash +# For single-rank or non-MPI usage +ls -la ./profiler_results/pytorch_profiler_trace.json + +# For MPI usage, each rank gets its own file +ls -la ./profiler_results/pytorch_profiler_trace_rank*.json +``` + +For MPI applications, you can use different output directories per rank: +```bash +# Example for rank 0 +export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results_rank0 +# Example for rank 1 +export DP_PYTORCH_PROFILER_OUTPUT_DIR=./profiler_results_rank1 +``` + +## Environment Variables + +- `DP_ENABLE_PYTORCH_PROFILER`: Set to `1` or `true` to enable profiling +- `DP_PYTORCH_PROFILER_OUTPUT_DIR`: Directory for profiler output (default: `./profiler_output`) + +## Implementation Details + +The profiler uses PyTorch's modern `torch::profiler` API and automatically: +- Creates the output directory if it doesn't exist +- Profiles all forward pass operations in DeepPotPT and DeepSpinPT +- Saves profiling results to a JSON file when the object is destroyed +- Automatically includes MPI rank in filename when MPI is available and initialized + +## Output Files + +- **Single-rank or non-MPI usage**: `pytorch_profiler_trace.json` +- **MPI usage**: `pytorch_profiler_trace_rank{rank}.json` (e.g., `pytorch_profiler_trace_rank0.json`, `pytorch_profiler_trace_rank1.json`) + +This ensures that each MPI rank saves its profiling data to a separate file, preventing conflicts in multi-rank 
simulations. + +This is intended for development and debugging purposes. \ No newline at end of file diff --git a/doc/env.md b/doc/env.md index 4ca7101236..28d777910b 100644 --- a/doc/env.md +++ b/doc/env.md @@ -90,3 +90,19 @@ These environment variables also apply to third-party programs using the C++ int List of customized OP plugin libraries to load, such as `/path/to/plugin1.so:/path/to/plugin2.so` on Linux and `/path/to/plugin1.dll;/path/to/plugin2.dll` on Windows. ::: + +:::{envvar} DP_ENABLE_PYTORCH_PROFILER + +**Choices**: `0`, `1`, `true`; **Default**: `0` + +{{ pytorch_icon }} Enable PyTorch profiler for C++ backend. This is for development purposes. + +::: + +:::{envvar} DP_PYTORCH_PROFILER_OUTPUT_DIR + +**Default**: `./profiler_output` + +{{ pytorch_icon }} Output directory for PyTorch profiler traces when `DP_ENABLE_PYTORCH_PROFILER` is enabled. + +::: diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt index 90b7c08449..956aafc226 100644 --- a/source/api_cc/CMakeLists.txt +++ b/source/api_cc/CMakeLists.txt @@ -49,6 +49,11 @@ set_target_properties( INSTALL_RPATH_USE_LINK_PATH TRUE BUILD_RPATH "$ORIGIN/../op/tf;$ORIGIN/../op/pt;$ORIGIN/../op/pd") target_compile_definitions(${libname} PRIVATE TF_PRIVATE) +find_package(MPI) +if(MPI_FOUND) + target_link_libraries(${libname} PRIVATE MPI::MPI_CXX) + target_compile_definitions(${libname} PRIVATE USE_MPI) +endif() if(CMAKE_TESTING_ENABLED) target_link_libraries(${libname} PRIVATE coverage_config) endif() diff --git a/source/api_cc/include/DeepPotPT.h b/source/api_cc/include/DeepPotPT.h index 207a13286c..52b92d1c9f 100644 --- a/source/api_cc/include/DeepPotPT.h +++ b/source/api_cc/include/DeepPotPT.h @@ -340,6 +340,12 @@ class DeepPotPT : public DeepPotBackend { at::Tensor firstneigh_tensor; c10::optional mapping_tensor; torch::Dict comm_dict; + // PyTorch profiler + bool profiler_enabled; + std::string profiler_output_dir; +#ifdef BUILD_PYTORCH + std::shared_ptr profiler_result; +#endif 
/** * @brief Translate PyTorch exceptions to the DeePMD-kit exception. * @param[in] f The function to run. diff --git a/source/api_cc/include/DeepSpinPT.h b/source/api_cc/include/DeepSpinPT.h index be4c85d898..b4279b0679 100644 --- a/source/api_cc/include/DeepSpinPT.h +++ b/source/api_cc/include/DeepSpinPT.h @@ -262,6 +262,12 @@ class DeepSpinPT : public DeepSpinBackend { at::Tensor firstneigh_tensor; c10::optional mapping_tensor; torch::Dict comm_dict; + // PyTorch profiler + bool profiler_enabled; + std::string profiler_output_dir; +#ifdef BUILD_PYTORCH + std::shared_ptr profiler_result; +#endif /** * @brief Translate PyTorch exceptions to the DeePMD-kit exception. * @param[in] f The function to run. diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h index 612f699ea4..215bb42030 100644 --- a/source/api_cc/include/common.h +++ b/source/api_cc/include/common.h @@ -163,6 +163,36 @@ void select_map_inv(typename std::vector::iterator out, **/ void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads); +/** + * @brief Get PyTorch profiler configuration from environment variables. + * @param[out] enable_profiler Whether to enable the profiler. Read from +*DP_ENABLE_PYTORCH_PROFILER. + * @param[out] output_dir Output directory for profiler traces. Read from +*DP_PYTORCH_PROFILER_OUTPUT_DIR. + **/ +void get_env_pytorch_profiler(bool& enable_profiler, std::string& output_dir); + +/** + * @brief Get the MPI rank of the calling process. + * @return The rank in MPI_COMM_WORLD when built with MPI support (USE_MPI) and MPI is initialized; -1 otherwise. + **/ +int get_mpi_rank(); + +/** + * @brief Create directories recursively in a cross-platform way. + * @param path The path to create. + * @return true if successful or directory already exists, false otherwise. + **/ +bool create_directories(const std::string& path); + +/** + * @brief Join two path components using platform-appropriate separator. 
+ * @param path1 The first path component. + * @param path2 The second path component. + * @return The joined path. + **/ +std::string join_path(const std::string& path1, const std::string& path2); + /** * @brief Dynamically load OP library. This should be called before loading * graphs. diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index 0f3a72b87f..b8c933eb6a 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -46,11 +46,11 @@ torch::Tensor createNlistTensor(const std::vector>& data) { int nnei = nloc > 0 ? total_size / nloc : 0; return flat_tensor.view({1, nloc, nnei}); } -DeepPotPT::DeepPotPT() : inited(false) {} +DeepPotPT::DeepPotPT() : inited(false), profiler_enabled(false) {} DeepPotPT::DeepPotPT(const std::string& model, const int& gpu_rank, const std::string& file_content) - : inited(false) { + : inited(false), profiler_enabled(false) { try { translate_error([&] { init(model, gpu_rank, file_content); }); } catch (...) { @@ -110,6 +110,26 @@ void DeepPotPT::init(const std::string& model, } } + // Initialize PyTorch profiler + get_env_pytorch_profiler(profiler_enabled, profiler_output_dir); + if (profiler_enabled) { +#ifdef BUILD_PYTORCH + // Create output directory if it doesn't exist + if (!create_directories(profiler_output_dir)) { + std::cerr << "Warning: Failed to create profiler output directory: " << profiler_output_dir << std::endl; + } + + std::cout << "PyTorch profiler enabled. 
Output directory: " << profiler_output_dir << std::endl; + // Start profiling using new API + torch::profiler::profile({ + torch::profiler::ProfilerActivity::CPU, + torch::profiler::ProfilerActivity::CUDA, + }, true, true, false); // record_shapes, profile_memory, with_stack +#else + std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; +#endif + } + auto rcut_ = module.run_method("get_rcut").toDouble(); rcut = static_cast(rcut_); ntypes = module.run_method("get_ntypes").toInt(); @@ -119,7 +139,31 @@ void DeepPotPT::init(const std::string& model, aparam_nall = module.run_method("is_aparam_nall").toBool(); inited = true; } -DeepPotPT::~DeepPotPT() {} +DeepPotPT::~DeepPotPT() { +#ifdef BUILD_PYTORCH + if (profiler_enabled) { + try { + // Save profiler results to file with MPI rank if available + int rank = get_mpi_rank(); + std::string output_file; + if (rank >= 0) { + // MPI is available and initialized, include rank in filename + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"); + } else { + // MPI not available or not initialized, use original filename + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace.json"); + } + profiler_result = torch::profiler::disableProfiler(); + if (profiler_result) { + profiler_result->save(output_file); + std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + } + } catch (const std::exception& e) { + std::cerr << "Warning: Failed to save profiler results: " << e.what() << std::endl; + } + } +#endif +} template void DeepPotPT::compute(ENERGYVTYPE& ener, @@ -234,6 +278,14 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, options) .to(device); } + + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif + c10::Dict outputs = (do_message_passing) ? 
module @@ -383,6 +435,14 @@ void DeepPotPT::compute(ENERGYVTYPE& ener, inputs.push_back(aparam_tensor); bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); + + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif + c10::Dict outputs = module.forward(inputs).toGenericDict(); c10::IValue energy_ = outputs.at("energy"); diff --git a/source/api_cc/src/DeepSpinPT.cc b/source/api_cc/src/DeepSpinPT.cc index 8ccf2fd383..14198f70eb 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -46,11 +46,11 @@ torch::Tensor createNlistTensor2(const std::vector>& data) { int nnei = nloc > 0 ? total_size / nloc : 0; return flat_tensor.view({1, nloc, nnei}); } -DeepSpinPT::DeepSpinPT() : inited(false) {} +DeepSpinPT::DeepSpinPT() : inited(false), profiler_enabled(false) {} DeepSpinPT::DeepSpinPT(const std::string& model, const int& gpu_rank, const std::string& file_content) - : inited(false) { + : inited(false), profiler_enabled(false) { try { translate_error([&] { init(model, gpu_rank, file_content); }); } catch (...) { @@ -110,6 +110,26 @@ void DeepSpinPT::init(const std::string& model, } } + // Initialize PyTorch profiler + get_env_pytorch_profiler(profiler_enabled, profiler_output_dir); + if (profiler_enabled) { +#ifdef BUILD_PYTORCH + // Create output directory if it doesn't exist + if (!create_directories(profiler_output_dir)) { + std::cerr << "Warning: Failed to create profiler output directory: " << profiler_output_dir << std::endl; + } + + std::cout << "PyTorch profiler enabled. 
Output directory: " << profiler_output_dir << std::endl; + // Start profiling using new API + torch::profiler::profile({ + torch::profiler::ProfilerActivity::CPU, + torch::profiler::ProfilerActivity::CUDA, + }, true, true, false); // record_shapes, profile_memory, with_stack +#else + std::cerr << "Warning: PyTorch profiler requested but BUILD_PYTORCH not defined" << std::endl; +#endif + } + auto rcut_ = module.run_method("get_rcut").toDouble(); rcut = static_cast(rcut_); ntypes = module.run_method("get_ntypes").toInt(); @@ -119,7 +139,31 @@ void DeepSpinPT::init(const std::string& model, aparam_nall = module.run_method("is_aparam_nall").toBool(); inited = true; } -DeepSpinPT::~DeepSpinPT() {} +DeepSpinPT::~DeepSpinPT() { +#ifdef BUILD_PYTORCH + if (profiler_enabled) { + try { + // Save profiler results to file with MPI rank if available + int rank = get_mpi_rank(); + std::string output_file; + if (rank >= 0) { + // MPI is available and initialized, include rank in filename + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace_rank" + std::to_string(rank) + ".json"); + } else { + // MPI not available or not initialized, use original filename + output_file = join_path(profiler_output_dir, "pytorch_profiler_trace.json"); + } + profiler_result = torch::profiler::disableProfiler(); + if (profiler_result) { + profiler_result->save(output_file); + std::cout << "PyTorch profiler results saved to: " << output_file << std::endl; + } + } catch (const std::exception& e) { + std::cerr << "Warning: Failed to save profiler results: " << e.what() << std::endl; + } + } +#endif +} template void DeepSpinPT::compute(ENERGYVTYPE& ener, @@ -410,6 +454,14 @@ void DeepSpinPT::compute(ENERGYVTYPE& ener, inputs.push_back(aparam_tensor); bool do_atom_virial_tensor = atomic; inputs.push_back(do_atom_virial_tensor); + + // Start profiling if enabled +#ifdef BUILD_PYTORCH + if (profiler_enabled && profiler) { + profiler->step(); + } +#endif + c10::Dict outputs = 
// ---------------------------------------------------------------------------
// PyTorch profiler helpers (C++ backend).
//
// Behavior is controlled by two environment variables:
//   DP_ENABLE_PYTORCH_PROFILER     : "1" or "true" enables profiling.
//   DP_PYTORCH_PROFILER_OUTPUT_DIR : trace output directory
//                                    (default "./profiler_output").
// ---------------------------------------------------------------------------

// Declarations normally provided by common.h; repeated here (identically) so
// this translation unit is self-contained.
namespace deepmd {
void get_env_pytorch_profiler(bool& enable_profiler, std::string& output_dir);
int get_mpi_rank();
bool create_directories(const std::string& path);
std::string join_path(const std::string& path1, const std::string& path2);
}  // namespace deepmd

/**
 * @brief Read the PyTorch profiler configuration from the environment.
 * @param[out] enable_profiler Set to true iff DP_ENABLE_PYTORCH_PROFILER is
 *             exactly "1" or "true"; false otherwise (including unset).
 * @param[out] output_dir Value of DP_PYTORCH_PROFILER_OUTPUT_DIR, or
 *             "./profiler_output" when the variable is unset or empty.
 **/
void deepmd::get_env_pytorch_profiler(bool& enable_profiler,
                                      std::string& output_dir) {
  enable_profiler = false;
  output_dir = "./profiler_output";  // default directory

  const char* env_enable = std::getenv("DP_ENABLE_PYTORCH_PROFILER");
  if (env_enable != nullptr) {
    const std::string value(env_enable);
    // Only the exact strings "1" and "true" enable the profiler; anything
    // else (including "TRUE", "yes", "0") leaves it disabled.
    enable_profiler = (value == "1" || value == "true");
  }

  const char* env_output_dir = std::getenv("DP_PYTORCH_PROFILER_OUTPUT_DIR");
  if (env_output_dir != nullptr && env_output_dir[0] != '\0') {
    output_dir = env_output_dir;
  }
}

/**
 * @brief Get the MPI rank of the calling process.
 * @return The rank in MPI_COMM_WORLD when compiled with USE_MPI and MPI has
 *         been initialized; -1 when MPI is unavailable at compile time, not
 *         initialized, or the rank query fails.
 **/
int deepmd::get_mpi_rank() {
#ifdef USE_MPI
  int initialized = 0;
  if (MPI_Initialized(&initialized) == MPI_SUCCESS && initialized) {
    int rank = -1;
    if (MPI_Comm_rank(MPI_COMM_WORLD, &rank) == MPI_SUCCESS) {
      return rank;
    }
  }
  return -1;  // MPI compiled in but not initialized (or query failed)
#else
  // MPI not available at compile time.
  return -1;
#endif
}

/**
 * @brief Create a directory and all missing parents (like `mkdir -p`),
 *        in a cross-platform way.
 * @param path The directory path to create.
 * @return true if the directory exists on return (created here or
 *         pre-existing), false on failure or when path is empty.
 **/
bool deepmd::create_directories(const std::string& path) {
  if (path.empty()) {
    return false;
  }

  // Fast path: the path already exists.  Succeed only if it is a directory.
  struct stat st;
  if (stat(path.c_str(), &st) == 0) {
    return S_ISDIR(st.st_mode);
  }

  // Recursively create the parent first (handles both '/' and '\\').
  const size_t pos = path.find_last_of("/\\");
  if (pos != std::string::npos && pos > 0) {
    if (!create_directories(path.substr(0, pos))) {
      return false;
    }
  }

  // Create the leaf directory.
#if defined(_WIN32)
  if (_mkdir(path.c_str()) == 0) {
    return true;
  }
#else
  if (mkdir(path.c_str(), 0755) == 0) {
    return true;
  }
#endif
  // EEXIST may mean another process created the directory concurrently, but
  // it may also mean a regular FILE with this name exists; re-stat so a
  // colliding file is reported as failure instead of success.
  return errno == EEXIST && stat(path.c_str(), &st) == 0 &&
         S_ISDIR(st.st_mode);
}

/**
 * @brief Join two path components with the platform's separator.
 * @param path1 The first path component.
 * @param path2 The second path component.
 * @return The joined path; no separator is inserted when path1 already ends
 *         with one, and an empty component yields the other unchanged.
 **/
std::string deepmd::join_path(const std::string& path1,
                              const std::string& path2) {
  if (path1.empty()) {
    return path2;
  }
  if (path2.empty()) {
    return path1;
  }
#if defined(_WIN32)
  const char sep = '\\';
#else
  const char sep = '/';
#endif
  const char last = path1.back();
  if (last == '/' || last == '\\') {
    return path1 + path2;
  }
  return path1 + sep + path2;
}
deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_TRUE(enable_profiler); + EXPECT_EQ(output_dir, "./profiler_output"); +} + +TEST_F(TestPyTorchProfiler, test_profiler_enabled_with_true) { + setenv("DP_ENABLE_PYTORCH_PROFILER", "true", 1); + + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_TRUE(enable_profiler); + EXPECT_EQ(output_dir, "./profiler_output"); +} + +TEST_F(TestPyTorchProfiler, test_custom_output_dir) { + setenv("DP_ENABLE_PYTORCH_PROFILER", "1", 1); + setenv("DP_PYTORCH_PROFILER_OUTPUT_DIR", "/custom/path", 1); + + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_TRUE(enable_profiler); + EXPECT_EQ(output_dir, "/custom/path"); +} + +TEST_F(TestPyTorchProfiler, test_profiler_disabled_with_zero) { + setenv("DP_ENABLE_PYTORCH_PROFILER", "0", 1); + + bool enable_profiler; + std::string output_dir; + deepmd::get_env_pytorch_profiler(enable_profiler, output_dir); + + EXPECT_FALSE(enable_profiler); + EXPECT_EQ(output_dir, "./profiler_output"); +} + +TEST_F(TestPyTorchProfiler, test_mpi_rank_detection) { + // Test that MPI rank detection returns valid rank (-1 when MPI not initialized, >= 0 when initialized) + int rank = deepmd::get_mpi_rank(); + EXPECT_GE(rank, -1); // Rank should be -1 (not available) or >= 0 (valid rank) +} \ No newline at end of file