diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f4c336f..5dc32d8 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -11,10 +11,30 @@ on: jobs: CI: + defaults: + run: + shell: bash strategy: matrix: distro: ['ubuntu:latest'] backend: ["SERIAL", "OPENMP"] + output: ['HDF5'] + include: + - distro: 'ubuntu:latest' + cxx: 'g++' + backend: 'SERIAL' + cmake_build_type: 'Debug' + output: 'SILO' + - distro: 'ubuntu:latest' + cxx: 'g++' + backend: 'SERIAL' + cmake_build_type: 'Debug' + output: 'NONE' + - distro: 'ubuntu:latest' + cxx: 'g++' + backend: 'SERIAL' + cmake_build_type: 'Debug' + output: 'BOTH' runs-on: ubuntu-20.04 container: image: ghcr.io/ecp-copa/ci-containers/${{ matrix.distro }} @@ -25,7 +45,7 @@ jobs: uses: actions/checkout@v2.2.0 with: repository: kokkos/kokkos - ref: 3.6.01 + ref: 3.7.02 path: kokkos - name: Build kokkos working-directory: kokkos @@ -37,12 +57,26 @@ jobs: uses: actions/checkout@v2.2.0 with: repository: ECP-copa/Cabana - ref: master + ref: 0.6.1 path: Cabana - name: Build Cabana working-directory: Cabana run: | - cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos" -DCabana_REQUIRE_${{ matrix.backend }}=ON + if [[ ${{ matrix.output }} == 'HDF5' ]]; then + cabana_cmake_opts+=( -DCabana_REQUIRE_HDF5=ON -DCMAKE_DISABLE_FIND_PACKAGE_SILO=ON ) + elif [[ ${{ matrix.output }} == 'SILO' ]]; then + cabana_cmake_opts+=( -DCabana_REQUIRE_SILO=ON -DCMAKE_DISABLE_FIND_PACKAGE_HDF5=ON ) + elif [[ ${{ matrix.output }} == 'BOTH' ]]; then + cabana_cmake_opts+=( -DCabana_REQUIRE_SILO=ON -DCabana_REQUIRE_HDF5=ON ) + else + cabana_cmake_opts+=( -DCMAKE_DISABLE_FIND_PACKAGE_SILO=ON -DCMAKE_DISABLE_FIND_PACKAGE_HDF5=ON ) + fi + cmake -B build \ + -DCMAKE_INSTALL_PREFIX=$HOME/Cabana \ + -DCMAKE_PREFIX_PATH="$HOME/kokkos" \ + -DCMAKE_CXX_FLAGS="-Wall -Wextra -pedantic -Werror" \ + -DCabana_REQUIRE_${{ matrix.backend }}=ON \ + ${cabana_cmake_opts[@]} cmake --build build --parallel 2 cmake 
--install build - name: Checkout code diff --git a/CMakeLists.txt b/CMakeLists.txt index 9333037..810857c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,13 +8,10 @@ include(GNUInstallDirs) # find dependencies set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) -find_package(Cabana REQUIRED COMPONENTS Cabana::Cajita Cabana::cabanacore) +find_package(Cabana 0.6.1 REQUIRED COMPONENTS Cabana::Grid Cabana::Core) if( NOT Cabana_ENABLE_MPI ) message( FATAL_ERROR "Cabana must be compiled with MPI" ) endif() -if( NOT Cabana_ENABLE_CAJITA ) - message( FATAL_ERROR "Cabana must be compiled with Cajita" ) -endif() # find Clang Format find_package( CLANG_FORMAT 14 ) diff --git a/examples/dam_break.cpp b/examples/dam_break.cpp index c60b30a..b38e240 100644 --- a/examples/dam_break.cpp +++ b/examples/dam_break.cpp @@ -3,7 +3,7 @@ #include -#include +#include #include @@ -87,7 +87,7 @@ void damBreak( const double cell_size, const int ppc, const int halo_size, int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); std::array ranks_per_dim = { 1, comm_size, 1 }; - Cajita::ManualBlockPartitioner<3> partitioner( ranks_per_dim ); + Cabana::Grid::ManualBlockPartitioner<3> partitioner( ranks_per_dim ); // Material properties. 
double bulk_modulus = 1.0e5; @@ -118,7 +118,26 @@ void damBreak( const double cell_size, const int ppc, const int halo_size, //---------------------------------------------------------------------------// int main( int argc, char* argv[] ) { - MPI_Init( &argc, &argv ); + + // enable the use of subfiling by setting environment H5FD_SUBFILING + const char* env_val = std::getenv( "H5FD_SUBFILING" ); + if ( env_val != NULL ) + { + int mpi_thread_required = MPI_THREAD_MULTIPLE; + int mpi_thread_provided = 0; + + // HDF5 Subfiling VFD requires MPI_Init_thread with MPI_THREAD_MULTIPLE + + MPI_Init_thread( &argc, &argv, mpi_thread_required, + &mpi_thread_provided ); + if ( mpi_thread_provided < mpi_thread_required ) + { + printf( "MPI_THREAD_MULTIPLE not supported\n" ); + MPI_Abort( MPI_COMM_WORLD, -1 ); + } + } + else + MPI_Init( &argc, &argv ); Kokkos::initialize( argc, argv ); diff --git a/examples/free_fall.cpp b/examples/free_fall.cpp index 24b26c9..35d2e38 100644 --- a/examples/free_fall.cpp +++ b/examples/free_fall.cpp @@ -3,7 +3,7 @@ #include -#include +#include #include @@ -84,7 +84,7 @@ void freeFall( const double cell_size, const int ppc, const int halo_size, int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); std::array ranks_per_dim = { 1, comm_size, 1 }; - Cajita::ManualBlockPartitioner<3> partitioner( ranks_per_dim ); + Cabana::Grid::ManualBlockPartitioner<3> partitioner( ranks_per_dim ); // Material properties. 
double bulk_modulus = 5.0e5; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5c723f2..9a2f2c2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,8 +20,8 @@ install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) add_library(exampm ${SOURCES}) target_link_libraries(exampm - Cabana::cabanacore - Cabana::Cajita + Cabana::Core + Cabana::Grid ) target_include_directories(exampm diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index 0ec2941..16b39a2 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -12,7 +12,7 @@ #ifndef EXAMPM_MESH_HPP #define EXAMPM_MESH_HPP -#include +#include #include @@ -39,7 +39,7 @@ class Mesh Mesh( const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const Cabana::Grid::BlockPartitioner<3>& partitioner, const int halo_cell_width, const int minimum_halo_cell_width, MPI_Comm comm ) { @@ -91,20 +91,21 @@ class Mesh } // Create the global mesh. - auto global_mesh = Cajita::createUniformGlobalMesh( + auto global_mesh = Cabana::Grid::createUniformGlobalMesh( global_low_corner, global_high_corner, num_cell ); // Build the global grid. - auto global_grid = Cajita::createGlobalGrid( comm, global_mesh, - periodic, partitioner ); + auto global_grid = Cabana::Grid::createGlobalGrid( + comm, global_mesh, periodic, partitioner ); // Build the local grid. int halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); - _local_grid = Cajita::createLocalGrid( global_grid, halo_width ); + _local_grid = Cabana::Grid::createLocalGrid( global_grid, halo_width ); } // Get the local grid. 
- const std::shared_ptr>>& + const std::shared_ptr< + Cabana::Grid::LocalGrid>>& localGrid() const { return _local_grid; @@ -129,7 +130,8 @@ class Mesh } public: - std::shared_ptr>> _local_grid; + std::shared_ptr>> + _local_grid; Kokkos::Array _min_domain_global_node_index; Kokkos::Array _max_domain_global_node_index; diff --git a/src/ExaMPM_ParticleInit.hpp b/src/ExaMPM_ParticleInit.hpp index 8b29b61..80a7343 100644 --- a/src/ExaMPM_ParticleInit.hpp +++ b/src/ExaMPM_ParticleInit.hpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include @@ -114,11 +114,11 @@ void initializeParticles( const ExecSpace& exec_space, using particle_type = typename ParticleList::tuple_type; // Create a local mesh. - auto local_mesh = Cajita::createLocalMesh( local_grid ); + auto local_mesh = Cabana::Grid::createLocalMesh( local_grid ); // Get the local set of owned cell indices. - auto owned_cells = - local_grid.indexSpace( Cajita::Own(), Cajita::Cell(), Cajita::Local() ); + auto owned_cells = local_grid.indexSpace( + Cabana::Grid::Own(), Cabana::Grid::Cell(), Cabana::Grid::Local() ); // Allocate enough space for the case the particles consume the entire // local grid. @@ -136,7 +136,7 @@ void initializeParticles( const ExecSpace& exec_space, int local_num_create = 0; Kokkos::parallel_reduce( "init_particles_uniform", - Cajita::createExecutionPolicy( owned_cells, exec_space ), + Cabana::Grid::createExecutionPolicy( owned_cells, exec_space ), KOKKOS_LAMBDA( const int i, const int j, const int k, int& create_count ) { // Compute the owned local cell id. @@ -150,12 +150,14 @@ void initializeParticles( const ExecSpace& exec_space, // Get the coordinates of the low cell node. int low_node[3] = { i, j, k }; double low_coords[3]; - local_mesh.coordinates( Cajita::Node(), low_node, low_coords ); + local_mesh.coordinates( Cabana::Grid::Node(), low_node, + low_coords ); // Get the coordinates of the high cell node. 
int high_node[3] = { i + 1, j + 1, k + 1 }; double high_coords[3]; - local_mesh.coordinates( Cajita::Node(), high_node, high_coords ); + local_mesh.coordinates( Cabana::Grid::Node(), high_node, + high_coords ); // Compute the particle spacing in each dimension. double spacing[3] = { ( high_coords[Dim::I] - low_coords[Dim::I] ) / diff --git a/src/ExaMPM_ProblemManager.hpp b/src/ExaMPM_ProblemManager.hpp index adee139..3659b89 100644 --- a/src/ExaMPM_ProblemManager.hpp +++ b/src/ExaMPM_ProblemManager.hpp @@ -17,7 +17,7 @@ #include -#include +#include #include @@ -91,13 +91,15 @@ class ProblemManager using particle_list = Cabana::AoSoA; using particle_type = typename particle_list::tuple_type; - using node_array = Cajita::Array, MemorySpace>; + using node_array = + Cabana::Grid::Array, MemorySpace>; - using cell_array = Cajita::Array, MemorySpace>; + using cell_array = + Cabana::Grid::Array, MemorySpace>; - using halo = Cajita::Halo; + using halo = Cabana::Grid::Halo; using mesh_type = Mesh; @@ -117,37 +119,38 @@ class ProblemManager initializeParticles( exec_space, *( _mesh->localGrid() ), particles_per_cell, create_functor, _particles ); - auto node_vector_layout = - Cajita::createArrayLayout( _mesh->localGrid(), 3, Cajita::Node() ); - auto node_scalar_layout = - Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Node() ); - auto cell_scalar_layout = - Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Cell() ); + auto node_vector_layout = Cabana::Grid::createArrayLayout( + _mesh->localGrid(), 3, Cabana::Grid::Node() ); + auto node_scalar_layout = Cabana::Grid::createArrayLayout( + _mesh->localGrid(), 1, Cabana::Grid::Node() ); + auto cell_scalar_layout = Cabana::Grid::createArrayLayout( + _mesh->localGrid(), 1, Cabana::Grid::Cell() ); - _momentum = Cajita::createArray( + _momentum = Cabana::Grid::createArray( "momentum", node_vector_layout ); - _mass = Cajita::createArray( "mass", - node_scalar_layout ); - _force = Cajita::createArray( "force", - 
node_vector_layout ); - _velocity = Cajita::createArray( + _mass = Cabana::Grid::createArray( + "mass", node_scalar_layout ); + _force = Cabana::Grid::createArray( + "force", node_vector_layout ); + _velocity = Cabana::Grid::createArray( "velocity", node_vector_layout ); - _position_correction = Cajita::createArray( + _position_correction = Cabana::Grid::createArray( "position_correction", node_vector_layout ); - _density = Cajita::createArray( + _density = Cabana::Grid::createArray( "density", cell_scalar_layout ); - _mark = Cajita::createArray( "mark", - cell_scalar_layout ); - - _node_scatter_halo = Cajita::createHalo( - Cajita::NodeHaloPattern<3>(), -1, *_momentum, *_mass, *_force ); - _node_gather_halo = - Cajita::createHalo( Cajita::NodeHaloPattern<3>(), -1, *_velocity ); - _node_correction_halo = Cajita::createHalo( - Cajita::NodeHaloPattern<3>(), -1, *_position_correction ); - _cell_halo = Cajita::createHalo( Cajita::NodeHaloPattern<3>(), -1, - *_density, *_mark ); + _mark = Cabana::Grid::createArray( + "mark", cell_scalar_layout ); + + _node_scatter_halo = + Cabana::Grid::createHalo( Cabana::Grid::NodeHaloPattern<3>(), -1, + *_momentum, *_mass, *_force ); + _node_gather_halo = Cabana::Grid::createHalo( + Cabana::Grid::NodeHaloPattern<3>(), -1, *_velocity ); + _node_correction_halo = Cabana::Grid::createHalo( + Cabana::Grid::NodeHaloPattern<3>(), -1, *_position_correction ); + _cell_halo = Cabana::Grid::createHalo( + Cabana::Grid::NodeHaloPattern<3>(), -1, *_density, *_mark ); } std::size_t numParticle() const { return _particles.size(); } @@ -237,21 +240,22 @@ class ProblemManager void scatter( Location::Node ) const { _node_scatter_halo->scatter( execution_space(), - Cajita::ScatterReduce::Sum(), *_momentum, - *_mass, *_force ); + Cabana::Grid::ScatterReduce::Sum(), + *_momentum, *_mass, *_force ); } void scatter( Location::Node, Field::PositionCorrection ) const { _node_correction_halo->scatter( execution_space(), - Cajita::ScatterReduce::Sum(), + 
Cabana::Grid::ScatterReduce::Sum(), *_position_correction ); } void scatter( Location::Cell ) const { - _cell_halo->scatter( execution_space(), Cajita::ScatterReduce::Sum(), - *_density, *_mark ); + _cell_halo->scatter( execution_space(), + Cabana::Grid::ScatterReduce::Sum(), *_density, + *_mark ); } void gather( Location::Node ) const @@ -268,8 +272,8 @@ class ProblemManager void communicateParticles( const int minimum_halo_width ) { auto positions = get( Location::Particle(), Field::Position() ); - Cajita::particleGridMigrate( *( _mesh->localGrid() ), positions, - _particles, minimum_halo_width ); + Cabana::Grid::particleGridMigrate( *( _mesh->localGrid() ), positions, + _particles, minimum_halo_width ); } private: diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 5e0108f..f918611 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -25,9 +25,15 @@ #include #include +#include +#include +#include +#include namespace ExaMPM { +int nfork; + //---------------------------------------------------------------------------// class SolverBase { @@ -45,7 +51,7 @@ class Solver : public SolverBase Solver( MPI_Comm comm, const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const Cabana::Grid::BlockPartitioner<3>& partitioner, const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, const double bulk_modulus, const double density, const double gamma, const double kappa, @@ -80,7 +86,10 @@ class Solver : public SolverBase while ( _time < t_final ) { if ( 0 == _rank && 0 == _step % write_freq ) - printf( "Time %f / %f\n", _time, t_final ); + printf( "Time %12.5e / %12.5e [iostats, mean min max (s): " + "%12.5e %12.5e %12.5e] \n", + _time, t_final, io_stats.mean, io_stats.min, + io_stats.max ); // Fixed timestep is guaranteed only when sufficently low dt // does not violate the CFL condition (otherwise user-set dt is @@ 
-99,6 +108,33 @@ class Solver : public SolverBase if ( 0 == ( _step ) % write_freq ) outputParticles(); } + + // Wait for all the h5fuse processes to complete + if ( shmrank == 0 ) + { + int status; + for ( int i = 0; i < nfork; i++ ) + { + waitpid( -1, &status, 0 ); + if ( WIFEXITED( status ) ) + { + int ret; + if ( ( ret = WEXITSTATUS( status ) ) != 0 ) + { + printf( "h5fuse process exited with error code %d\n", + ret ); + fflush( stdout ); + MPI_Abort( MPI_COMM_WORLD, -1 ); + } + } + else + { + printf( "h5fuse process terminated abnormally\n" ); + fflush( stdout ); + MPI_Abort( MPI_COMM_WORLD, -1 ); + } + } + } } void outputParticles() @@ -106,15 +142,156 @@ class Solver : public SolverBase // Prefer HDF5 output over Silo. Only output if one is enabled. #ifdef Cabana_ENABLE_HDF5 Cabana::Experimental::HDF5ParticleOutput::HDF5Config h5_config; + const char* env_val = std::getenv( "H5FD_SUBFILING" ); + if ( env_val != NULL ) + h5_config.subfiling = true; + + // Sets the HDF5 alignment equal to subfiling's stripe size + env_val = std::getenv( "H5FD_SUBFILING_STRIPE_SIZE" ); + if ( env_val != NULL ) + { + h5_config.align = true; + h5_config.threshold = 0; + h5_config.alignment = std::atoi( env_val ); + } + + env_val = std::getenv( "H5FUSE" ); + if ( env_val != NULL ) { + h5_config.h5fuse_info = true; + env_val = std::getenv( "LOC" ); + if ( env_val != NULL ) + h5_config.h5fuse_local = true; + } + + double t1, t2; + t1 = MPI_Wtime(); Cabana::Experimental::HDF5ParticleOutput::writeTimeStep( h5_config, "particles", _mesh->localGrid()->globalGrid().comm(), _step, _time, _pm->numParticle(), _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); + t2 = MPI_Wtime(); + timer_stats( t2 - t1, MPI_COMM_WORLD, 0, &io_stats ); + + // Setting environment H5FUSE enables fusing the subfiles into + // an HDF5 file. Assumes h5fuse is in the same directory + // as the executable. 
+ env_val = std::getenv( "H5FUSE" ); + if ( env_val != NULL ) + { + if ( h5_config.subfiling ) + { + + // if (h5_config.h5fuse_info) + // std::cout << "LEN " << h5_config.subfilenames_len << std::endl; + + //if(!h5_config.subfilenames.empty()) { + // int l_mpi_rank; + // MPI_Comm_rank(MPI_COMM_WORLD, &l_mpi_rank); + // std::cout << "ExaMPM " << h5_config.subfilenames << std::endl; + //} + + if (!h5_config.h5fuse_local) { + + if(!h5_config.subfilenames.empty()) { + { + pid_t pid = 0; + int status; + + pid = fork(); + nfork++; + if ( pid == 0 ) + { + std::stringstream filename_hdf5; + filename_hdf5 << "particles" + << "_" << _step << ".h5"; + + // Directory containing the subfiling configuration file + std::stringstream config_dir; + if ( const char* env_value = std::getenv( + H5FD_SUBFILING_CONFIG_FILE_PREFIX ) ) + config_dir << env_value; + else + config_dir << "."; + // Find the name of the subfiling configuration file + struct stat file_info; + stat( filename_hdf5.str().c_str(), &file_info ); + + char config_filename[PATH_MAX]; + snprintf( config_filename, PATH_MAX, + "%s/" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, + config_dir.str().c_str(), + filename_hdf5.str().c_str(), + (uint64_t)file_info.st_ino ); + + // Call the h5fuse utility + // Removes the subfiles in the process + char* args[] = { strdup( "./h5fuse" ), + strdup( "-l" ), strdup( h5_config.subfilenames.c_str() ), + //strdup( "-v" ), + strdup( "-f" ), config_filename, NULL }; + execvp( args[0], args ); + } + } + } + } else { + + MPI_Comm shmcomm; + MPI_Comm_split_type( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, + MPI_INFO_NULL, &shmcomm ); + + MPI_Comm_rank( shmcomm, &shmrank ); + + // One rank from each node executes h5fuse + + if ( shmrank == 0 ) + { + pid_t pid = 0; + int status; + + pid = fork(); + nfork++; + if ( pid == 0 ) + { + std::stringstream filename_hdf5; + filename_hdf5 << "particles" + << "_" << _step << ".h5"; + + // Directory containing the subfiling configuration file + std::stringstream 
config_dir; + if ( const char* env_value = std::getenv( + H5FD_SUBFILING_CONFIG_FILE_PREFIX ) ) + config_dir << env_value; + else + config_dir << "."; + // Find the name of the subfiling configuration file + struct stat file_info; + stat( filename_hdf5.str().c_str(), &file_info ); + + char config_filename[PATH_MAX]; + snprintf( config_filename, PATH_MAX, + "%s/" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, + config_dir.str().c_str(), + filename_hdf5.str().c_str(), + (uint64_t)file_info.st_ino ); + + // Call the h5fuse utility + // Removes the subfiles in the process + char* args[] = { strdup( "./h5fuse" ), + strdup( "-r" ), + strdup( "-f" ), config_filename, NULL }; + + execvp( args[0], args ); + } + } + MPI_Comm_free( &shmcomm ); + } + } + } #else #ifdef Cabana_ENABLE_SILO - Cajita::Experimental::SiloParticleOutput::writeTimeStep( + Cabana::Grid::Experimental::SiloParticleOutput::writeTimeStep( "particles", _mesh->localGrid()->globalGrid(), _step, _time, _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), @@ -123,7 +300,7 @@ class Solver : public SolverBase if ( _rank == 0 ) std::cout << "No particle output enabled in Cabana. Add " "Cabana_REQUIRE_HDF5=ON or Cabana_REQUIRE_SILO=ON to " - "the Cabana build if needed." 
+ "the Cabana build if needed."; #endif #endif } @@ -138,6 +315,61 @@ class Solver : public SolverBase std::shared_ptr> _mesh; std::shared_ptr> _pm; int _rank; + int shmrank; + + struct timer_statsinfo + { + double min; + double max; + double mean; + double std; + }; + + timer_statsinfo io_stats; + + // Collect statistics of timers on all ranks + // timer - elapsed time for rank + // comm - communicator for collecting stats + // destrank - the rank to which to collect stats + // stats - pointer to timer stats + // + + void timer_stats( double timer, MPI_Comm comm, int destrank, + timer_statsinfo* stats ) + { + int rank, nprocs, i; + double* rtimers = NULL; /* All timers from ranks */ + + MPI_Comm_rank( comm, &rank ); + MPI_Comm_size( comm, &nprocs ); + if ( rank == destrank ) + { + rtimers = (double*)malloc( nprocs * sizeof( double ) ); + stats->mean = 0.; + stats->min = timer; + stats->max = timer; + stats->std = 0.f; + } + MPI_Gather( &timer, 1, MPI_DOUBLE, rtimers, 1, MPI_DOUBLE, destrank, + comm ); + if ( rank == destrank ) + { + for ( i = 0; i < nprocs; i++ ) + { + if ( rtimers[i] > stats->max ) + stats->max = rtimers[i]; + if ( rtimers[i] < stats->min ) + stats->min = rtimers[i]; + stats->mean += rtimers[i]; + } + stats->mean /= nprocs; + for ( i = 0; i < nprocs; i++ ) + stats->std += + ( rtimers[i] - stats->mean ) * ( rtimers[i] - stats->mean ); + stats->std = sqrt( stats->std / nprocs ); + free( rtimers ); + } + } }; //---------------------------------------------------------------------------// @@ -148,7 +380,7 @@ createSolver( const std::string& device, MPI_Comm comm, const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const Cabana::Grid::BlockPartitioner<3>& partitioner, const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, const double bulk_modulus, const double density, const double gamma, const double kappa, diff 
--git a/src/ExaMPM_TimeIntegrator.hpp b/src/ExaMPM_TimeIntegrator.hpp index d67a545..d0a90b2 100644 --- a/src/ExaMPM_TimeIntegrator.hpp +++ b/src/ExaMPM_TimeIntegrator.hpp @@ -16,7 +16,7 @@ #include #include -#include +#include #include @@ -59,8 +59,8 @@ void p2g( const ExecutionSpace& exec_space, const ProblemManagerType& pm ) double gamma = pm.gamma(); // Build the local mesh. - auto local_mesh = - Cajita::createLocalMesh( *( pm.mesh()->localGrid() ) ); + auto local_mesh = Cabana::Grid::createLocalMesh( + *( pm.mesh()->localGrid() ) ); // Loop over particles. Kokkos::parallel_for( @@ -71,16 +71,16 @@ void p2g( const ExecutionSpace& exec_space, const ProblemManagerType& pm ) double x[3] = { x_p( p, 0 ), x_p( p, 1 ), x_p( p, 2 ) }; // Setup interpolation to the nodes. - Cajita::SplineData sd; - Cajita::evaluateSpline( local_mesh, x, sd ); + Cabana::Grid::SplineData sd; + Cabana::Grid::evaluateSpline( local_mesh, x, sd ); // Compute the pressure on the particle with an equation of // state. double pressure = -bulk_mod * ( pow( j_p( p ), -gamma ) - 1.0 ); // Project the pressure gradient to the grid. - Cajita::P2G::gradient( -v_p( p ) * j_p( p ) * pressure, sd, - f_i_sv ); + Cabana::Grid::P2G::gradient( -v_p( p ) * j_p( p ) * pressure, sd, + f_i_sv ); // Extract the particle velocity double vel_p[3] = { u_p( p, 0 ), u_p( p, 1 ), u_p( p, 2 ) }; @@ -95,7 +95,7 @@ void p2g( const ExecutionSpace& exec_space, const ProblemManagerType& pm ) APIC::p2g( m_p( p ), vel_p, aff_p, sd, mu_i_sv ); // Project mass to the grid. - Cajita::P2G::value( m_p( p ), sd, m_i_sv ); + Cabana::Grid::P2G::value( m_p( p ), sd, m_i_sv ); } ); // Complete local scatter. @@ -127,12 +127,12 @@ void fieldSolve( const ExecutionSpace& exec_space, const ProblemManagerType& pm, double mass_epsilon = 1.0e-12; // Compute the velocity. 
- auto l2g = Cajita::IndexConversion::createL2G( *( pm.mesh()->localGrid() ), - Cajita::Node() ); + auto l2g = Cabana::Grid::IndexConversion::createL2G( + *( pm.mesh()->localGrid() ), Cabana::Grid::Node() ); auto local_nodes = pm.mesh()->localGrid()->indexSpace( - Cajita::Ghost(), Cajita::Node(), Cajita::Local() ); + Cabana::Grid::Ghost(), Cabana::Grid::Node(), Cabana::Grid::Local() ); Kokkos::parallel_for( - Cajita::createExecutionPolicy( local_nodes, exec_space ), + Cabana::Grid::createExecutionPolicy( local_nodes, exec_space ), KOKKOS_LAMBDA( const int li, const int lj, const int lk ) { int gi, gj, gk; l2g( li, lj, lk, gi, gj, gk ); @@ -188,8 +188,8 @@ void g2p( const ExecutionSpace& exec_space, const ProblemManagerType& pm, auto k_c_sv = Kokkos::Experimental::create_scatter_view( k_c ); // Build the local mesh. - auto local_mesh = - Cajita::createLocalMesh( *( pm.mesh()->localGrid() ) ); + auto local_mesh = Cabana::Grid::createLocalMesh( + *( pm.mesh()->localGrid() ) ); auto cell_size = pm.mesh()->localGrid()->globalGrid().globalMesh().cellSize( 0 ); auto cell_volume = cell_size * cell_size * cell_size; @@ -206,8 +206,8 @@ void g2p( const ExecutionSpace& exec_space, const ProblemManagerType& pm, double x[3] = { x_p( p, 0 ), x_p( p, 1 ), x_p( p, 2 ) }; // Setup interpolation from the nodes. - Cajita::SplineData sd_i; - Cajita::evaluateSpline( local_mesh, x, sd_i ); + Cabana::Grid::SplineData sd_i; + Cabana::Grid::evaluateSpline( local_mesh, x, sd_i ); // Update particle velocity. double vel_p[3]; @@ -222,7 +222,7 @@ void g2p( const ExecutionSpace& exec_space, const ProblemManagerType& pm, // Compute the velocity divergence (this is the trace of the // velocity gradient). double div_u; - Cajita::G2P::divergence( u_i, sd_i, div_u ); + Cabana::Grid::G2P::divergence( u_i, sd_i, div_u ); // Update the deformation gradient determinant. 
j_p( p ) *= exp( delta_t * div_u ); @@ -235,14 +235,14 @@ void g2p( const ExecutionSpace& exec_space, const ProblemManagerType& pm, } // Project density to cell. - Cajita::SplineData sd_c1; - Cajita::evaluateSpline( local_mesh, x, sd_c1 ); - Cajita::P2G::value( m_p( p ) / cell_volume, sd_c1, r_c_sv ); + Cabana::Grid::SplineData sd_c1; + Cabana::Grid::evaluateSpline( local_mesh, x, sd_c1 ); + Cabana::Grid::P2G::value( m_p( p ) / cell_volume, sd_c1, r_c_sv ); // Mark cells. Indicates whether or not cells have particles. - Cajita::SplineData sd_c0; - Cajita::evaluateSpline( local_mesh, x, sd_c0 ); - Cajita::P2G::value( 1.0, sd_c0, k_c_sv ); + Cabana::Grid::SplineData sd_c0; + Cabana::Grid::evaluateSpline( local_mesh, x, sd_c0 ); + Cabana::Grid::P2G::value( 1.0, sd_c0, k_c_sv ); } ); // Complete local scatter. @@ -280,24 +280,24 @@ void correctParticlePositions( const ExecutionSpace& exec_space, double density = pm.density(); // Build the local mesh. - auto local_mesh = - Cajita::createLocalMesh( *( pm.mesh()->localGrid() ) ); + auto local_mesh = Cabana::Grid::createLocalMesh( + *( pm.mesh()->localGrid() ) ); // Compute nodal correction. auto local_cells = pm.mesh()->localGrid()->indexSpace( - Cajita::Own(), Cajita::Cell(), Cajita::Local() ); + Cabana::Grid::Own(), Cabana::Grid::Cell(), Cabana::Grid::Local() ); Kokkos::parallel_for( "compute_position_correction", - Cajita::createExecutionPolicy( local_cells, exec_space ), + Cabana::Grid::createExecutionPolicy( local_cells, exec_space ), KOKKOS_LAMBDA( const int i, const int j, const int k ) { // Get the cell center. int idx[3] = { i, j, k }; double x[3]; - local_mesh.coordinates( Cajita::Cell(), idx, x ); + local_mesh.coordinates( Cabana::Grid::Cell(), idx, x ); // Setup interpolation from cell center to nodes. - Cajita::SplineData sd_i; - Cajita::evaluateSpline( local_mesh, x, sd_i ); + Cabana::Grid::SplineData sd_i; + Cabana::Grid::evaluateSpline( local_mesh, x, sd_i ); // Clamp the density outside the fluid. 
double rho = ( k_c( i, j, k, 0 ) > 0.0 ) @@ -307,7 +307,7 @@ void correctParticlePositions( const ExecutionSpace& exec_space, // Compute correction. double correction = -delta_t * delta_t * kappa * ( 1 - rho / density ) / density; - Cajita::P2G::gradient( correction, sd_i, x_i_sv ); + Cabana::Grid::P2G::gradient( correction, sd_i, x_i_sv ); } ); // Complete local scatter. @@ -321,12 +321,12 @@ void correctParticlePositions( const ExecutionSpace& exec_space, // Apply boundary condition to position correction. // Compute the velocity. - auto l2g = Cajita::IndexConversion::createL2G( *( pm.mesh()->localGrid() ), - Cajita::Node() ); + auto l2g = Cabana::Grid::IndexConversion::createL2G( + *( pm.mesh()->localGrid() ), Cabana::Grid::Node() ); auto local_nodes = pm.mesh()->localGrid()->indexSpace( - Cajita::Ghost(), Cajita::Node(), Cajita::Local() ); + Cabana::Grid::Ghost(), Cabana::Grid::Node(), Cabana::Grid::Local() ); Kokkos::parallel_for( - Cajita::createExecutionPolicy( local_nodes, exec_space ), + Cabana::Grid::createExecutionPolicy( local_nodes, exec_space ), KOKKOS_LAMBDA( const int li, const int lj, const int lk ) { int gi, gj, gk; l2g( li, lj, lk, gi, gj, gk ); @@ -343,12 +343,12 @@ void correctParticlePositions( const ExecutionSpace& exec_space, double x[3] = { x_p( p, 0 ), x_p( p, 1 ), x_p( p, 2 ) }; // Setup interpolation from the nodes. - Cajita::SplineData sd_i; - Cajita::evaluateSpline( local_mesh, x, sd_i ); + Cabana::Grid::SplineData sd_i; + Cabana::Grid::evaluateSpline( local_mesh, x, sd_i ); // Correct the particle position. 
double delta_x[3]; - Cajita::G2P::value( x_i, sd_i, delta_x ); + Cabana::Grid::G2P::value( x_i, sd_i, delta_x ); for ( int d = 0; d < 3; ++d ) x_p( p, d ) += delta_x[d]; } ); diff --git a/src/ExaMPM_TimeStepControl.hpp b/src/ExaMPM_TimeStepControl.hpp index 067a95f..34a82a6 100644 --- a/src/ExaMPM_TimeStepControl.hpp +++ b/src/ExaMPM_TimeStepControl.hpp @@ -14,8 +14,6 @@ #include -#include - #include #include @@ -44,14 +42,8 @@ template double momentumCFL( MPI_Comm comm, ExecutionSpace, const ProblemManagerType& pm, const double current_dt, const double cfl ) { - -#if KOKKOS_VERSION >= 30700 using Kokkos::abs; using Kokkos::sqrt; -#else - using Kokkos::Experimental::abs; - using Kokkos::Experimental::sqrt; -#endif // Get the particle data we need. auto m_p = pm.get( Location::Particle(), Field::Mass() ); @@ -98,12 +90,7 @@ template double maxVelocity( MPI_Comm comm, ExecutionSpace, const ProblemManagerType& pm, const double current_dt, const double cfl ) { - -#if KOKKOS_VERSION >= 30700 using Kokkos::sqrt; -#else - using Kokkos::Experimental::sqrt; -#endif // Get the particle data we need. auto u_p = pm.get( Location::Particle(), Field::Velocity() ); diff --git a/src/ExaMPM_Types.hpp b/src/ExaMPM_Types.hpp index 59426d4..ac2cdc6 100644 --- a/src/ExaMPM_Types.hpp +++ b/src/ExaMPM_Types.hpp @@ -12,13 +12,13 @@ #ifndef EXAMPM_TYPES_HPP #define EXAMPM_TYPES_HPP -#include +#include namespace ExaMPM { //---------------------------------------------------------------------------// // Logical dimension index. 
-using Dim = Cajita::Dim; +using Dim = Cabana::Grid::Dim; //---------------------------------------------------------------------------// diff --git a/src/ExaMPM_VelocityInterpolation.hpp b/src/ExaMPM_VelocityInterpolation.hpp index d6050fb..1757db6 100644 --- a/src/ExaMPM_VelocityInterpolation.hpp +++ b/src/ExaMPM_VelocityInterpolation.hpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include @@ -55,11 +55,11 @@ p2g( const typename MomentumView::original_value_type m_p, const typename MomentumView::original_value_type B_p[3][3], const SplineDataType& sd, const MomentumView& node_momentum, typename std::enable_if< - ( Cajita::isNode::value && + ( Cabana::Grid::isNode::value && ( SplineDataType::order == 2 || SplineDataType::order == 3 ) ), void*>::type = 0 ) { - static_assert( Cajita::P2G::is_scatter_view::value, + static_assert( Cabana::Grid::P2G::is_scatter_view::value, "P2G requires a Kokkos::ScatterView" ); auto momentum_access = node_momentum.access(); @@ -106,11 +106,11 @@ p2g( const typename MomentumView::original_value_type m_p, const typename MomentumView::original_value_type B_p[3][3], const SplineDataType& sd, const MomentumView& node_momentum, typename std::enable_if< - ( Cajita::isNode::value && + ( Cabana::Grid::isNode::value && ( SplineDataType::order == 1 ) ), void*>::type = 0 ) { - static_assert( Cajita::P2G::is_scatter_view::value, + static_assert( Cabana::Grid::P2G::is_scatter_view::value, "P2G requires a Kokkos::ScatterView" ); auto momentum_access = node_momentum.access(); @@ -155,7 +155,7 @@ g2p( const VelocityView& node_velocity, const SplineDataType& sd, typename VelocityView::value_type u_p[3], typename VelocityView::value_type B_p[3][3], typename std::enable_if< - Cajita::isNode::value, + Cabana::Grid::isNode::value, void*>::type = 0 ) { using value_type = typename VelocityView::value_type;