diff --git a/src/CabanaPD_BodyTerm.hpp b/src/CabanaPD_BodyTerm.hpp index b575ce44..ef1678c1 100644 --- a/src/CabanaPD_BodyTerm.hpp +++ b/src/CabanaPD_BodyTerm.hpp @@ -16,6 +16,8 @@ #include +#include + namespace CabanaPD { @@ -24,6 +26,8 @@ struct BodyTerm { UserFunctor _user_functor; + Timer _timer; + BodyTerm( UserFunctor user ) : _user_functor( user ) { @@ -34,12 +38,17 @@ struct BodyTerm template void apply( ExecSpace, ParticleType& particles, const double time ) { + _timer.start(); Kokkos::RangePolicy policy( 0, particles.n_local ); auto user = _user_functor; Kokkos::parallel_for( "CabanaPD::BodyTerm::apply", policy, KOKKOS_LAMBDA( const int p ) { user( p, time ); } ); + _timer.stop(); } + + auto time() { return _timer.time(); }; + auto timeInit() { return 0.0; }; }; template diff --git a/src/CabanaPD_Boundary.hpp b/src/CabanaPD_Boundary.hpp index 6a792f2a..23be020d 100644 --- a/src/CabanaPD_Boundary.hpp +++ b/src/CabanaPD_Boundary.hpp @@ -16,6 +16,8 @@ #include +#include + namespace CabanaPD { @@ -51,6 +53,8 @@ struct BoundaryIndexSpace index_view_type _view; index_view_type _count; + Timer _timer; + // Default for empty case. BoundaryIndexSpace() {} @@ -59,6 +63,8 @@ struct BoundaryIndexSpace std::vector planes, const double initial_guess ) { + _timer.start(); + _view = index_view_type( "boundary_indices", particles.n_local * initial_guess ); _count = index_view_type( "count", 1 ); @@ -74,6 +80,8 @@ struct BoundaryIndexSpace { Kokkos::resize( _view, count_host( 0 ) ); } + + _timer.stop(); } template @@ -112,6 +120,8 @@ struct BoundaryIndexSpace index_functor ); } } + + auto time() { return _timer.time(); }; }; template @@ -141,6 +151,8 @@ struct BoundaryCondition BCIndexSpace _index_space; UserFunctor _user_functor; + Timer _timer; + BoundaryCondition( BCIndexSpace bc_index_space, UserFunctor user ) : _index_space( bc_index_space ) , _user_functor( user ) @@ -157,6 +169,7 @@ struct BoundaryCondition template void apply( ExecSpace, ParticleType& ) { + _timer.start(); auto user = _user_functor; auto index_space = _index_space._view; Kokkos::RangePolicy policy( 0, index_space.size() ); @@ -165,12 +178,18 @@ struct BoundaryCondition auto pid = index_space( b ); user( pid ); } ); + _timer.stop(); } + + auto time() { return _timer.time(); }; + auto timeInit() { return _index_space.time(); }; }; template struct BoundaryCondition { + Timer _timer; + template void update( ExecSpace, Particles, RegionBoundary ) { @@ -180,6 +199,9 @@ struct BoundaryCondition void apply( ExecSpace, ParticleType ) { } + + auto time() { return _timer.time(); }; + auto timeInit() { return 0.0; }; }; template @@ -188,6 +210,8 @@ struct BoundaryCondition double _value; BCIndexSpace _index_space; + Timer _timer; + BoundaryCondition( const double value, BCIndexSpace bc_index_space ) : _value( value ) , _index_space( bc_index_space ) @@ -204,6 +228,7 @@ struct BoundaryCondition template void apply( ExecSpace, ParticleType& particles ) { + _timer.start(); auto f = particles.sliceForce(); auto index_space = _index_space._view; Kokkos::RangePolicy policy( 0, index_space.size() ); @@ -214,7 +239,11 @@ struct BoundaryCondition for ( int d = 0; d < 3; d++ ) f( pid, d ) = value; } ); + _timer.stop(); } + + auto time() { return _timer.time(); }; + auto timeInit() { return _index_space.time(); }; }; template @@ -223,6 +252,8 @@ struct BoundaryCondition double _value; BCIndexSpace _index_space; + Timer _timer; + BoundaryCondition( const double value, BCIndexSpace bc_index_space ) : _value( value ) , _index_space( bc_index_space ) @@ -239,6 +270,8 @@ struct BoundaryCondition template void apply( ExecSpace, ParticleType& particles ) { + _timer.start(); + auto f = particles.sliceForce(); auto index_space = _index_space._view; Kokkos::RangePolicy policy( 0, index_space.size() ); @@ -249,7 +282,12 @@ struct BoundaryCondition for ( int d = 0; d < 3; d++ ) f( pid, d ) += value; } ); + + _timer.stop(); } + + auto time() { return _timer.time(); }; + auto timeInit() { return _index_space.time(); }; }; // FIXME: relatively large initial guess for allocation. diff --git a/src/CabanaPD_Comm.hpp b/src/CabanaPD_Comm.hpp index 0a882495..a5f8b698 100644 --- a/src/CabanaPD_Comm.hpp +++ b/src/CabanaPD_Comm.hpp @@ -18,6 +18,7 @@ #include +#include #include namespace CabanaPD @@ -251,6 +252,7 @@ class Comm Comm( ParticleType& particles, int max_export_guess = 100 ) : max_export( max_export_guess ) { + _init_timer.start(); auto local_grid = particles.local_grid; MPI_Comm_size( local_grid->globalGrid().comm(), &mpi_size ); MPI_Comm_rank( local_grid->globalGrid().comm(), &mpi_rank ); @@ -281,6 +283,8 @@ class Comm gather_u = std::make_shared( *halo, particles._aosoa_u ); gather_u->apply(); + + _init_timer.stop(); } ~Comm() {} @@ -297,10 +301,22 @@ class Comm // We assume here that the particle count has not changed and no resize // is necessary. - void gatherDisplacement() { gather_u->apply(); } + void gatherDisplacement() + { + _timer.start(); + gather_u->apply(); + _timer.stop(); + } // No-op to make solvers simpler. void gatherDilatation() {} void gatherWeightedVolume() {} + + auto timeInit() { return _init_timer.time(); }; + auto time() { return _timer.time(); }; + + protected: + Timer _init_timer; + Timer _timer; }; template @@ -313,6 +329,9 @@ class Comm : public Comm using base_type::gather_u; using base_type::halo; + using base_type::_init_timer; + using base_type::_timer; + using gather_m_type = Cabana::Gather; using gather_theta_type = @@ -323,14 +342,28 @@ class Comm : public Comm Comm( ParticleType& particles, int max_export_guess = 100 ) : base_type( particles, max_export_guess ) { + _init_timer.start(); + gather_m = std::make_shared( *halo, particles._aosoa_m ); gather_theta = std::make_shared( *halo, particles._aosoa_theta ); + + _init_timer.stop(); } ~Comm() {} - void gatherDilatation() { gather_theta->apply(); } - void gatherWeightedVolume() { gather_m->apply(); } + void gatherDilatation() + { + _timer.start(); + gather_theta->apply(); + _timer.stop(); + } + void gatherWeightedVolume() + { + _timer.start(); + gather_m->apply(); + _timer.stop(); + } }; } // namespace CabanaPD diff --git a/src/CabanaPD_Force.hpp b/src/CabanaPD_Force.hpp index 3390f58e..2f614e8e 100644 --- a/src/CabanaPD_Force.hpp +++ b/src/CabanaPD_Force.hpp @@ -133,7 +133,7 @@ class Force; ******************************************************************************/ template -void computeForce( const ForceType& force, ParticleType& particles, +void computeForce( ForceType& force, ParticleType& particles, const NeighListType& neigh_list, const ParallelType& neigh_op_tag ) { @@ -163,7 +163,7 @@ void computeForce( const ForceType& force, ParticleType& particles, template -double computeEnergy( const ForceType force, ParticleType& particles, +double computeEnergy( ForceType& force, ParticleType& particles, const NeighListType& neigh_list, const ParallelType& neigh_op_tag ) { @@ -192,7 +192,7 @@ double computeEnergy( const ForceType force, ParticleType& particles, // Forces with bond breaking. template -void computeForce( const ForceType& force, ParticleType& particles, +void computeForce( ForceType& force, ParticleType& particles, const NeighListType& neigh_list, NeighborView& mu, const ParallelType& neigh_op_tag ) { @@ -223,7 +223,7 @@ void computeForce( const ForceType& force, ParticleType& particles, // Energy and damage. template -double computeEnergy( const ForceType force, ParticleType& particles, +double computeEnergy( ForceType& force, ParticleType& particles, const NeighListType& neigh_list, NeighborView& mu, const ParallelType& neigh_op_tag ) { diff --git a/src/CabanaPD_Integrate.hpp b/src/CabanaPD_Integrate.hpp index ff917707..0cf4b483 100644 --- a/src/CabanaPD_Integrate.hpp +++ b/src/CabanaPD_Integrate.hpp @@ -63,6 +63,7 @@ #include #include +#include namespace CabanaPD { @@ -72,6 +73,7 @@ class Integrator using exec_space = ExecutionSpace; double _dt, _half_dt; + Timer _timer; public: Integrator( double dt ) @@ -85,6 +87,8 @@ class Integrator template void initialHalfStep( ParticlesType& p ) { + _timer.start(); + auto u = p.sliceDisplacement(); auto v = p.sliceVelocity(); auto f = p.sliceForce(); @@ -105,11 +109,15 @@ class Integrator Kokkos::RangePolicy policy( 0, v.size() ); Kokkos::parallel_for( "CabanaPD::Integrator::Initial", policy, init_func ); + + _timer.stop(); } template void finalHalfStep( ParticlesType& p ) { + _timer.start(); + auto v = p.sliceVelocity(); auto f = p.sliceForce(); auto rho = p.sliceDensity(); @@ -125,7 +133,12 @@ class Integrator Kokkos::RangePolicy policy( 0, v.size() ); Kokkos::parallel_for( "CabanaPD::Integrator::Final", policy, final_func ); + + _timer.stop(); } + + double timeInit() { return 0.0; }; + auto time() { return _timer.time(); }; }; } // namespace CabanaPD diff --git a/src/CabanaPD_Particles.hpp b/src/CabanaPD_Particles.hpp index 34a70c28..02d691c2 100644 --- a/src/CabanaPD_Particles.hpp +++ b/src/CabanaPD_Particles.hpp @@ -72,6 +72,7 @@ #include #include #include +#include #include namespace CabanaPD @@ -141,6 +142,7 @@ class Particles // Default constructor. Particles() { + _init_timer.start(); for ( int d = 0; d < dim; d++ ) { global_mesh_ext[d] = 0.0; @@ -150,6 +152,7 @@ class Particles ghost_mesh_hi[d] = 0.0; local_mesh_ext[d] = 0.0; } + _init_timer.stop(); resize( 0, 0 ); } @@ -185,6 +188,7 @@ class Particles std::array high_corner, const std::array num_cells ) { + _init_timer.start(); // Create the MPI partitions. Cabana::Grid::DimBlockPartitioner partitioner; @@ -218,6 +222,7 @@ class Particles local_mesh.highCorner( Cabana::Grid::Ghost(), d ); local_mesh_ext[d] = local_mesh.extent( Cabana::Grid::Own(), d ); } + _init_timer.stop(); } template @@ -233,6 +238,7 @@ class Particles template void createParticles( const ExecSpace& exec_space, UserFunctor user_create ) { + _init_timer.start(); // Create a local mesh and owned space. auto owned_cells = local_grid->indexSpace( Cabana::Grid::Own(), Cabana::Grid::Cell(), Cabana::Grid::Local() ); @@ -292,15 +298,18 @@ class Particles // Not using Allreduce because global count is only used for printing. MPI_Reduce( &n_local, &n_global, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD ); + _init_timer.stop(); } template void updateParticles( const ExecSpace, const FunctorType init_functor ) { + _timer.start(); Kokkos::RangePolicy policy( 0, n_local ); Kokkos::parallel_for( "CabanaPD::Particles::update_particles", policy, KOKKOS_LAMBDA( const int pid ) { init_functor( pid ); } ); + _timer.stop(); } auto sliceReferencePosition() @@ -387,6 +396,7 @@ class Particles void updateCurrentPosition() { + _timer.start(); // Not using slice function because this is called inside. auto y = Cabana::slice<0>( _aosoa_y, "current_positions" ); auto x = sliceReferencePosition(); @@ -399,10 +409,12 @@ class Particles }; Kokkos::parallel_for( "CabanaPD::CalculateCurrentPositions", policy, sum_x_u ); + _timer.stop(); } void resize( int new_local, int new_ghost ) { + _timer.start(); n_local = new_local; n_ghost = new_ghost; @@ -414,6 +426,7 @@ class Particles _aosoa_other.resize( new_local ); _aosoa_nofail.resize( new_local + new_ghost ); size = _plist_x.size(); + _timer.stop(); }; auto getPosition( const bool use_reference ) @@ -428,6 +441,8 @@ class Particles [[maybe_unused]] const double output_time, [[maybe_unused]] const bool use_reference = true ) { + _output_timer.start(); + #ifdef Cabana_ENABLE_HDF5 Cabana::Experimental::HDF5ParticleOutput::writeTimeStep( h5_config, "particles", MPI_COMM_WORLD, output_step, output_time, @@ -445,8 +460,14 @@ class Particles log( std::cout, "No particle output enabled." ); #endif #endif + + _output_timer.stop(); } + auto timeInit() { return _init_timer.time(); }; + auto timeOutput() { return _output_timer.time(); }; + auto time() { return _timer.time(); }; + friend class Comm; protected: @@ -462,6 +483,10 @@ class Particles #ifdef Cabana_ENABLE_HDF5 Cabana::Experimental::HDF5ParticleOutput::HDF5Config h5_config; #endif + + Timer _init_timer; + Timer _output_timer; + Timer _timer; }; template @@ -508,8 +533,10 @@ class Particles Particles() : base_type() { + _init_timer.start(); _aosoa_m = aosoa_m_type( "Particle Weighted Volumes", 0 ); _aosoa_theta = aosoa_theta_type( "Particle Dilatations", 0 ); + _init_timer.stop(); } // Constructor which initializes particles on regular grid. @@ -520,17 +547,21 @@ class Particles : base_type( exec_space, low_corner, high_corner, num_cells, max_halo_width ) { + _init_timer.start(); _aosoa_m = aosoa_m_type( "Particle Weighted Volumes", n_local ); _aosoa_theta = aosoa_theta_type( "Particle Dilatations", n_local ); init_lps(); + _init_timer.stop(); } template void createParticles( const ExecSpace& exec_space ) { base_type::createParticles( exec_space ); + _init_timer.start(); _aosoa_m.resize( 0 ); _aosoa_theta.resize( 0 ); + _init_timer.stop(); } auto sliceDilatation() @@ -553,14 +584,18 @@ class Particles void resize( int new_local, int new_ghost ) { base_type::resize( new_local, new_ghost ); + _timer.start(); _aosoa_theta.resize( new_local + new_ghost ); _aosoa_m.resize( new_local + new_ghost ); + _timer.stop(); } void output( [[maybe_unused]] const int output_step, [[maybe_unused]] const double output_time, [[maybe_unused]] const bool use_reference = true ) { + _output_timer.start(); + #ifdef Cabana_ENABLE_HDF5 Cabana::Experimental::HDF5ParticleOutput::writeTimeStep( h5_config, "particles", MPI_COMM_WORLD, output_step, output_time, @@ -583,6 +618,8 @@ class Particles log( std::cout, "No particle output enabled." ); #endif #endif + + _output_timer.stop(); } friend class Comm; @@ -603,6 +640,10 @@ class Particles #ifdef Cabana_ENABLE_HDF5 using base_type::h5_config; #endif + + using base_type::_init_timer; + using base_type::_output_timer; + using base_type::_timer; }; } // namespace CabanaPD diff --git a/src/CabanaPD_Prenotch.hpp b/src/CabanaPD_Prenotch.hpp index d412ed52..23687f60 100644 --- a/src/CabanaPD_Prenotch.hpp +++ b/src/CabanaPD_Prenotch.hpp @@ -202,6 +202,8 @@ struct Prenotch Kokkos::Array _v2; Kokkos::Array, num_notch> _p0_list; + Timer _timer; + Prenotch() {} Prenotch( Kokkos::Array v1, Kokkos::Array v2, @@ -217,6 +219,8 @@ struct Prenotch void create( ExecSpace, NeighborView& mu, Particles& particles, Neighbors& neighbors ) { + _timer.start(); + auto x = particles.sliceReferencePosition(); Kokkos::RangePolicy policy( 0, particles.n_local ); @@ -250,7 +254,9 @@ struct Prenotch }; Kokkos::parallel_for( "CabanaPD::Prenotch", policy, notch_functor ); } + _timer.stop(); } + auto time() { return _timer.time(); }; }; } // namespace CabanaPD diff --git a/src/CabanaPD_Solver.hpp b/src/CabanaPD_Solver.hpp index c1e48135..6f46bfb5 100644 --- a/src/CabanaPD_Solver.hpp +++ b/src/CabanaPD_Solver.hpp @@ -79,6 +79,7 @@ #include #include #include +#include namespace CabanaPD { @@ -116,18 +117,8 @@ class SolverElastic : inputs( _inputs ) , particles( _particles ) , boundary_condition( bc ) + , _init_time( 0.0 ) { - neighbor_time = 0; - force_time = 0; - integrate_time = 0; - comm_time = 0; - energy_time = 0; - output_time = 0; - last_time = 0; - init_time = 0; - total_timer.reset(); - init_timer.reset(); - num_steps = inputs["num_steps"]; output_frequency = inputs["output_frequency"]; output_reference = inputs["output_reference"]; @@ -140,7 +131,7 @@ class SolverElastic comm = std::make_shared( *particles ); // Create the neighbor list. - neighbor_timer.reset(); + _neighbor_timer.start(); double mesh_min[3] = { particles->ghost_mesh_lo[0], particles->ghost_mesh_lo[1], particles->ghost_mesh_lo[2] }; @@ -151,8 +142,9 @@ class SolverElastic neighbors = std::make_shared( x, 0, particles->n_local, force_model.delta, 1.0, mesh_min, mesh_max ); - neighbor_time += neighbor_timer.seconds(); + _neighbor_timer.stop(); + _init_timer.start(); unsigned max_neighbors; unsigned max_local_neighbors = Cabana::NeighborList::maxNeighbor( *neighbors ); @@ -191,12 +183,11 @@ class SolverElastic ", Total neighbors: ", total_neighbors, "\n" ); out.close(); } - init_time += init_timer.seconds(); + _init_timer.stop(); } void init_force() { - init_timer.reset(); // Compute/communicate LPS weighted volume (does nothing for PMB). force->computeWeightedVolume( *particles, *neighbors, neigh_iter_tag{} ); @@ -213,7 +204,6 @@ class SolverElastic boundary_condition.apply( exec_space(), *particles ); particles->output( 0, 0.0, output_reference ); - init_time += init_timer.seconds(); } void run() @@ -223,52 +213,43 @@ class SolverElastic // Main timestep loop. for ( int step = 1; step <= num_steps; step++ ) { + _step_timer.start(); + // Integrate - velocity Verlet first half. - integrate_timer.reset(); integrator->initialHalfStep( *particles ); - integrate_time += integrate_timer.seconds(); // Update ghost particles. - comm_timer.reset(); comm->gatherDisplacement(); - comm_time += comm_timer.seconds(); // Do not need to recompute LPS weighted volume here without damage. // Compute/communicate LPS dilatation (does nothing for PMB). - force_timer.reset(); force->computeDilatation( *particles, *neighbors, neigh_iter_tag{} ); - force_time += force_timer.seconds(); - comm_timer.reset(); comm->gatherDilatation(); - comm_time += comm_timer.seconds(); // Compute internal forces. - force_timer.reset(); computeForce( *force, *particles, *neighbors, neigh_iter_tag{} ); - force_time += force_timer.seconds(); // Add boundary condition. boundary_condition.apply( exec_space(), *particles ); // Integrate - velocity Verlet second half. - integrate_timer.reset(); integrator->finalHalfStep( *particles ); - integrate_time += integrate_timer.seconds(); // Print output. if ( step % output_frequency == 0 ) { - energy_timer.reset(); auto W = computeEnergy( *force, *particles, *neighbors, neigh_iter_tag() ); - energy_time += energy_timer.seconds(); - output_timer.reset(); - step_output( step, W ); particles->output( step / output_frequency, step * dt, output_reference ); - output_time += output_timer.seconds(); + _step_timer.stop(); + step_output( step, W ); + } + else + { + _step_timer.stop(); } } @@ -280,7 +261,11 @@ class SolverElastic { // Output after construction and initial forces. std::ofstream out( output_file, std::ofstream::app ); - log( out, "Init-Time(s): ", init_time, "\n" ); + _init_time += _init_timer.time() + _neighbor_timer.time() + + particles->timeInit() + boundary_condition.timeInit() + + comm->timeInit() + integrator->timeInit(); + log( out, "Init-Time(s): ", _init_time ); + log( out, "Init-Neighbor-Time(s): ", _neighbor_timer.time(), "\n" ); log( out, "#Timestep/Total-steps Simulation-time Total-strain-energy " "Step-Time(s) Force-Time(s) Comm-Time(s) Integrate-Time(s) " "Energy-Time(s) Output-Time(s) Particle*steps/s" ); @@ -294,15 +279,21 @@ class SolverElastic log( std::cout, step, "/", num_steps, " ", std::scientific, std::setprecision( 2 ), step * dt ); - total_time = total_timer.seconds(); - double rate = 1.0 * particles->n_global * output_frequency / - ( total_time - last_time ); + double step_time = _step_timer.time(); + double comm_time = comm->time(); + double integrate_time = integrator->time(); + double force_time = force->time(); + double energy_time = force->timeEnergy(); + double output_time = particles->timeOutput(); + _total_time += step_time; + auto rate = static_cast( particles->n_global * + output_frequency / ( step_time ) ); + _step_timer.reset(); log( out, std::fixed, std::setprecision( 6 ), step, "/", num_steps, " ", std::scientific, std::setprecision( 2 ), step * dt, " ", - W, " ", std::fixed, total_time, " ", force_time, " ", + W, " ", std::fixed, _total_time, " ", force_time, " ", comm_time, " ", integrate_time, " ", energy_time, " ", output_time, " ", std::scientific, rate ); - last_time = total_time; out.close(); } } @@ -312,21 +303,29 @@ class SolverElastic if ( print ) { std::ofstream out( output_file, std::ofstream::app ); - total_time = total_timer.seconds(); - double steps_per_sec = 1.0 * num_steps / total_time; + double comm_time = comm->time(); + double integrate_time = integrator->time(); + double force_time = force->time(); + double energy_time = force->timeEnergy(); + double output_time = particles->timeOutput(); + double neighbor_time = _neighbor_timer.time(); + _total_time = _init_time + comm_time + integrate_time + force_time + + energy_time + output_time + particles->time(); + + double steps_per_sec = 1.0 * num_steps / _total_time; double p_steps_per_sec = particles->n_global * steps_per_sec; log( out, std::fixed, std::setprecision( 2 ), "\n#Procs Particles | Total Force Comm Integrate Energy " "Output Init Init_Neighbor |\n", - comm->mpi_size, " ", particles->n_global, " | ", total_time, + comm->mpi_size, " ", particles->n_global, " | \t", _total_time, " ", force_time, " ", comm_time, " ", integrate_time, " ", - energy_time, " ", output_time, " ", init_time, " ", + energy_time, " ", output_time, " ", _init_time, " ", neighbor_time, " | PERFORMANCE\n", std::fixed, comm->mpi_size, - " ", particles->n_global, " | ", 1.0, " ", - force_time / total_time, " ", comm_time / total_time, " ", - integrate_time / total_time, " ", energy_time / total_time, - " ", output_time / total_time, " ", init_time / total_time, - " ", neighbor_time / total_time, " | FRACTION\n\n", + " ", particles->n_global, " | \t", 1.0, " ", + force_time / _total_time, " ", comm_time / _total_time, " ", + integrate_time / _total_time, " ", energy_time / _total_time, + " ", output_time / _total_time, " ", _init_time / _total_time, + " ", neighbor_time / _total_time, " | FRACTION\n\n", "#Steps/s Particle-steps/s Particle-steps/proc/s\n", std::scientific, steps_per_sec, " ", p_steps_per_sec, " ", p_steps_per_sec / comm->mpi_size ); @@ -351,23 +350,12 @@ class SolverElastic std::string output_file; std::string error_file; - double total_time; - double force_time; - double integrate_time; - double comm_time; - double energy_time; - double output_time; - double init_time; - double last_time; - double neighbor_time; - Kokkos::Timer total_timer; - Kokkos::Timer init_timer; - Kokkos::Timer force_timer; - Kokkos::Timer comm_timer; - Kokkos::Timer integrate_timer; - Kokkos::Timer energy_timer; - Kokkos::Timer output_timer; - Kokkos::Timer neighbor_timer; + // Combined from many class timers. + double _init_time; + Timer _init_timer; + Timer _neighbor_timer; + Timer _step_timer; + double _total_time; bool print; }; @@ -400,14 +388,11 @@ class SolverFracture prenotch_type prenotch ) : base_type( _inputs, _particles, force_model, bc ) { - init_timer.reset(); - init_mu(); // Create prenotch. prenotch.create( exec_space{}, mu, *particles, *neighbors ); - - init_time += init_timer.seconds(); + _init_time += prenotch.time(); } SolverFracture( input_type _inputs, @@ -415,15 +400,12 @@ class SolverFracture force_model_type force_model, bc_type bc ) : base_type( _inputs, _particles, force_model, bc ) { - init_timer.reset(); - init_mu(); - - init_time += init_timer.seconds(); } void init_mu() { + _init_timer.start(); // Create View to track broken bonds. int max_neighbors = Cabana::NeighborList::maxNeighbor( *neighbors ); @@ -431,11 +413,11 @@ class SolverFracture Kokkos::ViewAllocateWithoutInitializing( "broken_bonds" ), particles->n_local, max_neighbors ); Kokkos::deep_copy( mu, 1 ); + _init_timer.stop(); } void init_force() { - init_timer.reset(); // Compute/communicate weighted volume for LPS (does nothing for PMB). force->computeWeightedVolume( *particles, *neighbors, mu ); comm->gatherWeightedVolume(); @@ -451,7 +433,6 @@ class SolverFracture boundary_condition.apply( exec_space(), *particles ); particles->output( 0, 0.0, output_reference ); - init_time += init_timer.seconds(); } void run() @@ -461,58 +442,45 @@ class SolverFracture // Main timestep loop. for ( int step = 1; step <= num_steps; step++ ) { + _step_timer.start(); + // Integrate - velocity Verlet first half. - integrate_timer.reset(); integrator->initialHalfStep( *particles ); - integrate_time += integrate_timer.seconds(); // Update ghost particles. - comm_timer.reset(); comm->gatherDisplacement(); - comm_time += comm_timer.seconds(); // Compute/communicate LPS weighted volume (does nothing for PMB). - force_timer.reset(); force->computeWeightedVolume( *particles, *neighbors, mu ); - force_time += force_timer.seconds(); - comm_timer.reset(); comm->gatherWeightedVolume(); - comm_time += comm_timer.seconds(); // Compute/communicate LPS dilatation (does nothing for PMB). - force_timer.reset(); force->computeDilatation( *particles, *neighbors, mu ); - force_time += force_timer.seconds(); - comm_timer.reset(); comm->gatherDilatation(); - comm_time += comm_timer.seconds(); // Compute internal forces. - force_timer.reset(); computeForce( *force, *particles, *neighbors, mu, neigh_iter_tag{} ); - force_time += force_timer.seconds(); // Add boundary condition. boundary_condition.apply( exec_space{}, *particles ); // Integrate - velocity Verlet second half. - integrate_timer.reset(); integrator->finalHalfStep( *particles ); - integrate_time += integrate_timer.seconds(); // Print output. if ( step % output_frequency == 0 ) { - energy_timer.reset(); auto W = computeEnergy( *force, *particles, *neighbors, mu, neigh_iter_tag() ); - energy_time += energy_timer.seconds(); - output_timer.reset(); - this->step_output( step, W ); particles->output( step / output_frequency, step * dt, output_reference ); - output_time += output_timer.seconds(); + _step_timer.stop(); + this->step_output( step, W ); + } + else + { + _step_timer.stop(); } } @@ -537,22 +505,9 @@ class SolverFracture using NeighborView = typename Kokkos::View; NeighborView mu; - using base_type::comm_time; - using base_type::energy_time; - using base_type::force_time; - using base_type::init_time; - using base_type::integrate_time; - using base_type::output_time; - using base_type::total_time; - - using base_type::comm_timer; - using base_type::energy_timer; - using base_type::force_timer; - using base_type::init_timer; - using base_type::integrate_timer; - using base_type::output_timer; - using base_type::total_timer; - + using base_type::_init_time; + using base_type::_init_timer; + using base_type::_step_timer; using base_type::print; }; diff --git a/src/CabanaPD_Timer.hpp b/src/CabanaPD_Timer.hpp new file mode 100644 index 00000000..24ffa09d --- /dev/null +++ b/src/CabanaPD_Timer.hpp @@ -0,0 +1,68 @@ +/**************************************************************************** + * Copyright (c) 2022-2023 by Oak Ridge National Laboratory * + * All rights reserved. * + * * + * This file is part of CabanaPD. CabanaPD is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef TIMER_H +#define TIMER_H + +#include "mpi.h" +#include +#include + +namespace CabanaPD +{ +class Timer +{ + double _time = 0.0; + double _start_time = 0.0; + double _last_time = 0.0; + double _max_time = 0.0; + double _min_time = 0.0; + int _num_calls = 0; + bool _running = false; + + public: + void start() + { + if ( _running ) + throw std::runtime_error( "Timer already running" ); + + _start_time = MPI_Wtime(); + _running = true; + } + void stop() + { + if ( !_running ) + throw std::runtime_error( "Timer not running." ); + + _last_time = MPI_Wtime() - _start_time; + _time += _last_time; + _num_calls++; + _running = false; + } + void reset() { _time = 0.0; } + bool running() { return _running; } + auto time() { return _time; } + auto minTime() { return _min_time; } + auto maxTime() { return _max_time; } + auto numCalls() { return _num_calls; } + auto lastTime() { return _last_time; } + + void reduceMPI() + { + MPI_Allreduce( &_time, &_max_time, 1, MPI_DOUBLE, MPI_MAX, + MPI_COMM_WORLD ); + MPI_Allreduce( &_time, &_min_time, 1, MPI_DOUBLE, MPI_MIN, + MPI_COMM_WORLD ); + } +}; +} // namespace CabanaPD + +#endif diff --git a/src/force/CabanaPD_Force_LPS.hpp b/src/force/CabanaPD_Force_LPS.hpp index ffc99fa4..3ade3c05 100644 --- a/src/force/CabanaPD_Force_LPS.hpp +++ b/src/force/CabanaPD_Force_LPS.hpp @@ -76,6 +76,9 @@ class Force> bool _half_neigh; ForceModel _model; + Timer _timer; + Timer _energy_timer; + public: using exec_space = ExecutionSpace; @@ -90,6 +93,8 @@ class Force> const NeighListType& neigh_list, const ParallelType neigh_op_tag ) { + _timer.start(); + auto n_local = particles.n_local; auto x = particles.sliceReferencePosition(); auto u = particles.sliceDisplacement(); @@ -111,13 +116,17 @@ class Force> Cabana::neighbor_parallel_for( policy, weighted_volume, neigh_list, Cabana::FirstNeighborsTag(), neigh_op_tag, "CabanaPD::ForceLPS::computeWeightedVolume" ); + + _timer.stop(); } template void computeDilatation( ParticleType& particles, const NeighListType& neigh_list, - const ParallelType neigh_op_tag ) const + const ParallelType neigh_op_tag ) { + _timer.start(); + auto n_local = particles.n_local; const auto x = particles.sliceReferencePosition(); auto u = particles.sliceDisplacement(); @@ -141,6 +150,8 @@ class Force> Cabana::neighbor_parallel_for( policy, dilatation, neigh_list, Cabana::FirstNeighborsTag(), neigh_op_tag, "CabanaPD::ForceLPS::computeDilatation" ); + + _timer.stop(); } template > void computeForceFull( ForceType& f, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, const int n_local, - ParallelType& neigh_op_tag ) const + ParallelType& neigh_op_tag ) { + _timer.start(); + auto theta_coeff = _model.theta_coeff; auto s_coeff = _model.s_coeff; auto model = _model; @@ -183,6 +196,8 @@ class Force> Cabana::neighbor_parallel_for( policy, force_full, neigh_list, Cabana::FirstNeighborsTag(), neigh_op_tag, "CabanaPD::ForceLPS::computeFull" ); + + _timer.stop(); } template > double computeEnergyFull( WType& W, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, - const int n_local, - ParallelType& neigh_op_tag ) const + const int n_local, ParallelType& neigh_op_tag ) { + _energy_timer.start(); + auto theta_coeff = _model.theta_coeff; auto s_coeff = _model.s_coeff; auto model = _model; @@ -228,8 +244,12 @@ class Force> neigh_op_tag, strain_energy, "CabanaPD::ForceLPS::computeEnergyFull" ); + _energy_timer.stop(); return strain_energy; } + + auto time() { return _timer.time(); }; + auto timeEnergy() { return _energy_timer.time(); }; }; template @@ -241,6 +261,9 @@ class Force> using base_type::_half_neigh; ForceModel _model; + using base_type::_energy_timer; + using base_type::_timer; + public: using exec_space = ExecutionSpace; @@ -255,6 +278,8 @@ class Force> const NeighListType& neigh_list, const MuView& mu ) { + _timer.start(); + auto n_local = particles.n_local; auto x = particles.sliceReferencePosition(); auto u = particles.sliceDisplacement(); @@ -287,13 +312,16 @@ class Force> Kokkos::RangePolicy policy( 0, n_local ); Kokkos::parallel_for( "CabanaPD::ForceLPSDamage::computeWeightedVolume", policy, weighted_volume ); + + _timer.stop(); } template void computeDilatation( ParticleType& particles, - const NeighListType& neigh_list, - const MuView& mu ) const + const NeighListType& neigh_list, const MuView& mu ) { + _timer.start(); + auto n_local = particles.n_local; const auto x = particles.sliceReferencePosition(); auto u = particles.sliceDisplacement(); @@ -331,6 +359,8 @@ class Force> Kokkos::RangePolicy policy( 0, n_local ); Kokkos::parallel_for( "CabanaPD::ForceLPSDamage::computeDilatation", policy, dilatation ); + + _timer.stop(); } template > void computeForceFull( ForceType& f, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, MuView& mu, - const int n_local, ParallelType& ) const + const int n_local, ParallelType& ) { + _timer.start(); + auto break_coeff = _model.bond_break_coeff; auto theta_coeff = _model.theta_coeff; auto s_coeff = _model.s_coeff; @@ -400,6 +432,8 @@ class Force> Kokkos::RangePolicy policy( 0, n_local ); Kokkos::parallel_for( "CabanaPD::ForceLPSDamage::computeFull", policy, force_full ); + + _timer.stop(); } template > double computeEnergyFull( WType& W, const PosType& x, const PosType& u, DamageType& phi, const ParticleType& particles, const NeighListType& neigh_list, MuView& mu, - const int n_local, ParallelType& ) const + const int n_local, ParallelType& ) { + _energy_timer.start(); + auto theta_coeff = _model.theta_coeff; auto s_coeff = _model.s_coeff; auto model = _model; @@ -457,6 +493,7 @@ class Force> Kokkos::parallel_reduce( "CabanaPD::ForceLPSDamage::computeEnergyFull", policy, energy_full, strain_energy ); + _energy_timer.stop(); return strain_energy; } }; @@ -470,6 +507,9 @@ class Force> using base_type::_half_neigh; ForceModel _model; + using base_type::_energy_timer; + using base_type::_timer; + public: using exec_space = ExecutionSpace; @@ -484,8 +524,10 @@ class Force> void computeForceFull( ForceType& f, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, const int n_local, - ParallelType& neigh_op_tag ) const + ParallelType& neigh_op_tag ) { + _timer.start(); + auto theta_coeff = _model.theta_coeff; auto s_coeff = _model.s_coeff; auto model = _model; @@ -525,6 +567,8 @@ class Force> Cabana::neighbor_parallel_for( policy, force_full, neigh_list, Cabana::FirstNeighborsTag(), neigh_op_tag, "CabanaPD::ForceLPS::computeFull" ); + + _timer.stop(); } template > double computeEnergyFull( WType& W, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, - const int n_local, - ParallelType& neigh_op_tag ) const + const int n_local, ParallelType& neigh_op_tag ) { + _energy_timer.start(); + auto theta_coeff = _model.theta_coeff; auto s_coeff = _model.s_coeff; auto model = _model; @@ -572,6 +617,7 @@ class Force> neigh_op_tag, strain_energy, "CabanaPD::ForceLPS::computeEnergyFull" ); + _energy_timer.stop(); return strain_energy; } }; diff --git a/src/force/CabanaPD_Force_PMB.hpp b/src/force/CabanaPD_Force_PMB.hpp index 2b1ba5dc..36f1223b 100644 --- a/src/force/CabanaPD_Force_PMB.hpp +++ b/src/force/CabanaPD_Force_PMB.hpp @@ -76,6 +76,9 @@ class Force> bool _half_neigh; ForceModel _model; + Timer _timer; + Timer _energy_timer; + public: using exec_space = ExecutionSpace; @@ -87,12 +90,12 @@ class Force> template void computeWeightedVolume( ParticleType&, const NeighListType&, - const ParallelType ) + const ParallelType ) const { } template void computeDilatation( ParticleType&, const NeighListType&, - const ParallelType ) + const ParallelType ) const { } @@ -101,8 +104,10 @@ class Force> void computeForceFull( ForceType& f, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, const int n_local, - ParallelType& neigh_op_tag ) const + ParallelType& neigh_op_tag ) { + _timer.start(); + auto c = _model.c; const auto vol = particles.sliceVolume(); @@ -129,6 +134,8 @@ class Force> Cabana::neighbor_parallel_for( policy, force_full, neigh_list, Cabana::FirstNeighborsTag(), neigh_op_tag, "CabanaPD::ForcePMB::computeFull" ); + + _timer.stop(); } template > double computeEnergyFull( WType& W, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, - const int n_local, - ParallelType& neigh_op_tag ) const + const int n_local, ParallelType& neigh_op_tag ) { + _energy_timer.start(); + auto c = _model.c; const auto vol = particles.sliceVolume(); @@ -163,8 +171,12 @@ class Force> neigh_op_tag, strain_energy, "CabanaPD::ForcePMB::computeEnergyFull" ); + _energy_timer.stop(); return strain_energy; } + + auto time() { return _timer.time(); }; + auto timeEnergy() { return _energy_timer.time(); }; }; template @@ -176,6 +188,9 @@ class Force> using base_type::_half_neigh; ForceModel _model; + using base_type::_energy_timer; + using base_type::_timer; + public: using exec_space = ExecutionSpace; @@ -190,8 +205,10 @@ class Force> void computeForceFull( ForceType& f, const PosType& x, const PosType& u, const ParticleType& particles, const NeighListType& neigh_list, MuView& mu, - const int n_local, ParallelType& ) const + const int n_local, ParallelType& ) { + _timer.start(); + auto c = _model.c; auto break_coeff = _model.bond_break_coeff; const auto vol = particles.sliceVolume(); @@ -242,6 +259,8 @@ class Force> Kokkos::RangePolicy policy( 0, n_local ); Kokkos::parallel_for( "CabanaPD::ForcePMBDamage::computeFull", policy, force_full ); + + _timer.stop(); } template > double computeEnergyFull( WType& W, const PosType& x, const PosType& u, DamageType& phi, const ParticleType& particles, const NeighListType& neigh_list, MuView& mu, - const int n_local, ParallelType& ) const + const int n_local, ParallelType& ) { + _energy_timer.start(); + auto c = _model.c; const auto vol = particles.sliceVolume(); @@ -287,6 +308,7 @@ class Force> Kokkos::parallel_reduce( "CabanaPD::ForcePMBDamage::computeEnergyFull", policy, energy_full, strain_energy ); + _energy_timer.stop(); return strain_energy; } }; @@ -300,6 +322,9 @@ class Force> using base_type::_half_neigh; ForceModel _model; + using base_type::_energy_timer; + using base_type::_timer; + public: using exec_space = ExecutionSpace; @@ -314,8 +339,10 @@ class Force> void computeForceFull( ForceType& f, const PosType& x, const PosType& u, ParticleType& particles, const NeighListType& neigh_list, const int n_local, - ParallelType& neigh_op_tag ) const + ParallelType& neigh_op_tag ) { + _timer.start(); + auto c = _model.c; const auto vol = particles.sliceVolume(); @@ -345,15 +372,19 @@ class Force> Cabana::neighbor_parallel_for( policy, force_full, neigh_list, Cabana::FirstNeighborsTag(), neigh_op_tag, "CabanaPD::ForceLinearPMB::computeFull" ); + + _timer.stop(); } template - double - computeEnergyFull( WType& W, const PosType& x, const PosType& u, - ParticleType& particles, const NeighListType& neigh_list, - const int n_local, ParallelType& neigh_op_tag ) const + double computeEnergyFull( WType& W, const PosType& x, const PosType& u, + ParticleType& particles, + const NeighListType& neigh_list, + const int n_local, ParallelType& neigh_op_tag ) { + _energy_timer.start(); + auto c = _model.c; const auto vol = particles.sliceVolume(); @@ -378,6 +409,7 @@ class Force> neigh_op_tag, strain_energy, "CabanaPD::ForceLinearPMB::computeEnergyFull" ); + _energy_timer.stop(); return strain_energy; } }; diff --git a/unit_test/tstForce.hpp b/unit_test/tstForce.hpp index b3fc96a6..be48fca7 100644 --- a/unit_test/tstForce.hpp +++ b/unit_test/tstForce.hpp @@ -620,7 +620,7 @@ struct NoDamageTag }; template -double computeEnergyAndForce( NoDamageTag, const ForceType force, +double computeEnergyAndForce( NoDamageTag, ForceType force, ParticleType& particles, const NeighborList& neigh_list, const int ) { @@ -630,10 +630,9 @@ double computeEnergyAndForce( NoDamageTag, const ForceType force, return Phi; } template -double computeEnergyAndForce( DamageTag, const ForceType force, - ParticleType& particles, - const NeighborList& neigh_list, - const int max_neighbors ) +double +computeEnergyAndForce( DamageTag, ForceType force, ParticleType& particles, + const NeighborList& neigh_list, const int max_neighbors ) { Kokkos::View mu( Kokkos::ViewAllocateWithoutInitializing( "broken_bonds" ),