Skip to content

Commit

Permalink
Store copies large vectors in ActionWithVector so that we can avoid a…
Browse files Browse the repository at this point in the history
… lot of malloc and optimise the code
  • Loading branch information
Gareth Aneurin Tribello authored and Gareth Aneurin Tribello committed Sep 22, 2024
1 parent 46f53ba commit 8628615
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 56 deletions.
2 changes: 1 addition & 1 deletion src/adjmat/TopologyMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ double TopologyMatrix::calculateWeight( const Vector& pos1, const Vector& pos2,
// Now run through all sea atoms
HistogramBead bead; bead.isNotPeriodic(); bead.setKernelType( kerneltype );
Vector g1derivf,g2derivf,lderivf; Tensor vir; double binlength = maxbins * binw_mat;
MultiValue tvals( maxbins, myvals.getNumberOfDerivatives() );
MultiValue tvals; tvals.resize( maxbins, myvals.getNumberOfDerivatives() );
for(unsigned i=0; i<natoms; ++i) {
// Position of sea atom (this will be the origin)
Vector d2 = getPosition(i,myvals);
Expand Down
51 changes: 27 additions & 24 deletions src/core/ActionWithVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ void ActionWithVector::runAllTasks() {
unsigned nt=OpenMP::getNumThreads();
if( nt*stride*10>nactive_tasks ) nt=nactive_tasks/stride/10;
if( nt==0 ) nt=1;
if( myvals.size()!=nt ) myvals.resize(nt);

// Get the total number of streamed quantities that we need
// Get size for buffer
Expand All @@ -210,28 +211,31 @@ void ActionWithVector::runAllTasks() {

// Recover the number of derivatives we require
if( !doNotCalculateDerivatives() && !gridsInStream ) {
unsigned nargs = getNumberOfArguments(); int nmasks=getNumberOfMasks();
if( nargs>=nmasks && nmasks>0 ) nargs = nargs - nmasks;
if( getNumberOfAtoms()>0 ) nderivatives += 3*getNumberOfAtoms() + 9;
for(unsigned i=0; i<getNumberOfArguments(); ++i) nderivatives += getPntrToArgument(i)->getNumberOfValues();
}

#pragma omp parallel num_threads(nt)
{
std::vector<double> omp_buffer;
const unsigned t=OpenMP::getThreadNum();
if( nt>1 ) omp_buffer.resize( bufsize, 0.0 );
MultiValue myvals( getNumberOfComponents(), nderivatives, 0 );
myvals.clearAll();
if( myvals[t].getNumberOfValues()!=getNumberOfComponents() || myvals[t].getNumberOfDerivatives()!=nderivatives ) myvals[t].resize( getNumberOfComponents(), nderivatives );
myvals[t].clearAll();

#pragma omp for nowait
for(unsigned i=rank; i<nactive_tasks; i+=stride) {
// Calculate the stuff in the loop for this action
runTask( partialTaskList[i], myvals );
runTask( partialTaskList[i], myvals[t] );

// Now transfer the data to the actions that accumulate values from the calculated quantities
if( nt>1 ) gatherAccumulators( partialTaskList[i], myvals, omp_buffer );
else gatherAccumulators( partialTaskList[i], myvals, buffer );
if( nt>1 ) gatherAccumulators( partialTaskList[i], myvals[t], omp_buffer );
else gatherAccumulators( partialTaskList[i], myvals[t], buffer );

// Clear the value
myvals.clearAll();
myvals[t].clearAll();
}
#pragma omp critical
if( nt>1 ) for(unsigned i=0; i<bufsize; ++i) buffer[i]+=omp_buffer[i];
Expand Down Expand Up @@ -349,18 +353,14 @@ bool ActionWithVector::checkForForces() {
unsigned nt=OpenMP::getNumThreads();
if( nt*stride*10>nf_tasks ) nt=nf_tasks/stride/10;
if( nt==0 ) nt=1;
if( myvals.size()!=nt ) myvals.resize(nt);
if( omp_forces.size()!=nt ) omp_forces.resize(nt);

// Now determine how big the multivalue needs to be
unsigned nmatrices=0; ActionWithMatrix* am=dynamic_cast<ActionWithMatrix*>(this);
if(am) {
for(unsigned i=0; i<getNumberOfComponents(); ++i) {
if( getConstPntrToComponent(i)->getRank()==2 && !getConstPntrToComponent(i)->hasDerivatives() ) { nmatrices=getNumberOfComponents(); }
}
}
// Recover the number of derivatives we require (this should be equal to the number of forces)
unsigned nderiv=0;
unsigned nderiv=0, nargs = getNumberOfArguments(); int nmasks = getNumberOfMasks();
if( nargs>=nmasks && nmasks>0 ) nargs = nargs - nmasks;
if( getNumberOfAtoms()>0 ) nderiv += 3*getNumberOfAtoms() + 9;
for(unsigned i=0; i<getNumberOfArguments(); ++i) {
for(unsigned i=0; i<nargs; ++i) {
nderiv += getPntrToArgument(i)->getNumberOfStoredValues();
}
if( forcesForApply.size()!=nderiv ) forcesForApply.resize( nderiv );
Expand All @@ -369,23 +369,26 @@ bool ActionWithVector::checkForForces() {

#pragma omp parallel num_threads(nt)
{
std::vector<double> omp_forces;
if( nt>1 ) omp_forces.resize( forcesForApply.size(), 0.0 );
MultiValue myvals( getNumberOfComponents(), nderiv, nmatrices );
myvals.clearAll();
const unsigned t=OpenMP::getThreadNum();
if( nt>1 ) {
if( omp_forces[t].size()!=forcesForApply.size() ) omp_forces[t].resize( forcesForApply.size(), 0.0 );
else omp_forces[t].assign( forcesForApply.size(), 0.0 );
}
if( myvals[t].getNumberOfValues()!=getNumberOfComponents() || myvals[t].getNumberOfDerivatives()!=nderiv ) myvals[t].resize( getNumberOfComponents(), nderiv );
myvals[t].clearAll();

#pragma omp for nowait
for(unsigned i=rank; i<nf_tasks; i+=stride) {
runTask( force_tasks[i], myvals );
runTask( force_tasks[i], myvals[t] );

// Now get the forces
if( nt>1 ) gatherForces( force_tasks[i], myvals, omp_forces );
else gatherForces( force_tasks[i], myvals, forcesForApply );
if( nt>1 ) gatherForces( force_tasks[i], myvals[t], omp_forces[t] );
else gatherForces( force_tasks[i], myvals[t], forcesForApply );

myvals.clearAll();
myvals[t].clearAll();
}
#pragma omp critical
if(nt>1) for(unsigned i=0; i<forcesForApply.size(); ++i) forcesForApply[i]+=omp_forces[i];
if(nt>1) for(unsigned i=0; i<forcesForApply.size(); ++i) forcesForApply[i]+=omp_forces[t][i];
}
// MPI Gather on forces
if( !serial ) comm.Sum( forcesForApply );
Expand Down
4 changes: 4 additions & 0 deletions src/core/ActionWithVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ class ActionWithVector:
bool forwardPass;
/// The buffer that we use (we keep a copy here to avoid resizing)
std::vector<double> buffer;
/// A tempory vector of MultiValue so we can avoid doing lots of resizes
std::vector<MultiValue> myvals;
/// A tempory set of vectors for holding forces over threads
std::vector<std::vector<double> > omp_forces;
/// The list of active tasks
std::vector<unsigned> active_tasks;
/// Clear all the bookeeping arrays
Expand Down
5 changes: 3 additions & 2 deletions src/function/FunctionOfVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,9 @@ void FunctionOfVector<T>::prepare() {

template <class T>
void FunctionOfVector<T>::performTask( const unsigned& current, MultiValue& myvals ) const {
unsigned argstart=myfunc.getArgStart(); std::vector<double> args( getNumberOfArguments()-argstart);
for(unsigned i=argstart; i<getNumberOfArguments(); ++i) {
unsigned nargs=getNumberOfArguments(); if( getNumberOfMasks()>0 ) nargs = nargs - getNumberOfMasks();
unsigned argstart=myfunc.getArgStart(); std::vector<double> args( nargs-argstart);
for(unsigned i=argstart; i<nargs; ++i) {
if( getPntrToArgument(i)->getRank()==1 ) args[i-argstart]=getPntrToArgument(i)->get(current);
else args[i-argstart] = getPntrToArgument(i)->get();
}
Expand Down
2 changes: 1 addition & 1 deletion src/refdist/MatrixProductDiagonal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ void MatrixProductDiagonal::performTask( const unsigned& task_index, MultiValue&
void MatrixProductDiagonal::calculate() {
if( getPntrToArgument(1)->getRank()==1 ) {
unsigned nder = getNumberOfDerivatives();
MultiValue myvals( 1, nder, 0 ); performTask( 0, myvals );
MultiValue myvals; myvals.resize( 1, nder ); performTask( 0, myvals );

Value* myval=getPntrToComponent(0); myval->set( myvals.get(0) );
for(unsigned i=0; i<nder; ++i) myval->setDerivative( i, myvals.getDerivative(0,i) );
Expand Down
3 changes: 0 additions & 3 deletions src/secondarystructure/SecondaryStructureRMSD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,6 @@ void SecondaryStructureRMSD::calculate() {
}

void SecondaryStructureRMSD::performTask( const unsigned& current, MultiValue& myvals ) const {
// Resize the derivatives if need be
unsigned nderi = 3*getNumberOfAtoms()+9;
if( myvals.getNumberOfDerivatives()!=nderi ) myvals.resize( myvals.getNumberOfValues(), nderi, 0 );
// Retrieve the positions
const unsigned natoms = colvar_atoms[current].size();
std::vector<Vector> pos( natoms ), deriv( natoms );
Expand Down
26 changes: 3 additions & 23 deletions src/tools/MultiValue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,11 @@

namespace PLMD {

MultiValue::MultiValue( const size_t& nvals, const size_t& nder, const size_t& nmat ):
task_index(0),
task2_index(0),
values(nvals),
nderivatives(nder),
derivatives(nvals*nder),
hasderiv(nvals*nder,false),
nactive(nvals),
active_list(nvals*nder),
atLeastOneSet(false),
vector_call(false),
nindices(0),
nsplit(0),
matrix_force_stash(0),
matrix_row_nderivatives(0),
matrix_row_derivative_indices(nder)
{
if( nmat>0 ) matrix_force_stash.resize(nder,0);
}

void MultiValue::resize( const size_t& nvals, const size_t& nder, const size_t& nmat ) {
if( values.size()==nvals && nderivatives==nder ) return;
void MultiValue::resize( const size_t& nvals, const size_t& nder ) {
if( values.size()==nvals && nderivatives>nder ) return;
values.resize(nvals); nderivatives=nder; derivatives.resize( nvals*nder );
hasderiv.resize(nvals*nder,false); nactive.resize(nvals); active_list.resize(nvals*nder);
if( nmat>0 ) matrix_force_stash.resize(nder,0);
matrix_force_stash.resize(nder,0);
matrix_row_nderivatives=0; matrix_row_derivative_indices.resize(nder); atLeastOneSet=false;
}

Expand Down
4 changes: 2 additions & 2 deletions src/tools/MultiValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ class MultiValue {
std::vector<Tensor> tmp_atom_virial;
std::vector<std::vector<double> > tmp_vectors;
public:
MultiValue( const std::size_t& nvals, const std::size_t& nder, const std::size_t& nmat=0 );
void resize( const std::size_t& nvals, const std::size_t& nder, const std::size_t& nmat=0 );
MultiValue() : task_index(0), task2_index(0), nderivatives(0), atLeastOneSet(false), vector_call(false), nindices(0), nsplit(0), matrix_row_nderivatives(0) {}
void resize( const std::size_t& nvals, const std::size_t& nder );
/// Set the task index prior to the loop
void setTaskIndex( const std::size_t& tindex );
///
Expand Down

1 comment on commit 8628615

@PlumedBot
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found broken examples in automatic/ANGLES.tmp
Found broken examples in automatic/ANN.tmp
Found broken examples in automatic/CAVITY.tmp
Found broken examples in automatic/CLASSICAL_MDS.tmp
Found broken examples in automatic/CLUSTER_DIAMETER.tmp
Found broken examples in automatic/CLUSTER_DISTRIBUTION.tmp
Found broken examples in automatic/CLUSTER_PROPERTIES.tmp
Found broken examples in automatic/CONSTANT.tmp
Found broken examples in automatic/CONTACT_MATRIX.tmp
Found broken examples in automatic/CONTACT_MATRIX_PROPER.tmp
Found broken examples in automatic/COORDINATIONNUMBER.tmp
Found broken examples in automatic/DFSCLUSTERING.tmp
Found broken examples in automatic/DISTANCE_FROM_CONTOUR.tmp
Found broken examples in automatic/EDS.tmp
Found broken examples in automatic/EMMI.tmp
Found broken examples in automatic/ENVIRONMENTSIMILARITY.tmp
Found broken examples in automatic/FIND_CONTOUR.tmp
Found broken examples in automatic/FIND_CONTOUR_SURFACE.tmp
Found broken examples in automatic/FIND_SPHERICAL_CONTOUR.tmp
Found broken examples in automatic/FOURIER_TRANSFORM.tmp
Found broken examples in automatic/FUNCPATHGENERAL.tmp
Found broken examples in automatic/FUNCPATHMSD.tmp
Found broken examples in automatic/FUNNEL.tmp
Found broken examples in automatic/FUNNEL_PS.tmp
Found broken examples in automatic/GHBFIX.tmp
Found broken examples in automatic/GPROPERTYMAP.tmp
Found broken examples in automatic/HBOND_MATRIX.tmp
Found broken examples in automatic/INCLUDE.tmp
Found broken examples in automatic/INCYLINDER.tmp
Found broken examples in automatic/INENVELOPE.tmp
Found broken examples in automatic/INTERPOLATE_GRID.tmp
Found broken examples in automatic/LOCAL_AVERAGE.tmp
Found broken examples in automatic/MAZE_OPTIMIZER_BIAS.tmp
Found broken examples in automatic/MAZE_RANDOM_ACCELERATION_MD.tmp
Found broken examples in automatic/MAZE_SIMULATED_ANNEALING.tmp
Found broken examples in automatic/MAZE_STEERED_MD.tmp
Found broken examples in automatic/METATENSOR.tmp
Found broken examples in automatic/MULTICOLVARDENS.tmp
Found broken examples in automatic/OUTPUT_CLUSTER.tmp
Found broken examples in automatic/PAMM.tmp
Found broken examples in automatic/PCA.tmp
Found broken examples in automatic/PCAVARS.tmp
Found broken examples in automatic/PIV.tmp
Found broken examples in automatic/PLUMED.tmp
Found broken examples in automatic/PYCVINTERFACE.tmp
Found broken examples in automatic/PYTHONFUNCTION.tmp
Found broken examples in automatic/Q3.tmp
Found broken examples in automatic/Q4.tmp
Found broken examples in automatic/Q6.tmp
Found broken examples in automatic/QUATERNION.tmp
Found broken examples in automatic/SIZESHAPE_POSITION_LINEAR_PROJ.tmp
Found broken examples in automatic/SIZESHAPE_POSITION_MAHA_DIST.tmp
Found broken examples in automatic/SPRINT.tmp
Found broken examples in automatic/TETRAHEDRALPORE.tmp
Found broken examples in automatic/TORSIONS.tmp
Found broken examples in automatic/WHAM_WEIGHTS.tmp
Found broken examples in AnalysisPP.md
Found broken examples in CollectiveVariablesPP.md
Found broken examples in MiscelaneousPP.md

Please sign in to comment.