Skip to content

Commit

Permalink
Don't reallocate LJEn and REn in BoxInterGPU if the previous size is …
Browse files Browse the repository at this point in the history
…big enough
  • Loading branch information
LSchwiebert committed Jul 25, 2024
1 parent 44cedc1 commit 09ef528
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 16 deletions.
20 changes: 6 additions & 14 deletions src/GPU/CalculateEnergyCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
int numberOfCells = neighborList.size();
int *gpu_particleKind, *gpu_particleMol;
int *gpu_neighborList, *gpu_cellStartIndex;
double *gpu_REn, *gpu_LJEn;

// Run the kernel
int threadsPerBlock = THREADS_PER_BLOCK;
Expand All @@ -57,10 +56,7 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
CUMALLOC((void **)&gpu_cellStartIndex, cellStartIndex.size() * sizeof(int));
CUMALLOC((void **)&gpu_particleKind, particleKind.size() * sizeof(int));
CUMALLOC((void **)&gpu_particleMol, particleMol.size() * sizeof(int));
CUMALLOC((void **)&gpu_LJEn, energyVectorLen * sizeof(double));
if (electrostatic) {
CUMALLOC((void **)&gpu_REn, energyVectorLen * sizeof(double));
}
UpdateEnergyVecs(vars, energyVectorLen, electrostatic);

// Copy necessary data to GPU
cudaMemcpy(gpu_neighborList, &neighborlist1D[0],
Expand Down Expand Up @@ -92,8 +88,8 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
BoxInterGPU<<<blocksPerGrid, threadsPerBlock>>>(
gpu_cellStartIndex, vars->gpu_cellVector, gpu_neighborList, numberOfCells,
vars->gpu_x, vars->gpu_y, vars->gpu_z, axis, halfAx, electrostatic,
vars->gpu_particleCharge, gpu_particleKind, gpu_particleMol, gpu_REn,
gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n,
vars->gpu_particleCharge, gpu_particleKind, gpu_particleMol, vars->gpu_REn,
vars->gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n,
vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth,
Expand All @@ -111,17 +107,17 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
void *d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
// LJ ReduceSum
DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, gpu_LJEn,
DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, vars->gpu_LJEn,
vars->gpu_finalVal, energyVectorLen);
CubDebugExit(CUMALLOC(&d_temp_storage, temp_storage_bytes));
DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, gpu_LJEn,
DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, vars->gpu_LJEn,
vars->gpu_finalVal, energyVectorLen);
// Copy back the result to CPU ! :)
CubDebugExit(cudaMemcpy(&LJEn, vars->gpu_finalVal, sizeof(double),
cudaMemcpyDeviceToHost));
if (electrostatic) {
// Real Term ReduceSum
DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, gpu_REn,
DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, vars->gpu_REn,
vars->gpu_finalVal, energyVectorLen);
// Copy back the result to CPU ! :)
CubDebugExit(cudaMemcpy(&REn, vars->gpu_finalVal, sizeof(double),
Expand All @@ -133,10 +129,6 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
CUFREE(d_temp_storage);
CUFREE(gpu_particleKind);
CUFREE(gpu_particleMol);
CUFREE(gpu_LJEn);
if (electrostatic) {
CUFREE(gpu_REn);
}
CUFREE(gpu_neighborList);
CUFREE(gpu_cellStartIndex);
}
Expand Down
22 changes: 21 additions & 1 deletion src/GPU/ConstantDefinitionsCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,24 @@ void UpdateInvCellBasisCUDA(VariablesCUDA *vars, uint box,
#endif
}

void UpdateEnergyVecs(VariablesCUDA *vars, int newVecLen, bool electrostatic) {
// If we haven't exceeded the previous maximum size, we can reuse the storage
if (vars->gpu_energyVecLen >= newVecLen) return;

// Free the current allocations if this isn't the first allocation
if (vars->gpu_energyVecLen > 0) {
CUFREE(vars->gpu_LJEn);
if (electrostatic) {
CUFREE(vars->gpu_REn);
}
}
vars->gpu_energyVecLen = newVecLen;
CUMALLOC((void **)&vars->gpu_LJEn, vars->gpu_energyVecLen * sizeof(double));
if (electrostatic) {
CUMALLOC((void **)&vars->gpu_REn, vars->gpu_energyVecLen * sizeof(double));
}
}

void DestroyEwaldCUDAVars(VariablesCUDA *vars) {
for (uint b = 0; b < BOX_TOTAL; b++) {
CUFREE(vars->gpu_kx[b]);
Expand Down Expand Up @@ -375,6 +393,8 @@ void DestroyCUDAVars(VariablesCUDA *vars) {
CUFREE(vars->gpu_comx);
CUFREE(vars->gpu_comy);
CUFREE(vars->gpu_comz);
CUFREE(vars->gpu_LJEn);
CUFREE(vars->gpu_REn);
CUFREE(vars->gpu_r_k_x);
CUFREE(vars->gpu_r_k_y);
CUFREE(vars->gpu_r_k_z);
Expand Down Expand Up @@ -410,7 +430,7 @@ void DestroyCUDAVars(VariablesCUDA *vars) {
CUFREE(vars->gpu_Invcell_z[b]);
}

// delete GPU memory for lambda variables
// free GPU memory for lambda variables
CUFREE(vars->gpu_molIndex);
CUFREE(vars->gpu_lambdaVDW);
CUFREE(vars->gpu_lambdaCoulomb);
Expand Down
1 change: 1 addition & 0 deletions src/GPU/ConstantDefinitionsCUDAKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void UpdateCellBasisCUDA(VariablesCUDA *vars, uint box, double *cellBasis_x,
void UpdateInvCellBasisCUDA(VariablesCUDA *vars, uint box,
double *invCellBasis_x, double *invCellBasis_y,
double *invCellBasis_z);
void UpdateEnergyVecs(VariablesCUDA *vars, int newVecLen, bool electrostatic);
void DestroyEwaldCUDAVars(VariablesCUDA *vars);
void DestroyExp6CUDAVars(VariablesCUDA *vars);
void DestroyCUDAVars(VariablesCUDA *vars);
Expand Down
7 changes: 6 additions & 1 deletion src/GPU/VariablesCUDA.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,11 @@ public:
gpu_mForcey = nullptr;
gpu_mForcez = nullptr;
gpu_startAtomIdx = nullptr;
gpu_energyVecLen = 0;
gpu_LJEn = nullptr;
gpu_REn = nullptr;

// setting lambda valuesy to null
// setting lambda values to nullptr
gpu_molIndex = nullptr;
gpu_lambdaVDW = nullptr;
gpu_lambdaCoulomb = nullptr;
Expand Down Expand Up @@ -121,6 +124,8 @@ public:
double **gpu_hsqr, **gpu_hsqrRef;
double *gpu_recipEnergies;
double *gpu_comx, *gpu_comy, *gpu_comz;
int gpu_energyVecLen;
double *gpu_LJEn, *gpu_REn;
double *gpu_rT11, *gpu_rT12, *gpu_rT13;
double *gpu_rT22, *gpu_rT23, *gpu_rT33;
double *gpu_vT11, *gpu_vT12, *gpu_vT13;
Expand Down

0 comments on commit 09ef528

Please sign in to comment.