diff --git a/src/GPU/CalculateEnergyCUDAKernel.cu b/src/GPU/CalculateEnergyCUDAKernel.cu index 02b6f261b..a64d631fc 100644 --- a/src/GPU/CalculateEnergyCUDAKernel.cu +++ b/src/GPU/CalculateEnergyCUDAKernel.cu @@ -38,7 +38,6 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, int numberOfCells = neighborList.size(); int *gpu_particleKind, *gpu_particleMol; int *gpu_neighborList, *gpu_cellStartIndex; - double *gpu_REn, *gpu_LJEn; // Run the kernel int threadsPerBlock = THREADS_PER_BLOCK; @@ -57,10 +56,7 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, CUMALLOC((void **)&gpu_cellStartIndex, cellStartIndex.size() * sizeof(int)); CUMALLOC((void **)&gpu_particleKind, particleKind.size() * sizeof(int)); CUMALLOC((void **)&gpu_particleMol, particleMol.size() * sizeof(int)); - CUMALLOC((void **)&gpu_LJEn, energyVectorLen * sizeof(double)); - if (electrostatic) { - CUMALLOC((void **)&gpu_REn, energyVectorLen * sizeof(double)); - } + UpdateEnergyVecs(vars, energyVectorLen, electrostatic); // Copy necessary data to GPU cudaMemcpy(gpu_neighborList, &neighborlist1D[0], @@ -92,8 +88,8 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, BoxInterGPU<<>>( gpu_cellStartIndex, vars->gpu_cellVector, gpu_neighborList, numberOfCells, vars->gpu_x, vars->gpu_y, vars->gpu_z, axis, halfAx, electrostatic, - vars->gpu_particleCharge, gpu_particleKind, gpu_particleMol, gpu_REn, - gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, + vars->gpu_particleCharge, gpu_particleKind, gpu_particleMol, vars->gpu_REn, + vars->gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth, @@ -111,17 +107,17 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, void *d_temp_storage = NULL; size_t temp_storage_bytes = 0; // LJ ReduceSum - DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, gpu_LJEn, + DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, vars->gpu_LJEn, vars->gpu_finalVal, energyVectorLen); CubDebugExit(CUMALLOC(&d_temp_storage, temp_storage_bytes)); - DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, gpu_LJEn, + DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, vars->gpu_LJEn, vars->gpu_finalVal, energyVectorLen); // Copy back the result to CPU ! :) CubDebugExit(cudaMemcpy(&LJEn, vars->gpu_finalVal, sizeof(double), cudaMemcpyDeviceToHost)); if (electrostatic) { // Real Term ReduceSum - DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, gpu_REn, + DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, vars->gpu_REn, vars->gpu_finalVal, energyVectorLen); // Copy back the result to CPU ! :) CubDebugExit(cudaMemcpy(&REn, vars->gpu_finalVal, sizeof(double), @@ -133,10 +129,6 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, CUFREE(d_temp_storage); CUFREE(gpu_particleKind); CUFREE(gpu_particleMol); - CUFREE(gpu_LJEn); - if (electrostatic) { - CUFREE(gpu_REn); - } CUFREE(gpu_neighborList); CUFREE(gpu_cellStartIndex); } diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cu b/src/GPU/ConstantDefinitionsCUDAKernel.cu index ea9c9d203..940cda819 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cu +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cu @@ -305,6 +305,24 @@ void UpdateInvCellBasisCUDA(VariablesCUDA *vars, uint box, #endif } +void UpdateEnergyVecs(VariablesCUDA *vars, int newVecLen, bool electrostatic) { + // If we haven't exceeded the previous maximum size, we can reuse the storage + if (vars->gpu_energyVecLen >= newVecLen) return; + + // Free the current allocations if this isn't the first allocation + if (vars->gpu_energyVecLen > 0) { + CUFREE(vars->gpu_LJEn); + if (electrostatic) { + CUFREE(vars->gpu_REn); + } + } + vars->gpu_energyVecLen = newVecLen; + CUMALLOC((void **)&vars->gpu_LJEn, vars->gpu_energyVecLen * sizeof(double)); + if (electrostatic) { + CUMALLOC((void **)&vars->gpu_REn, vars->gpu_energyVecLen * sizeof(double)); + } +} + void DestroyEwaldCUDAVars(VariablesCUDA *vars) { for (uint b = 0; b < BOX_TOTAL; b++) { CUFREE(vars->gpu_kx[b]); @@ -375,6 +393,8 @@ void DestroyCUDAVars(VariablesCUDA *vars) { CUFREE(vars->gpu_comx); CUFREE(vars->gpu_comy); CUFREE(vars->gpu_comz); + CUFREE(vars->gpu_LJEn); + CUFREE(vars->gpu_REn); CUFREE(vars->gpu_r_k_x); CUFREE(vars->gpu_r_k_y); CUFREE(vars->gpu_r_k_z); @@ -410,7 +430,7 @@ void DestroyCUDAVars(VariablesCUDA *vars) { CUFREE(vars->gpu_Invcell_z[b]); } - // delete GPU memory for lambda variables + // free GPU memory for lambda variables CUFREE(vars->gpu_molIndex); CUFREE(vars->gpu_lambdaVDW); CUFREE(vars->gpu_lambdaCoulomb); diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh index 18e8e7947..5209b9be2 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh @@ -42,6 +42,7 @@ void UpdateCellBasisCUDA(VariablesCUDA *vars, uint box, double *cellBasis_x, void UpdateInvCellBasisCUDA(VariablesCUDA *vars, uint box, double *invCellBasis_x, double *invCellBasis_y, double *invCellBasis_z); +void UpdateEnergyVecs(VariablesCUDA *vars, int newVecLen, bool electrostatic); void DestroyEwaldCUDAVars(VariablesCUDA *vars); void DestroyExp6CUDAVars(VariablesCUDA *vars); void DestroyCUDAVars(VariablesCUDA *vars); diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh index e14a2893b..2f12f53d1 100644 --- a/src/GPU/VariablesCUDA.cuh +++ b/src/GPU/VariablesCUDA.cuh @@ -86,8 +86,11 @@ public: gpu_mForcey = nullptr; gpu_mForcez = nullptr; gpu_startAtomIdx = nullptr; + gpu_energyVecLen = 0; + gpu_LJEn = nullptr; + gpu_REn = nullptr; - // setting lambda valuesy to null + // setting lambda values to nullptr gpu_molIndex = nullptr; gpu_lambdaVDW = nullptr; gpu_lambdaCoulomb = nullptr; @@ -121,6 +124,8 @@ public: double **gpu_hsqr, **gpu_hsqrRef; double *gpu_recipEnergies; double *gpu_comx, *gpu_comy, *gpu_comz; + int gpu_energyVecLen; + double *gpu_LJEn, *gpu_REn; double *gpu_rT11, *gpu_rT12, *gpu_rT13; double *gpu_rT22, *gpu_rT23, *gpu_rT33; double *gpu_vT11, *gpu_vT12, *gpu_vT13;