diff --git a/src/Ewald.cpp b/src/Ewald.cpp index e5abe5ef0..9f7b9e6f3 100644 --- a/src/Ewald.cpp +++ b/src/Ewald.cpp @@ -221,7 +221,7 @@ void Ewald::BoxReciprocalSetup(uint box, XYZArray const &molCoords) { } CallBoxReciprocalSetupGPU( ff.particles->getCUDAVars(), thisBoxCoords, kx[box], ky[box], kz[box], - chargeBox, imageSize[box], sumRnew[box], sumInew[box], prefact[box], + chargeBox, imageSize[box], prefact[box], hsqr[box], currentEnergyRecip[box], box); #else #ifdef _OPENMP @@ -308,8 +308,8 @@ void Ewald::BoxReciprocalSums(uint box, XYZArray const &molCoords) { thisMol++; } CallBoxReciprocalSumsGPU(ff.particles->getCUDAVars(), thisBoxCoords, - chargeBox, imageSizeRef[box], sumRnew[box], - sumInew[box], currentEnergyRecip[box], box); + chargeBox, imageSizeRef[box], + currentEnergyRecip[box], box); #else #ifdef _OPENMP #pragma omp parallel sections default(none) shared(box) @@ -421,14 +421,26 @@ double Ewald::MolReciprocal(XYZArray const &molCoords, const uint molIndex, double lambdaCoef = GetLambdaCoef(molIndex, box); #ifdef GOMC_CUDA XYZArray cCoords(length); - std::vector MolCharge; - for (uint p = 0; p < length; p++) { + std::vector molCharge; + int charges = 0; + for (uint p = 0; p < length; ++p) { cCoords.Set(p, currentCoords[startAtom + p]); - MolCharge.push_back(thisKind.AtomCharge(p) * lambdaCoef); + molCharge.push_back(thisKind.AtomCharge(p) * lambdaCoef); + if (thisKind.AtomCharge(p) != 0.0) charges++; + } + + // If there are no charged particles, the energy doesn't change, but we need + // to copy the sumRref and sumIref arrays to the sumRnew and sumInew arrays + // in case the move is accepted + if (charges == 0) { + CopyRefToNewCUDA(ff.particles->getCUDAVars(), box, imageSizeRef[box]); + energyRecipNew = sysPotRef.boxEnergy[box].recip; + } + else { + CallMolReciprocalGPU(ff.particles->getCUDAVars(), cCoords, molCoords, + molCharge, imageSizeRef[box], + energyRecipNew, box); } - CallMolReciprocalGPU(ff.particles->getCUDAVars(), cCoords, molCoords, - MolCharge, imageSizeRef[box], sumRnew[box], - sumInew[box], energyRecipNew, box); #else #ifdef _OPENMP #pragma omp parallel for default(none) shared(lambdaCoef, molCoords, \ @@ -509,8 +521,7 @@ double Ewald::SwapDestRecip(const cbmc::TrialMol &newMol, const uint box, } else { CallSwapReciprocalGPU(ff.particles->getCUDAVars(), molCoords, molCharges, - imageSizeRef[box], sumRnew[box], sumInew[box], - sumRref[box], sumIref[box], insert, energyRecipNew, box); + imageSizeRef[box], insert, energyRecipNew, box); } #else uint startAtom = mols.MolStart(molIndex); @@ -568,7 +579,7 @@ double Ewald::ChangeLambdaRecip(XYZArray const &molCoords, } CallChangeLambdaMolReciprocalGPU( ff.particles->getCUDAVars(), molCoords, MolCharge, imageSizeRef[box], - sumRnew[box], sumInew[box], energyRecipNew, lambdaCoef, box); + energyRecipNew, lambdaCoef, box); #else #ifdef _OPENMP #pragma omp parallel for default(none) shared(lambdaCoef, molCoords, thisKind) \ @@ -704,8 +715,7 @@ double Ewald::SwapSourceRecip(const cbmc::TrialMol &oldMol, const uint box, } else { CallSwapReciprocalGPU(ff.particles->getCUDAVars(), molCoords, molCharges, - imageSizeRef[box], sumRnew[box], sumInew[box], - sumRref[box], sumIref[box], insert, energyRecipNew, box); + imageSizeRef[box], insert, energyRecipNew, box); } #else uint startAtom = mols.MolStart(molIndex); @@ -755,50 +765,50 @@ double Ewald::MolExchangeReciprocal(const std::vector &newMol, if (box < BOXES_WITH_U_NB) { GOMC_EVENT_START(1, GomcProfileEvent::RECIP_MEMC_ENERGY); - -#ifdef GOMC_CUDA - //Build a vector of only the charged particles in the new and old molecules - std::vector chargeBox; MoleculeKind const& thisKindNew = newMol[0].GetKind(); - uint lengthNew = thisKindNew.NumAtoms() * newMol.size(); MoleculeKind const& thisKindOld = oldMol[0].GetKind(); - uint lengthOld = thisKindOld.NumAtoms() * oldMol.size(); - // The maximum size of this array is all charged particles - XYZArray molCoords = XYZArray(lengthNew + lengthOld); + uint lengthNew = thisKindNew.NumAtoms(); + uint lengthOld = thisKindOld.NumAtoms(); + +#ifdef GOMC_CUDA + //Build a vector of only the charged particles in the new and old molecules + std::vector particleCharge; + // The maximum size of this array is all particles have charges + XYZArray molCoords = XYZArray(lengthNew + lengthOld); - int numChargedParticles = 0; + int numChargedParticles = 0; for (uint m = 0; m < newMol.size(); ++m) { uint moleculeIndex = molIndexNew[m]; double lambdaCoef = GetLambdaCoef(moleculeIndex, box); - XYZArray currMolCoords = newMol[m].GetCoords(); + XYZArray currMolCoords = newMol[m].GetCoords(); for (uint p = 0; p < lengthNew; ++p) { unsigned long currentAtom = mols.MolStart(moleculeIndex) + p; if (!particleHasNoCharge[currentAtom]) { - molCoords.Set(numChargedParticles, + molCoords.Set(numChargedParticles, currMolCoords.x[p], currMolCoords.y[p], currMolCoords.z[p]); numChargedParticles++; - chargeBox.push_back(thisKindNew.AtomCharge(p) * lambdaCoef); + particleCharge.push_back(thisKindNew.AtomCharge(p) * lambdaCoef); } - } + } } for (uint m = 0; m < oldMol.size(); ++m) { uint moleculeIndex = molIndexOld[m]; double lambdaCoef = GetLambdaCoef(moleculeIndex, box); - XYZArray currMolCoords = oldMol[m].GetCoords(); + XYZArray currMolCoords = oldMol[m].GetCoords(); for (uint p = 0; p < lengthOld; ++p) { unsigned long currentAtom = mols.MolStart(moleculeIndex) + p; if (!particleHasNoCharge[currentAtom]) { - molCoords.Set(numChargedParticles, + molCoords.Set(numChargedParticles, currMolCoords.x[p], currMolCoords.y[p], - currMolCoords.z[p]); + currMolCoords.z[p]); numChargedParticles++; // Invert these charges since we subtract them in the energy calc - chargeBox.push_back(thisKindOld.AtomCharge(p) * -lambdaCoef); + particleCharge.push_back(thisKindOld.AtomCharge(p) * -lambdaCoef); } } } @@ -817,18 +827,13 @@ double Ewald::MolExchangeReciprocal(const std::vector &newMol, } else { CallMolExchangeReciprocalGPU(ff.particles->getCUDAVars(), - imageSizeRef[box], - sumRnew[box], sumInew[box], box, - chargeBox, + imageSizeRef[box], box, + particleCharge, numChargedParticles, energyRecipNew, molCoords); } #else - MoleculeKind const& thisKindNew = newMol[0].GetKind(); - MoleculeKind const& thisKindOld = oldMol[0].GetKind(); - uint lengthNew = thisKindNew.NumAtoms(); - uint lengthOld = thisKindOld.NumAtoms(); #ifdef _OPENMP #pragma omp parallel for default(none) shared(box, first_call, lengthNew, lengthOld, \ newMol, oldMol, thisKindNew, thisKindOld, molIndexNew, molIndexOld) \ diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu index 8e9fb0616..11d012d5d 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cu +++ b/src/GPU/CalculateEwaldCUDAKernel.cu @@ -33,11 +33,10 @@ void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords, double const *kx, double const *ky, double const *kz, const std::vector &particleCharge, - uint imageSize, double *sumRnew, double *sumInew, - double *prefact, double *hsqr, + uint imageSize, double *prefact, double *hsqr, double &energyRecip, uint box) { double *gpu_particleCharge; - int atomNumber = coords.Count(); + int atomNumber = particleCharge.size(); CUMALLOC((void **)&gpu_particleCharge, particleCharge.size() * sizeof(double)); @@ -101,10 +100,9 @@ void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords, // with the current volume. void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, double *sumRnew, double *sumInew, - double &energyRecip, uint box) { + uint imageSize, double &energyRecip, uint box) { double *gpu_particleCharge; - int atomNumber = coords.Count(); + int atomNumber = particleCharge.size(); CUMALLOC((void **)&gpu_particleCharge, particleCharge.size() * sizeof(double)); @@ -211,11 +209,10 @@ __global__ void BoxReciprocalGPU(double *gpu_prefact, double *gpu_sumRnew, void CallMolReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, XYZArray const &newCoords, const std::vector &particleCharge, - uint imageSize, double *sumRnew, double *sumInew, - double &energyRecipNew, uint box) { - // Calculate atom number - int atomNumber = currentCoords.Count(); - int newCoordsNumber = newCoords.Count(); + uint imageSize, double &energyRecipNew, uint box) { + // Calculate atom number -- exclude uncharged particles + int atomNumber = particleCharge.size(); + int newCoordsNumber = particleCharge.size(); double *gpu_particleCharge; int blocksPerGrid, threadsPerBlock; @@ -266,11 +263,10 @@ void CallMolReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, void CallChangeLambdaMolReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, double *sumRnew, - double *sumInew, double &energyRecipNew, + uint imageSize, double &energyRecipNew, const double lambdaCoef, uint box) { - // Calculate atom number - int atomNumber = coords.Count(); + // Calculate atom number -- exclude uncharged particles + int atomNumber = particleCharge.size(); double *gpu_particleCharge; int blocksPerGrid, threadsPerBlock; @@ -311,15 +307,13 @@ void CallChangeLambdaMolReciprocalGPU(VariablesCUDA *vars, void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, double *sumRnew, double *sumInew, - double *sumRref, double *sumIref, const bool insert, - double &energyRecipNew, uint box) { - - // Calculate atom number - int atomNumber = coords.Count(); + uint imageSize, const bool insert, + double &energyRecipNew, uint box) +{ + // Calculate atom number -- exclude uncharged particles + int atomNumber = particleCharge.size(); // given coordinates double *gpu_particleCharge; - int blocksPerGrid, threadsPerBlock; CUMALLOC((void **)&gpu_particleCharge, particleCharge.size() * sizeof(double)); @@ -334,10 +328,8 @@ void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, cudaMemcpy(vars->gpu_z, coords.z, atomNumber * sizeof(double), cudaMemcpyHostToDevice); - threadsPerBlock = THREADS_PER_BLOCK; - blocksPerGrid = (imageSize + threadsPerBlock - 1) / threadsPerBlock; - // NewSwapReciprocalGPU<<>>( - // vars, atomNumber, box, gpu_particleCharge, insert, imageSize); + int threadsPerBlock = THREADS_PER_BLOCK; + int blocksPerGrid = (imageSize + threadsPerBlock - 1) / threadsPerBlock; SwapReciprocalGPU<<>>( vars->gpu_x, vars->gpu_y, vars->gpu_z, vars->gpu_kxRef[box], vars->gpu_kyRef[box], vars->gpu_kzRef[box], particleCharge.size(), @@ -360,44 +352,45 @@ void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, - double *sumRnew, - double *sumInew, - uint box, - std::vector chargeBox, + uint box, + std::vector particleCharge, int numChargedParticles, double &energyRecipNew, XYZArray molCoords) { - double *gpu_chargeBox; + // Calculate atom number -- exclude uncharged particles + int atomNumber = particleCharge.size(); + + double *gpu_particleCharge; double *gpu_MolX; double *gpu_MolY; double *gpu_MolZ; - CUMALLOC((void**) &gpu_chargeBox, chargeBox.size() * sizeof(double)); - CUMALLOC((void**) &gpu_MolX, molCoords.Count() * sizeof(double)); - CUMALLOC((void**) &gpu_MolY, molCoords.Count() * sizeof(double)); - CUMALLOC((void**) &gpu_MolZ, molCoords.Count() * sizeof(double)); + CUMALLOC((void**) &gpu_particleCharge, particleCharge.size() * sizeof(double)); + CUMALLOC((void**) &gpu_MolX, atomNumber * sizeof(double)); + CUMALLOC((void**) &gpu_MolY, atomNumber * sizeof(double)); + CUMALLOC((void**) &gpu_MolZ, atomNumber * sizeof(double)); - cudaMemcpy(gpu_chargeBox, &chargeBox[0], chargeBox.size() * sizeof(double), + cudaMemcpy(gpu_particleCharge, &particleCharge[0], particleCharge.size() * sizeof(double), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_MolX, &molCoords.x[0], molCoords.Count() * sizeof(double), + cudaMemcpy(gpu_MolX, &molCoords.x[0], atomNumber * sizeof(double), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_MolY, &molCoords.y[0], molCoords.Count() * sizeof(double), + cudaMemcpy(gpu_MolY, &molCoords.y[0], atomNumber * sizeof(double), cudaMemcpyHostToDevice); - cudaMemcpy(gpu_MolZ, &molCoords.z[0], molCoords.Count() * sizeof(double), + cudaMemcpy(gpu_MolZ, &molCoords.z[0], atomNumber * sizeof(double), cudaMemcpyHostToDevice); int threadsPerBlock = THREADS_PER_BLOCK; int blocksPerGrid = (imageSize + threadsPerBlock - 1)/threadsPerBlock; MolExchangeReciprocalGPU<<< blocksPerGrid, threadsPerBlock>>>( imageSize, - vars->gpu_kxRef[box], + vars->gpu_kxRef[box], vars->gpu_kyRef[box], vars->gpu_kzRef[box], vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box], - gpu_chargeBox, + gpu_particleCharge, numChargedParticles, gpu_MolX, gpu_MolY, @@ -413,8 +406,8 @@ void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, vars->gpu_recipEnergies, vars->gpu_finalVal, imageSize); cudaMemcpy(&energyRecipNew, vars->gpu_finalVal, sizeof(double), cudaMemcpyDeviceToHost); - CUFREE(gpu_chargeBox); - CUFREE(gpu_MolX); + CUFREE(gpu_particleCharge); + CUFREE(gpu_MolX); CUFREE(gpu_MolY); CUFREE(gpu_MolZ); } @@ -629,41 +622,6 @@ __global__ void BoxForceReciprocalGPU( atomicAdd(&gpu_mForceRecz[moleculeID], forceZ); } -__global__ void NewSwapReciprocalGPU(VariablesCUDA *vars, int atomNumber, uint box, - double *gpu_particleCharge, const bool insert, - int imageSize) { - - int threadID = blockIdx.x * blockDim.x + threadIdx.x; - if (threadID >= imageSize) return; - - double sumReal = 0.0, sumImaginary = 0.0; - - #pragma unroll 4 - for (int p=0; p < atomNumber; ++p) { - double dotProduct = - DotProductGPU((vars->gpu_kx[box])[threadID], (vars->gpu_ky[box])[threadID], (vars->gpu_kz[box])[threadID], - vars->gpu_x[p], vars->gpu_y[p], vars->gpu_z[p]); - double dotsin, dotcos; - sincos(dotProduct, &dotsin, &dotcos); - sumReal += (gpu_particleCharge[p] * dotcos); - sumImaginary += (gpu_particleCharge[p] * dotsin); - } - - // If we insert the molecule into the box, we add the sum value. - // Otherwise, we subtract the sum value - if (insert) { - (vars->gpu_sumRnew[box])[threadID] = (vars->gpu_sumRref[box])[threadID] + sumReal; - (vars->gpu_sumInew[box])[threadID] = (vars->gpu_sumIref[box])[threadID] + sumImaginary; - } else { - (vars->gpu_sumRnew[box])[threadID] = (vars->gpu_sumRref[box])[threadID] - sumReal; - (vars->gpu_sumInew[box])[threadID] = (vars->gpu_sumIref[box])[threadID] - sumImaginary; - } - - vars->gpu_recipEnergies[threadID] = - (((vars->gpu_sumRnew[box])[threadID] * (vars->gpu_sumRnew[box])[threadID] + - (vars->gpu_sumInew[box])[threadID] * (vars->gpu_sumInew[box])[threadID]) * - (vars->gpu_prefactRef[box])[threadID]); -} __global__ void SwapReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_kx, double *gpu_ky, @@ -709,14 +667,14 @@ __global__ void SwapReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, __global__ void MolExchangeReciprocalGPU( int imageSize, - double *gpu_kx, - double *gpu_ky, + double *gpu_kx, + double *gpu_ky, double *gpu_kz, double *gpu_prefactRef, double *gpu_sumRnew, double *gpu_sumInew, - double *gpu_chargeBox, - int numChargedParticles, + double *gpu_particleCharge, + int numChargedParticles, double *gpu_MolX, double *gpu_MolY, double *gpu_MolZ, @@ -736,8 +694,8 @@ __global__ void MolExchangeReciprocalGPU( gpu_MolZ[p]); double dotsin, dotcos; sincos(dotProduct, &dotsin, &dotcos); - sumRnew += gpu_chargeBox[p] * dotcos; - sumInew += gpu_chargeBox[p] * dotsin; + sumRnew += gpu_particleCharge[p] * dotcos; + sumInew += gpu_particleCharge[p] * dotsin; } gpu_sumRnew[imageID] = sumRnew; @@ -788,6 +746,7 @@ __global__ void MolReciprocalGPU(double *gpu_cx, double *gpu_cy, double *gpu_cz, gpu_prefactRef[threadID]); } + __global__ void ChangeLambdaMolReciprocalGPU( double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_kx, double *gpu_ky, double *gpu_kz, int atomNumber, double *gpu_particleCharge, diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh index 695573818..2e2732125 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cuh +++ b/src/GPU/CalculateEwaldCUDAKernel.cuh @@ -38,8 +38,6 @@ void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, double const *kz, const std::vector &particleCharge, uint imageSize, - double *sumRnew, - double *sumInew, double *prefact, double *hsqr, double &energyRecip, @@ -49,8 +47,6 @@ void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, uint imageSize, - double *sumRnew, - double *sumInew, double &energyRecip, uint box); @@ -59,8 +55,6 @@ void CallMolReciprocalGPU(VariablesCUDA *vars, XYZArray const &newCoords, const std::vector &particleCharge, uint imageSize, - double *sumRnew, - double *sumInew, double &energyRecip, uint box); @@ -69,8 +63,6 @@ void CallChangeLambdaMolReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, uint imageSize, - double *sumRnew, - double *sumInew, double &energyRecip, const double lambdaCoef, uint box); @@ -79,20 +71,14 @@ void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, uint imageSize, - double *sumRnew, - double *sumInew, - double *sumRref, - double *sumIref, const bool insert, double &energyRecip, uint box); void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, - double *sumRnew, - double *sumInew, - uint box, - std::vector chargeBox, + uint box, + std::vector particleCharge, int numChargedParticles, double &energyRecipNew, XYZArray molCoords); @@ -197,21 +183,13 @@ __global__ void MolExchangeReciprocalGPU( double *gpu_prefactRef, double *gpu_sumRnew, double *gpu_sumInew, - double *gpu_chargeBox, + double *gpu_particleCharge, int numChargedParticles, double *gpu_MolX, double *gpu_MolY, double *gpu_MolZ, double *gpu_RecipEnergies); -__global__ void NewSwapReciprocalGPU(VariablesCUDA *vars, - int atomNumber, - uint box, - double *gpu_particleCharge, - const bool insert, - double *gpu_RecipEnergies, - int imageSize); - __global__ void BoxReciprocalGPU(double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,