From 9fb370109c7a669ba13db47432f3304d9be032f3 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert
Date: Tue, 10 Dec 2024 21:06:38 -0500
Subject: [PATCH] Reformat files using clang-format

---
 src/GPU/CalculateForceCUDAKernel.cu      | 65 ++++++++++-------
 src/GPU/CalculateMinImageCUDAKernel.cuh  |  2 +-
 src/GPU/TransformParticlesCUDAKernel.cu  | 88 +++++++++++++-----------
 src/GPU/TransformParticlesCUDAKernel.cuh | 36 +++++-----
 src/GPU/VariablesCUDA.cuh                |  6 +-
 5 files changed, 108 insertions(+), 89 deletions(-)

diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu
index f53f023d0..d72acb284 100644
--- a/src/GPU/CalculateForceCUDAKernel.cu
+++ b/src/GPU/CalculateForceCUDAKernel.cu
@@ -626,17 +626,25 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
   }
   __syncthreads();
 
-  for (int particleIdx = threadIdx.x; particleIdx < shr_particlesInsideCurrentCell; particleIdx += blockDim.x) {
+  for (int particleIdx = threadIdx.x;
+       particleIdx < shr_particlesInsideCurrentCell;
+       particleIdx += blockDim.x) {
     int particle = gpu_cellVector[shr_currentCellStartIndex + particleIdx];
     int mA = gpu_particleMol[particle];
     double3 forceComponents = make_double3(0.0, 0.0, 0.0);
-    for (int neighborCellIdx = 0; neighborCellIdx < NUMBER_OF_NEIGHBOR_CELLS; ++neighborCellIdx) {
-      int neighborCell = gpu_neighborList[currentCell * NUMBER_OF_NEIGHBOR_CELLS + neighborCellIdx];
+    for (int neighborCellIdx = 0; neighborCellIdx < NUMBER_OF_NEIGHBOR_CELLS;
+         ++neighborCellIdx) {
+      int neighborCell =
+          gpu_neighborList[currentCell * NUMBER_OF_NEIGHBOR_CELLS +
+                           neighborCellIdx];
       // Calculate number of particles inside neighbor cell
       int particlesInsideNeighboringCell =
-          gpu_cellStartIndex[neighborCell + 1] - gpu_cellStartIndex[neighborCell];
-      for (int neighborIdx = 0; neighborIdx < particlesInsideNeighboringCell; ++neighborIdx) {
-        int neighbor = gpu_cellVector[gpu_cellStartIndex[neighborCell] + neighborIdx];
+          gpu_cellStartIndex[neighborCell + 1] -
+          gpu_cellStartIndex[neighborCell];
+      for (int neighborIdx = 0; neighborIdx < particlesInsideNeighboringCell;
+           ++neighborIdx) {
+        int neighbor =
+            gpu_cellVector[gpu_cellStartIndex[neighborCell] + neighborIdx];
         int mB = gpu_particleMol[neighbor];
         // Check to be sure these are different molecules
         if (mA != mB) {
@@ -652,11 +660,13 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
           int kA = gpu_particleKind[particle];
           int kB = gpu_particleKind[neighbor];
 
-          if (currentCell < neighborCell || (currentCell == neighborCell && particle < neighbor)) {
-            LJEn += CalcEnGPU(
-                distSq, kA, kB, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn, gpu_VDW_Kind[0],
-                gpu_isMartini[0], gpu_rCut[0], gpu_rOn[0], gpu_count[0], lambdaVDW,
-                sc_sigma_6, sc_alpha, sc_power, gpu_rMin, gpu_rMaxSq, gpu_expConst);
+          if (currentCell < neighborCell ||
+              (currentCell == neighborCell && particle < neighbor)) {
+            LJEn += CalcEnGPU(distSq, kA, kB, gpu_sigmaSq, gpu_n,
+                              gpu_epsilon_Cn, gpu_VDW_Kind[0],
+                              gpu_isMartini[0], gpu_rCut[0], gpu_rOn[0],
+                              gpu_count[0], lambdaVDW, sc_sigma_6, sc_alpha,
+                              sc_power, gpu_rMin, gpu_rMaxSq, gpu_expConst);
           }
           double forces = CalcEnForceGPU(
               distSq, kA, kB, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn, gpu_rCut[0],
@@ -665,24 +675,27 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
               gpu_expConst);
           double qi_qj_fact = 0.0;
           if (electrostatic) {
-            qi_qj_fact = gpu_particleCharge[particle] *
-                         gpu_particleCharge[neighbor];
+            qi_qj_fact =
+                gpu_particleCharge[particle] * gpu_particleCharge[neighbor];
             if (qi_qj_fact != 0.0) {
               qi_qj_fact *= qqFactGPU;
-              double lambdaCoulomb = DeviceGetLambdaCoulomb(
-                  mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb);
-              if (currentCell < neighborCell || (currentCell == neighborCell && particle < neighbor)) {
-                REn += CalcCoulombGPU(
-                    distSq, kA, kB, qi_qj_fact, gpu_rCutLow[0], gpu_ewald[0],
-                    gpu_VDW_Kind[0], gpu_alpha[box], gpu_rCutCoulomb[box],
-                    gpu_isMartini[0], gpu_diElectric_1[0], lambdaCoulomb, sc_coul,
-                    sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq, gpu_count[0]);
+              double lambdaCoulomb =
+                  DeviceGetLambdaCoulomb(mA, mB, box, gpu_isFraction,
+                                         gpu_molIndex, gpu_lambdaCoulomb);
+              if (currentCell < neighborCell ||
+                  (currentCell == neighborCell && particle < neighbor)) {
+                REn += CalcCoulombGPU(
+                    distSq, kA, kB, qi_qj_fact, gpu_rCutLow[0], gpu_ewald[0],
+                    gpu_VDW_Kind[0], gpu_alpha[box], gpu_rCutCoulomb[box],
+                    gpu_isMartini[0], gpu_diElectric_1[0], lambdaCoulomb,
+                    sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq,
+                    gpu_count[0]);
               }
               forces += CalcCoulombForceGPU(
                   distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0],
                   gpu_isMartini[0], gpu_alpha[box], gpu_rCutCoulomb[box],
-                  gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha,
-                  sc_power, lambdaCoulomb, gpu_count[0], kA, kB);
+                  gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6,
+                  sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB);
             }
           }
           forceComponents.x += forces * virComponents.x;
@@ -880,8 +893,10 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
   }
 }
 
-__device__ double CalcCoulombVirParticleGPU(const double distSq, const double qi_qj,
-                                            const int gpu_ewald, const double gpu_alpha) {
+__device__ double CalcCoulombVirParticleGPU(const double distSq,
+                                            const double qi_qj,
+                                            const int gpu_ewald,
+                                            const double gpu_alpha) {
   const double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
diff --git a/src/GPU/CalculateMinImageCUDAKernel.cuh b/src/GPU/CalculateMinImageCUDAKernel.cuh
index de9670f15..f502cdbf6 100644
--- a/src/GPU/CalculateMinImageCUDAKernel.cuh
+++ b/src/GPU/CalculateMinImageCUDAKernel.cuh
@@ -70,7 +70,7 @@ WrapPBCNonOrth3(double3 &v, const double3 &ax, const double *gpu_cell_x,
 
 __device__ inline void UnwrapPBC(double &v, const double &ref, const double &ax,
                                  const double &halfax) {
-  //Per CUDA documention, use of std namespace math functions is not supported
+  // Per CUDA documentation, use of std namespace math functions is not supported
   if (fabs(ref - v) > halfax) {
     if (ref < halfax)
       v -= ax;
diff --git a/src/GPU/TransformParticlesCUDAKernel.cu b/src/GPU/TransformParticlesCUDAKernel.cu
index 5be8bc5d2..daeeb2ee2 100644
--- a/src/GPU/TransformParticlesCUDAKernel.cu
+++ b/src/GPU/TransformParticlesCUDAKernel.cu
@@ -350,14 +350,14 @@ void CallRotateParticlesGPU(
                  cudaMemcpyHostToDevice);
   RotateParticlesKernel<<<blocksPerGrid, threadsPerBlock>>>(
-      r_max, vars->gpu_mTorquex, vars->gpu_mTorquey,
-      vars->gpu_mTorquez, vars->gpu_inForceRange, step, key, seed, vars->gpu_x,
-      vars->gpu_y, vars->gpu_z, vars->gpu_particleMol, atomCount, xAxes, yAxes,
-      zAxes, vars->gpu_comx, vars->gpu_comy, vars->gpu_comz,
-      vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
-      vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
-      vars->gpu_Invcell_z[box], vars->gpu_nonOrth, lambdaBETA, vars->gpu_r_k_x,
-      vars->gpu_r_k_y, vars->gpu_r_k_z, gpu_isMoleculeInvolved);
+      r_max, vars->gpu_mTorquex, vars->gpu_mTorquey, vars->gpu_mTorquez,
+      vars->gpu_inForceRange, step, key, seed, vars->gpu_x, vars->gpu_y,
+      vars->gpu_z, vars->gpu_particleMol, atomCount, xAxes, yAxes, zAxes,
+      vars->gpu_comx, vars->gpu_comy, vars->gpu_comz, vars->gpu_cell_x[box],
+      vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
+      vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
+      lambdaBETA, vars->gpu_r_k_x, vars->gpu_r_k_y, vars->gpu_r_k_z,
+      gpu_isMoleculeInvolved);
 #ifndef NDEBUG
   cudaDeviceSynchronize();
   checkLastErrorCUDA(__FILE__, __LINE__);
 #endif
@@ -384,15 +384,15 @@
 }
 
 __global__ void TranslateParticlesKernel(
-    double t_max, double *molForcex,
-    double *molForcey, double *molForcez, int *gpu_inForceRange, ulong step,
-    unsigned int key, ulong seed, double *gpu_x, double *gpu_y, double *gpu_z,
-    int *gpu_particleMol, int atomCount, double xAxes, double yAxes,
-    double zAxes, double *gpu_comx, double *gpu_comy, double *gpu_comz,
-    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-    int *gpu_nonOrth, double lambdaBETA, double *gpu_t_k_x, double *gpu_t_k_y,
-    double *gpu_t_k_z, int8_t *gpu_isMoleculeInvolved, double *gpu_mForceRecx,
+    double t_max, double *molForcex, double *molForcey, double *molForcez,
+    int *gpu_inForceRange, ulong step, unsigned int key, ulong seed,
+    double *gpu_x, double *gpu_y, double *gpu_z, int *gpu_particleMol,
+    int atomCount, double xAxes, double yAxes, double zAxes, double *gpu_comx,
+    double *gpu_comy, double *gpu_comz, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, int *gpu_nonOrth, double lambdaBETA,
+    double *gpu_t_k_x, double *gpu_t_k_y, double *gpu_t_k_z,
+    int8_t *gpu_isMoleculeInvolved, double *gpu_mForceRecx,
     double *gpu_mForceRecy, double *gpu_mForceRecz) {
   int atomNumber = blockIdx.x * blockDim.x + threadIdx.x;
   if (atomNumber >= atomCount)
@@ -414,17 +414,19 @@
 
   double shiftx, shifty, shiftz;
 
-  //Per CUDA documention, use of std namespace math functions is not supported
-  bool forceInRange =
-      (fabs(lbmaxx) > MIN_FORCE && fabs(lbmaxx) < MAX_FORCE &&
-       fabs(lbmaxy) > MIN_FORCE && fabs(lbmaxy) < MAX_FORCE &&
-       fabs(lbmaxz) > MIN_FORCE && fabs(lbmaxz) < MAX_FORCE);
+  // Per CUDA documentation, use of std namespace math functions is not supported
+  bool forceInRange = (fabs(lbmaxx) > MIN_FORCE && fabs(lbmaxx) < MAX_FORCE &&
+                       fabs(lbmaxy) > MIN_FORCE && fabs(lbmaxy) < MAX_FORCE &&
+                       fabs(lbmaxz) > MIN_FORCE && fabs(lbmaxz) < MAX_FORCE);
 
   if (forceInRange) {
     double3 randnums = randomCoordsGPU(molIndex, key, step, seed);
-    shiftx = (-lbmaxx + log1p(2.0 * randnums.x * exp(lbmaxx) * sinh(lbmaxx))) / lbfx;
-    shifty = (-lbmaxy + log1p(2.0 * randnums.y * exp(lbmaxy) * sinh(lbmaxy))) / lbfy;
-    shiftz = (-lbmaxz + log1p(2.0 * randnums.z * exp(lbmaxz) * sinh(lbmaxz))) / lbfz;
+    shiftx =
+        (-lbmaxx + log1p(2.0 * randnums.x * exp(lbmaxx) * sinh(lbmaxx))) / lbfx;
+    shifty =
+        (-lbmaxy + log1p(2.0 * randnums.y * exp(lbmaxy) * sinh(lbmaxy))) / lbfy;
+    shiftz =
+        (-lbmaxz + log1p(2.0 * randnums.z * exp(lbmaxz) * sinh(lbmaxz))) / lbfz;
   } else {
     double3 randnums = SymRandomCoordsGPU(molIndex, key, step, seed);
     shiftx = t_max * randnums.x;
@@ -472,15 +474,15 @@
 }
 
 __global__ void RotateParticlesKernel(
-    double r_max, double *molTorquex,
-    double *molTorquey, double *molTorquez, int *gpu_inForceRange, ulong step,
-    unsigned int key, ulong seed, double *gpu_x, double *gpu_y, double *gpu_z,
-    int *gpu_particleMol, int atomCount, double xAxes, double yAxes,
-    double zAxes, double *gpu_comx, double *gpu_comy, double *gpu_comz,
-    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-    int *gpu_nonOrth, double lambdaBETA, double *gpu_r_k_x, double *gpu_r_k_y,
-    double *gpu_r_k_z, int8_t *gpu_isMoleculeInvolved) {
+    double r_max, double *molTorquex, double *molTorquey, double *molTorquez,
+    int *gpu_inForceRange, ulong step, unsigned int key, ulong seed,
+    double *gpu_x, double *gpu_y, double *gpu_z, int *gpu_particleMol,
+    int atomCount, double xAxes, double yAxes, double zAxes, double *gpu_comx,
+    double *gpu_comy, double *gpu_comz, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, int *gpu_nonOrth, double lambdaBETA,
+    double *gpu_r_k_x, double *gpu_r_k_y, double *gpu_r_k_z,
+    int8_t *gpu_isMoleculeInvolved) {
   int atomNumber = blockIdx.x * blockDim.x + threadIdx.x;
   if (atomNumber >= atomCount)
     return;
@@ -501,17 +503,19 @@
   double rotx, roty, rotz, theta;
   double3 rotvec;
 
-  //Per CUDA documention, use of std namespace math functions is not supported
-  bool forceInRange =
-      (fabs(lbmaxx) > MIN_FORCE && fabs(lbmaxx) < MAX_FORCE &&
-       fabs(lbmaxy) > MIN_FORCE && fabs(lbmaxy) < MAX_FORCE &&
-       fabs(lbmaxz) > MIN_FORCE && fabs(lbmaxz) < MAX_FORCE);
+  // Per CUDA documentation, use of std namespace math functions is not supported
+  bool forceInRange = (fabs(lbmaxx) > MIN_FORCE && fabs(lbmaxx) < MAX_FORCE &&
+                       fabs(lbmaxy) > MIN_FORCE && fabs(lbmaxy) < MAX_FORCE &&
+                       fabs(lbmaxz) > MIN_FORCE && fabs(lbmaxz) < MAX_FORCE);
 
   if (forceInRange) {
     double3 randnums = randomCoordsGPU(molIndex, key, step, seed);
-    rotx = (-lbmaxx + log1p(2.0 * randnums.x * exp(lbmaxx) * sinh(lbmaxx))) / lbtx;
-    roty = (-lbmaxy + log1p(2.0 * randnums.y * exp(lbmaxy) * sinh(lbmaxy))) / lbty;
-    rotz = (-lbmaxz + log1p(2.0 * randnums.z * exp(lbmaxz) * sinh(lbmaxz))) / lbtz;
+    rotx =
+        (-lbmaxx + log1p(2.0 * randnums.x * exp(lbmaxx) * sinh(lbmaxx))) / lbtx;
+    roty =
+        (-lbmaxy + log1p(2.0 * randnums.y * exp(lbmaxy) * sinh(lbmaxy))) / lbty;
+    rotz =
+        (-lbmaxz + log1p(2.0 * randnums.z * exp(lbmaxz) * sinh(lbmaxz))) / lbtz;
     theta = sqrt(rotx * rotx + roty * roty + rotz * rotz);
     rotvec = make_double3(rotx * (1.0 / theta), roty * (1.0 / theta),
                           rotz * (1.0 / theta));
diff --git a/src/GPU/TransformParticlesCUDAKernel.cuh b/src/GPU/TransformParticlesCUDAKernel.cuh
index d259b0e92..59a3312e8 100644
--- a/src/GPU/TransformParticlesCUDAKernel.cuh
+++ b/src/GPU/TransformParticlesCUDAKernel.cuh
@@ -35,27 +35,27 @@ void CallRotateParticlesGPU(
     XYZArray &newMolPos, XYZArray &newCOMs, double lambdaBETA, XYZArray &r_k);
 
 __global__ void TranslateParticlesKernel(
-    double t_max, double *molForcex,
-    double *molForcey, double *molForcez, int *inForceRange, ulong step,
-    unsigned int key, ulong seed, double *gpu_x, double *gpu_y, double *gpu_z,
-    int *gpu_particleMol, int atomCount, double xAxes, double yAxes,
-    double zAxes, double *gpu_comx, double *gpu_comy, double *gpu_comz,
-    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-    int *gpu_nonOrth, double lambdaBETA, double *gpu_t_k_x, double *gpu_t_k_y,
-    double *gpu_t_k_z, int8_t *gpu_isMoleculeInvolved, double *gpu_mForceRecx,
+    double t_max, double *molForcex, double *molForcey, double *molForcez,
+    int *inForceRange, ulong step, unsigned int key, ulong seed, double *gpu_x,
+    double *gpu_y, double *gpu_z, int *gpu_particleMol, int atomCount,
+    double xAxes, double yAxes, double zAxes, double *gpu_comx,
+    double *gpu_comy, double *gpu_comz, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, int *gpu_nonOrth, double lambdaBETA,
+    double *gpu_t_k_x, double *gpu_t_k_y, double *gpu_t_k_z,
+    int8_t *gpu_isMoleculeInvolved, double *gpu_mForceRecx,
     double *gpu_mForceRecy, double *gpu_mForceRecz);
 
 __global__ void RotateParticlesKernel(
-    double r_max, double *molTorquex,
-    double *molTorquey, double *molTorquez, int *inForceRange, ulong step,
-    unsigned int key, ulong seed, double *gpu_x, double *gpu_y, double *gpu_z,
-    int *gpu_particleMol, int atomCount, double xAxes, double yAxes,
-    double zAxes, double *gpu_comx, double *gpu_comy, double *gpu_comz,
-    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-    int *gpu_nonOrth, double lambdaBETA, double *gpu_r_k_x, double *gpu_r_k_y,
-    double *gpu_r_k_z, int8_t *gpu_isMoleculeInvolved);
+    double r_max, double *molTorquex, double *molTorquey, double *molTorquez,
+    int *inForceRange, ulong step, unsigned int key, ulong seed, double *gpu_x,
+    double *gpu_y, double *gpu_z, int *gpu_particleMol, int atomCount,
+    double xAxes, double yAxes, double zAxes, double *gpu_comx,
+    double *gpu_comy, double *gpu_comz, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, int *gpu_nonOrth, double lambdaBETA,
+    double *gpu_r_k_x, double *gpu_r_k_y, double *gpu_r_k_z,
+    int8_t *gpu_isMoleculeInvolved);
 
 // Brownian Motion multiparticle
 void BrownianMotionRotateParticlesGPU(
diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh
index 1659b8709..9b43a7b74 100644
--- a/src/GPU/VariablesCUDA.cuh
+++ b/src/GPU/VariablesCUDA.cuh
@@ -9,11 +9,11 @@ along with this program, also can be found at
 #define VARIABLES_CUDA_H
 
 #ifdef GOMC_CUDA
-#include
-#include
-#include
 #include "EnsemblePreprocessor.h"
 #include "NumLib.h"
+#include
+#include
+#include
 
 // Need a separate float constant for device code with the MSVC compiler
 // See CUDA Programming Guide section I.4.13 for details