Skip to content

Commit

Permalink
Merge pull request #768 from brucefan1983/hip
Browse files Browse the repository at this point in the history
From CUDA only to CUDA+HIP
  • Loading branch information
brucefan1983 authored Oct 30, 2024
2 parents 49bda9b + 96e2b59 commit aad40ec
Show file tree
Hide file tree
Showing 113 changed files with 1,002 additions and 570 deletions.
15 changes: 8 additions & 7 deletions src/force/dftd3.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ J. Comput. Chem., 32, 1456 (2011).
#include "model/box.cuh"
#include "neighbor.cuh"
#include "utilities/common.cuh"
#include "utilities/gpu_macro.cuh"
#include <algorithm>
#include <cctype>
#include <iostream>
Expand Down Expand Up @@ -947,7 +948,7 @@ void DFTD3::compute_small_box(
r12.data() + size_x12 * 3,
r12.data() + size_x12 * 4,
r12.data() + size_x12 * 5);
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

find_dftd3_coordination_number_small_box<<<(N - 1) / 64 + 1, 64>>>(
dftd3_para,
Expand All @@ -959,7 +960,7 @@ void DFTD3::compute_small_box(
r12.data() + size_x12 * 4,
r12.data() + size_x12 * 5,
cn.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

add_dftd3_force_small_box<<<(N - 1) / 64 + 1, 64>>>(
dftd3_para,
Expand All @@ -979,7 +980,7 @@ void DFTD3::compute_small_box(
virial_per_atom.data(),
dc6_sum.data(),
dc8_sum.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

add_dftd3_force_extra_small_box<<<(N - 1) / 64 + 1, 64>>>(
dftd3_para,
Expand All @@ -996,7 +997,7 @@ void DFTD3::compute_small_box(
force_per_atom.data() + N,
force_per_atom.data() + N * 2,
virial_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}

void DFTD3::compute_large_box(
Expand Down Expand Up @@ -1058,7 +1059,7 @@ void DFTD3::compute_large_box(
position_per_atom.data() + N,
position_per_atom.data() + N * 2,
cn.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

find_dftd3_force_large_box<<<(N - 1) / 64 + 1, 64>>>(
dftd3_para,
Expand All @@ -1084,7 +1085,7 @@ void DFTD3::compute_large_box(
virial_per_atom.data(),
dc6_sum.data(),
dc8_sum.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

find_dftd3_force_extra_large_box<<<(N - 1) / 64 + 1, 64>>>(
dftd3_para,
Expand All @@ -1107,7 +1108,7 @@ void DFTD3::compute_large_box(
force_per_atom.data() + N,
force_per_atom.data() + N * 2,
virial_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}

void DFTD3::compute(
Expand Down
2 changes: 2 additions & 0 deletions src/force/dftd3para.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#pragma once

#include "utilities/gpu_macro.cuh"

namespace
{
#define Bohr 0.5291772575069165f
Expand Down
9 changes: 5 additions & 4 deletions src/force/eam.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The EAM potential. Currently two analytical versions:
#include "eam.cuh"
#include "neighbor.cuh"
#include "utilities/error.cuh"
#include "utilities/gpu_macro.cuh"
#include <cstring>
#define BLOCK_SIZE_FORCE 64

Expand Down Expand Up @@ -514,7 +515,7 @@ void EAM::compute(
position_per_atom.data() + number_of_atoms * 2,
eam_data.Fp.data(),
potential_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

find_force_eam_step2<0><<<grid_size, BLOCK_SIZE_FORCE>>>(
eam2004zhou,
Expand All @@ -535,7 +536,7 @@ void EAM::compute(
force_per_atom.data() + 2 * number_of_atoms,
virial_per_atom.data(),
potential_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}

if (potential_model == 1) {
Expand All @@ -554,7 +555,7 @@ void EAM::compute(
position_per_atom.data() + number_of_atoms * 2,
eam_data.Fp.data(),
potential_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

find_force_eam_step2<1><<<grid_size, BLOCK_SIZE_FORCE>>>(
eam2004zhou,
Expand All @@ -575,6 +576,6 @@ void EAM::compute(
force_per_atom.data() + 2 * number_of_atoms,
virial_per_atom.data(),
potential_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}
}
5 changes: 3 additions & 2 deletions src/force/fcp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The force constant potential (FCP)

#include "fcp.cuh"
#include "utilities/error.cuh"
#include "utilities/gpu_macro.cuh"
#include <cstring>
#include <vector>

Expand Down Expand Up @@ -1038,7 +1039,7 @@ void FCP::compute(
position_per_atom.data() + number_of_atoms * 2,
fcp_data.r0.data(),
fcp_data.u.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

fcp_data.pfv.fill(0.0f);

Expand Down Expand Up @@ -1125,5 +1126,5 @@ void FCP::compute(
force_per_atom.data() + 2 * number_of_atoms,
virial_per_atom.data());

CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}
37 changes: 19 additions & 18 deletions src/force/force.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The driver class calculating force and related quantities.
#include "ilp_tmd_sw.cuh"
#include "utilities/common.cuh"
#include "utilities/error.cuh"
#include "utilities/gpu_macro.cuh"
#include "utilities/read_file.cuh"
#include <cstring>
#include <iostream>
Expand Down Expand Up @@ -106,7 +107,7 @@ void Force::parse_potential(
strcmp(potential_name, "nep4_temperature") == 0 ||
strcmp(potential_name, "nep4_zbl_temperature") == 0) {
int num_gpus;
CHECK(cudaGetDeviceCount(&num_gpus));
CHECK(gpuGetDeviceCount(&num_gpus));
#ifdef ZHEYONG
num_gpus = 3;
#endif
Expand Down Expand Up @@ -226,7 +227,7 @@ static __global__ void gpu_sum_force(int N, double* g_fx, double* g_fy, double*
s_f[tid] = f;
__syncthreads();

#pragma unroll

for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
if (tid < offset) {
s_f[tid] += s_f[tid + offset];
Expand Down Expand Up @@ -466,7 +467,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms * 2,
potential_per_atom.data(),
virial_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

if (multiple_potentials_mode_.compare("observe") == 0) {
// If observing, calculate using main potential only
Expand Down Expand Up @@ -516,7 +517,7 @@ void Force::compute(
force_per_atom.data(),
virial_per_atom.data(),
(double)potentials.size());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
} else {
PRINT_INPUT_ERROR("Invalid mode for multiple potentials.\n");
}
Expand Down Expand Up @@ -552,7 +553,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
number_of_atoms,
Expand All @@ -561,7 +562,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}

// always correct the force when using the FCP potential
Expand All @@ -574,7 +575,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
number_of_atoms,
Expand All @@ -583,7 +584,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}
}
}
Expand Down Expand Up @@ -647,7 +648,7 @@ static __global__ void gpu_sum_tensor(int N, double* g_tensor, double* g_sum_ten
s_t[tid] = t;
__syncthreads();

#pragma unroll

for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
if (tid < offset) {
s_t[tid] += s_t[tid + offset];
Expand Down Expand Up @@ -754,7 +755,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms * 2,
potential_per_atom.data(),
virial_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

temperature += delta_T;
if (multiple_potentials_mode_.compare("observe") == 0) {
Expand Down Expand Up @@ -805,7 +806,7 @@ void Force::compute(
force_per_atom.data(),
virial_per_atom.data(),
(double)potentials.size());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
} else {
PRINT_INPUT_ERROR("Invalid mode for multiple potentials.\n");
}
Expand Down Expand Up @@ -841,7 +842,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
number_of_atoms,
Expand All @@ -850,7 +851,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
} else if (compute_hnemdec_ == 0) {
// the tensor:
// xx xy xz 0 3 4
Expand All @@ -876,10 +877,10 @@ void Force::compute(
virial_per_atom.data() + 8 * number_of_atoms,
virial_per_atom.data() + 2 * number_of_atoms,
tensor_per_atom.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

gpu_sum_tensor<<<9, 1024>>>(number_of_atoms, tensor_per_atom.data(), tensor_tot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

gpu_add_driving_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
number_of_atoms,
Expand All @@ -901,7 +902,7 @@ void Force::compute(
force_per_atom.data(),
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms);
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

} else if (compute_hnemdec_ != -1) {
gpu_add_driving_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
Expand All @@ -926,7 +927,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL

gpu_correct_force<<<(number_of_atoms - 1) / 128 + 1, 128>>>(
number_of_atoms,
Expand All @@ -935,7 +936,7 @@ void Force::compute(
force_per_atom.data() + number_of_atoms,
force_per_atom.data() + 2 * number_of_atoms,
ftot.data());
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}
}
}
13 changes: 7 additions & 6 deletions src/force/force_constant.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Use finite difference to calculate the seconod order force constants:
#include "model/box.cuh"
#include "model/group.cuh"
#include "utilities/error.cuh"
#include "utilities/gpu_macro.cuh"
#include <vector>

static __global__ void gpu_shift_atom(const double dx, double* x) { x[0] += dx; }
Expand All @@ -34,13 +35,13 @@ static void shift_atom(

if (beta == 0) {
gpu_shift_atom<<<1, 1>>>(dx, position_per_atom.data() + n2);
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
} else if (beta == 1) {
gpu_shift_atom<<<1, 1>>>(dx, position_per_atom.data() + number_of_atoms + n2);
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
} else {
gpu_shift_atom<<<1, 1>>>(dx, position_per_atom.data() + number_of_atoms * 2 + n2);
CUDA_CHECK_KERNEL
GPU_CHECK_KERNEL
}
}

Expand All @@ -67,10 +68,10 @@ static void get_f(
box, position_per_atom, type, group, potential_per_atom, force_per_atom, virial_per_atom);

size_t M = sizeof(double);
CHECK(cudaMemcpy(f + 0, force_per_atom.data() + n1, M, cudaMemcpyDeviceToHost));
CHECK(cudaMemcpy(f + 1, force_per_atom.data() + n1 + number_of_atoms, M, cudaMemcpyDeviceToHost));
CHECK(gpuMemcpy(f + 0, force_per_atom.data() + n1, M, gpuMemcpyDeviceToHost));
CHECK(gpuMemcpy(f + 1, force_per_atom.data() + n1 + number_of_atoms, M, gpuMemcpyDeviceToHost));
CHECK(
cudaMemcpy(f + 2, force_per_atom.data() + n1 + number_of_atoms * 2, M, cudaMemcpyDeviceToHost));
gpuMemcpy(f + 2, force_per_atom.data() + n1 + number_of_atoms * 2, M, gpuMemcpyDeviceToHost));

shift_atom(-dx, n2, beta, position_per_atom);
}
Expand Down
Loading

0 comments on commit aad40ec

Please sign in to comment.