Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge CUDA and ROCm in header files #2845

Merged
merged 1 commit into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 2 additions & 43 deletions source/lib/include/coord.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void compute_cell_info(int* cell_info,
const float& rcut,
const deepmd::Region<FPTYPE>& region);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// normalize coords
// output:
// coord
Expand Down Expand Up @@ -83,47 +83,6 @@ int copy_coord_gpu(FPTYPE* out_c,
const int& total_cellnum,
const int* cell_info,
const deepmd::Region<FPTYPE>& region);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
// normalize coords
// output:
// coord
// input:
// natom, box_info: boxt, rec_boxt
template <typename FPTYPE>
void normalize_coord_gpu(FPTYPE* coord,
const int natom,
const deepmd::Region<FPTYPE>& region);

// copy coordinates
// outputs:
// out_c, out_t, mapping, nall,
// int_data(temp cuda
// memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
// cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
// inputs:
// in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info,
// box_info mem_nall is the size of allocated memory for out_c, out_t,
// mapping
// returns
//   0: successful
// 1: the memory is not large enough to hold all copied coords and types.
// i.e. nall > mem_nall
template <typename FPTYPE>
int copy_coord_gpu(FPTYPE* out_c,
int* out_t,
int* mapping,
int* nall,
int* int_data,
const FPTYPE* in_c,
const int* in_t,
const int& nloc,
const int& mem_nall,
const int& loc_cellnum,
const int& total_cellnum,
const int* cell_info,
const deepmd::Region<FPTYPE>& region);
#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace deepmd
28 changes: 2 additions & 26 deletions source/lib/include/fmt_nlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ void format_nlist_cpu(int* nlist,
const float rcut,
const std::vector<int> sec);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void format_nbor_list_gpu(int* nlist,
const FPTYPE* coord,
Expand All @@ -40,31 +40,7 @@ void test_encoding_decoding_nbor_info_gpu(uint_64* key,
const FPTYPE* in_dist,
const int* in_index,
const int size_of_array);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void format_nbor_list_gpu(int* nlist,
const FPTYPE* coord,
const int* type,
const deepmd::InputNlist& gpu_inlist,
int* array_int,
uint_64* array_longlong,
const int max_nbor_size,
const int nloc,
const int nall,
const float rcut,
const std::vector<int> sec);

template <typename FPTYPE>
void test_encoding_decoding_nbor_info_gpu(uint_64* key,
int* out_type,
int* out_index,
const int* in_type,
const FPTYPE* in_dist,
const int* in_index,
const int size_of_array);
#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace deepmd

Expand Down
23 changes: 2 additions & 21 deletions source/lib/include/gelu.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ void gelu_grad_grad_cpu(FPTYPE* out,
const FPTYPE* dy_2,
const int_64 size);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);

Expand All @@ -36,24 +36,5 @@ void gelu_grad_grad_gpu(FPTYPE* out,
const FPTYPE* dy,
const FPTYPE* dy_2,
const int_64 size);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);

template <typename FPTYPE>
void gelu_grad_gpu(FPTYPE* out,
const FPTYPE* xx,
const FPTYPE* dy,
const int_64 size);

template <typename FPTYPE>
void gelu_grad_grad_gpu(FPTYPE* out,
const FPTYPE* xx,
const FPTYPE* dy,
const FPTYPE* dy_2,
const int_64 size);

#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
} // namespace deepmd
46 changes: 2 additions & 44 deletions source/lib/include/neighbor_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ void use_nlist_map(int* nlist,

#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// build neighbor list.
// outputs
// nlist, max_list_size
Expand Down Expand Up @@ -162,49 +162,7 @@ void use_nei_info_gpu(int* nlist,
const int ntypes,
const bool b_nlist_map);

#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
// build neighbor list.
// outputs
// nlist, max_list_size
// max_list_size is the maximal size of jlist.
// inputs
// c_cpy, nloc, nall, mem_size, rcut, region
// mem_size is the size of allocated memory for jlist.
// returns
//    0: successful
// 1: the memory is not large enough to hold all neighbors.
// i.e. max_list_size > mem_nall
template <typename FPTYPE>
int build_nlist_gpu(InputNlist& nlist,
int* max_list_size,
int* nlist_data,
const FPTYPE* c_cpy,
const int& nloc,
const int& nall,
const int& mem_size,
const float& rcut);
/**
* @brief Filter the fake atom type.
* @details If >=0, set to 0; if <0, set to -1.
* @param ftype_out The output filtered atom type.
* @param ftype_in The input atom type.
* @param nloc The number of atoms.
*/
void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc);

void use_nei_info_gpu(int* nlist,
int* ntype,
bool* nmask,
const int* type,
const int* nlist_map,
const int nloc,
const int nnei,
const int ntypes,
const bool b_nlist_map);

#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace deepmd

Expand Down
52 changes: 2 additions & 50 deletions source/lib/include/prod_env_mat.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void prod_env_mat_r_cpu(FPTYPE *em,
const float rcut_smth,
const std::vector<int> sec);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_env_mat_a_gpu(FPTYPE *em,
FPTYPE *em_deriv,
Expand Down Expand Up @@ -88,54 +88,6 @@ void env_mat_nbor_update(InputNlist &inlist,
int *&nbor_list_dev,
const int *mesh,
const int size);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_env_mat_a_gpu(FPTYPE *em,
FPTYPE *em_deriv,
FPTYPE *rij,
int *nlist,
const FPTYPE *coord,
const int *type,
const InputNlist &gpu_inlist,
int *array_int,
unsigned long long *array_longlong,
const int max_nbor_size,
const FPTYPE *avg,
const FPTYPE *std,
const int nloc,
const int nall,
const float rcut,
const float rcut_smth,
const std::vector<int> sec,
const int *f_type = NULL);

template <typename FPTYPE>
void prod_env_mat_r_gpu(FPTYPE *em,
FPTYPE *em_deriv,
FPTYPE *rij,
int *nlist,
const FPTYPE *coord,
const int *type,
const InputNlist &gpu_inlist,
int *array_int,
unsigned long long *array_longlong,
const int max_nbor_size,
const FPTYPE *avg,
const FPTYPE *std,
const int nloc,
const int nall,
const float rcut,
const float rcut_smth,
const std::vector<int> sec);

void env_mat_nbor_update(InputNlist &inlist,
InputNlist &gpu_inlist,
int &max_nbor_size,
int *&nbor_list_dev,
const int *mesh,
const int size);
#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace deepmd
8 changes: 2 additions & 6 deletions source/lib/include/prod_env_mat_nvnmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,8 @@ void prod_env_mat_a_nvnmd_quantize_cpu(FPTYPE* em,
const std::vector<int> sec,
const int* f_type = NULL);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// UNDEFINE
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
// UNDEFINE
#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace deepmd
26 changes: 2 additions & 24 deletions source/lib/include/prod_force.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ void prod_force_r_cpu(FPTYPE* force,
const int nnei,
const int nframes);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_force_a_gpu(FPTYPE* force,
const FPTYPE* net_deriv,
Expand All @@ -87,28 +87,6 @@ void prod_force_r_gpu(FPTYPE* force,
const int nall,
const int nnei,
const int nframes);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_force_a_gpu(FPTYPE* force,
const FPTYPE* net_deriv,
const FPTYPE* in_deriv,
const int* nlist,
const int nloc,
const int nall,
const int nnei,
const int nframes);

template <typename FPTYPE>
void prod_force_r_gpu(FPTYPE* force,
const FPTYPE* net_deriv,
const FPTYPE* in_deriv,
const int* nlist,
const int nloc,
const int nall,
const int nnei,
const int nframes);
#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace deepmd
23 changes: 2 additions & 21 deletions source/lib/include/prod_force_grad.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ void prod_force_grad_r_cpu(FPTYPE* grad_net,
const int nnei,
const int nframes);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_force_grad_a_gpu(FPTYPE* grad_net,
const FPTYPE* grad,
Expand All @@ -39,25 +39,6 @@ void prod_force_grad_r_gpu(FPTYPE* grad_net,
const int nloc,
const int nnei,
const int nframes);
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_force_grad_a_gpu(FPTYPE* grad_net,
const FPTYPE* grad,
const FPTYPE* env_deriv,
const int* nlist,
const int nloc,
const int nnei,
const int nframes);

template <typename FPTYPE>
void prod_force_grad_r_gpu(FPTYPE* grad_net,
const FPTYPE* grad,
const FPTYPE* env_deriv,
const int* nlist,
const int nloc,
const int nnei,
const int nframes);
#endif // TENSORFLOW_USE_ROCM
} // namespace deepmd
28 changes: 2 additions & 26 deletions source/lib/include/prod_virial.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void prod_virial_r_cpu(FPTYPE* virial,
const int nall,
const int nnei);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_virial_a_gpu(FPTYPE* virial,
FPTYPE* atom_virial,
Expand All @@ -47,30 +47,6 @@ void prod_virial_r_gpu(FPTYPE* virial,
const int nloc,
const int nall,
const int nnei);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void prod_virial_a_gpu(FPTYPE* virial,
FPTYPE* atom_virial,
const FPTYPE* net_deriv,
const FPTYPE* env_deriv,
const FPTYPE* rij,
const int* nlist,
const int nloc,
const int nall,
const int nnei);

template <typename FPTYPE>
void prod_virial_r_gpu(FPTYPE* virial,
FPTYPE* atom_virial,
const FPTYPE* net_deriv,
const FPTYPE* env_deriv,
const FPTYPE* rij,
const int* nlist,
const int nloc,
const int nall,
const int nnei);
#endif // TENSORFLOW_USE_ROCM
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace deepmd
Loading
Loading