remove _cuda or _rocm suffix #2839

Merged · 2 commits · Sep 19, 2023
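In short: backend-specific GPU entry points lose their `_cuda`/`_rocm` suffixes, so CUDA and ROCm builds expose a single `_gpu` name selected by the existing GOOGLE_CUDA / TENSORFLOW_USE_ROCM guards. A minimal before/after fragment (the gelu call is taken from the diff below; the argument variables are illustrative placeholders):

// before: callers pick a name per backend
deepmd::gelu_gpu_cuda(out, xx, size);  // CUDA builds
deepmd::gelu_gpu_rocm(out, xx, size);  // ROCm builds

// after: one name; the active build guard supplies the implementation
deepmd::gelu_gpu(out, xx, size);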
32 changes: 16 additions & 16 deletions source/lib/include/coord.h
@@ -92,9 +92,9 @@ int copy_coord_gpu(FPTYPE* out_c,
// input:
// natom, box_info: boxt, rec_boxt
template <typename FPTYPE>
-void normalize_coord_gpu_rocm(FPTYPE* coord,
-                              const int natom,
-                              const deepmd::Region<FPTYPE>& region);
+void normalize_coord_gpu(FPTYPE* coord,
+                         const int natom,
+                         const deepmd::Region<FPTYPE>& region);

// copy coordinates
// outputs:
@@ -111,19 +111,19 @@ void normalize_coord_gpu_rocm(FPTYPE* coord,
// 1: the memory is not large enough to hold all copied coords and types.
// i.e. nall > mem_nall
template <typename FPTYPE>
-int copy_coord_gpu_rocm(FPTYPE* out_c,
-                        int* out_t,
-                        int* mapping,
-                        int* nall,
-                        int* int_data,
-                        const FPTYPE* in_c,
-                        const int* in_t,
-                        const int& nloc,
-                        const int& mem_nall,
-                        const int& loc_cellnum,
-                        const int& total_cellnum,
-                        const int* cell_info,
-                        const deepmd::Region<FPTYPE>& region);
+int copy_coord_gpu(FPTYPE* out_c,
+                   int* out_t,
+                   int* mapping,
+                   int* nall,
+                   int* int_data,
+                   const FPTYPE* in_c,
+                   const int* in_t,
+                   const int& nloc,
+                   const int& mem_nall,
+                   const int& loc_cellnum,
+                   const int& total_cellnum,
+                   const int* cell_info,
+                   const deepmd::Region<FPTYPE>& region);
#endif // TENSORFLOW_USE_ROCM

} // namespace deepmd
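The return convention documented above (0 on success, 1 when nall > mem_nall) implies a grow-and-retry loop in the caller. A hypothetical fragment sketching that loop; device (re)allocation is elided to a comment since this PR does not touch it, and the doubling policy is illustrative, not taken from the diff:

int mem_nall = nloc;  // initial capacity guess
int nall = 0;
while (true) {
  // (re)allocate device buffers here: out_c (3 * mem_nall FPTYPEs),
  // out_t and mapping (mem_nall ints each)
  const int ret = deepmd::copy_coord_gpu(out_c, out_t, mapping, &nall, int_data,
                                         in_c, in_t, nloc, mem_nall, loc_cellnum,
                                         total_cellnum, cell_info, region);
  if (ret == 0) break;  // success: nall now holds the copied atom count
  mem_nall *= 2;        // 1 means nall > mem_nall: enlarge and retry
}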
72 changes: 36 additions & 36 deletions source/lib/include/fmt_nlist.h
@@ -20,50 +20,50 @@ void format_nlist_cpu(int* nlist,

#if GOOGLE_CUDA
template <typename FPTYPE>
-void format_nbor_list_gpu_cuda(int* nlist,
-                               const FPTYPE* coord,
-                               const int* type,
-                               const deepmd::InputNlist& gpu_inlist,
-                               int* array_int,
-                               uint_64* array_longlong,
-                               const int max_nbor_size,
-                               const int nloc,
-                               const int nall,
-                               const float rcut,
-                               const std::vector<int> sec);
+void format_nbor_list_gpu(int* nlist,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          int* array_int,
+                          uint_64* array_longlong,
+                          const int max_nbor_size,
+                          const int nloc,
+                          const int nall,
+                          const float rcut,
+                          const std::vector<int> sec);

template <typename FPTYPE>
-void test_encoding_decoding_nbor_info_gpu_cuda(uint_64* key,
-                                               int* out_type,
-                                               int* out_index,
-                                               const int* in_type,
-                                               const FPTYPE* in_dist,
-                                               const int* in_index,
-                                               const int size_of_array);
+void test_encoding_decoding_nbor_info_gpu(uint_64* key,
-                                          int* out_type,
+                                          int* out_index,
+                                          const int* in_type,
+                                          const FPTYPE* in_dist,
+                                          const int* in_index,
+                                          const int size_of_array);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
-void format_nbor_list_gpu_rocm(int* nlist,
-                               const FPTYPE* coord,
-                               const int* type,
-                               const deepmd::InputNlist& gpu_inlist,
-                               int* array_int,
-                               uint_64* array_longlong,
-                               const int max_nbor_size,
-                               const int nloc,
-                               const int nall,
-                               const float rcut,
-                               const std::vector<int> sec);
+void format_nbor_list_gpu(int* nlist,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          int* array_int,
+                          uint_64* array_longlong,
+                          const int max_nbor_size,
+                          const int nloc,
+                          const int nall,
+                          const float rcut,
+                          const std::vector<int> sec);

template <typename FPTYPE>
-void test_encoding_decoding_nbor_info_gpu_rocm(uint_64* key,
-                                               int* out_type,
-                                               int* out_index,
-                                               const int* in_type,
-                                               const FPTYPE* in_dist,
-                                               const int* in_index,
-                                               const int size_of_array);
+void test_encoding_decoding_nbor_info_gpu(uint_64* key,
+                                          int* out_type,
+                                          int* out_index,
+                                          const int* in_type,
+                                          const FPTYPE* in_dist,
+                                          const int* in_index,
+                                          const int size_of_array);
#endif // TENSORFLOW_USE_ROCM

} // namespace deepmd
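With both backends now declaring the same format_nbor_list_gpu signature under their respective guards, a caller needs only one GPU branch. A sketch of that dispatch; the wrapper name format_nbor_list_any_gpu is hypothetical, and it assumes the header above is included and the arguments are prepared by the caller:

template <typename FPTYPE>
void format_nbor_list_any_gpu(int* nlist, const FPTYPE* coord, const int* type,
                              const deepmd::InputNlist& gpu_inlist, int* array_int,
                              uint_64* array_longlong, const int max_nbor_size,
                              const int nloc, const int nall, const float rcut,
                              const std::vector<int>& sec) {
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
  // One name covers both GPU backends; the active build guard picks the kernels.
  deepmd::format_nbor_list_gpu(nlist, coord, type, gpu_inlist, array_int,
                               array_longlong, max_nbor_size, nloc, nall, rcut, sec);
#else
  // No GPU backend compiled in; a CPU fallback (format_nlist_cpu) would go here.
#endif
}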
36 changes: 18 additions & 18 deletions source/lib/include/gelu.h
@@ -22,38 +22,38 @@ void gelu_grad_grad_cpu(FPTYPE* out,

#if GOOGLE_CUDA
template <typename FPTYPE>
-void gelu_gpu_cuda(FPTYPE* out, const FPTYPE* xx, const int_64 size);
+void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);

template <typename FPTYPE>
-void gelu_grad_gpu_cuda(FPTYPE* out,
-                        const FPTYPE* xx,
-                        const FPTYPE* dy,
-                        const int_64 size);
+void gelu_grad_gpu(FPTYPE* out,
+                   const FPTYPE* xx,
+                   const FPTYPE* dy,
+                   const int_64 size);

template <typename FPTYPE>
-void gelu_grad_grad_gpu_cuda(FPTYPE* out,
-                             const FPTYPE* xx,
-                             const FPTYPE* dy,
-                             const FPTYPE* dy_2,
-                             const int_64 size);
+void gelu_grad_grad_gpu(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const FPTYPE* dy_2,
+                        const int_64 size);
#endif // GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
template <typename FPTYPE>
-void gelu_gpu_rocm(FPTYPE* out, const FPTYPE* xx, const int_64 size);
+void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);

template <typename FPTYPE>
-void gelu_grad_gpu_rocm(FPTYPE* out,
-                        const FPTYPE* xx,
-                        const FPTYPE* dy,
-                        const int_64 size);
+void gelu_grad_gpu(FPTYPE* out,
+                   const FPTYPE* xx,
+                   const FPTYPE* dy,
+                   const int_64 size);

template <typename FPTYPE>
-void gelu_grad_grad_gpu_rocm(FPTYPE* out,
-                             const FPTYPE* xx,
-                             const FPTYPE* dy,
-                             const FPTYPE* dy_2,
-                             const int_64 size);
+void gelu_grad_grad_gpu(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const FPTYPE* dy_2,
+                        const int_64 size);

#endif // TENSORFLOW_USE_ROCM
} // namespace deepmd
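For reference, the elementwise operation behind these declarations: a self-contained CPU sketch of GELU in its common tanh approximation. The constants sqrt(2/pi) ≈ 0.7978845608 and 0.044715 are the standard approximation values and are an assumption here, not a quote of the GPU kernels:

#include <cmath>
#include <cstdint>

template <typename FPTYPE>
void gelu_reference(FPTYPE* out, const FPTYPE* xx, const int64_t size) {
  for (int64_t i = 0; i < size; ++i) {
    const FPTYPE x = xx[i];
    // gelu(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    out[i] = FPTYPE(0.5) * x *
             (FPTYPE(1.) + std::tanh(FPTYPE(0.7978845608028654) *
                                     (x + FPTYPE(0.044715) * x * x * x)));
  }
}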
38 changes: 19 additions & 19 deletions source/lib/include/neighbor_list.h
@@ -150,7 +150,7 @@ int build_nlist_gpu(InputNlist& nlist,
* @param ftype_in The input atom type.
* @param nloc The number of atoms.
*/
-void filter_ftype_gpu_cuda(int* ftype_out, const int* ftype_in, const int nloc);
+void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc);

void use_nei_info_gpu(int* nlist,
int* ntype,
@@ -177,32 +177,32 @@ void use_nei_info_gpu(int* nlist,
// 1: the memory is not large enough to hold all neighbors.
// i.e. max_list_size > mem_nall
template <typename FPTYPE>
-int build_nlist_gpu_rocm(InputNlist& nlist,
-                         int* max_list_size,
-                         int* nlist_data,
-                         const FPTYPE* c_cpy,
-                         const int& nloc,
-                         const int& nall,
-                         const int& mem_size,
-                         const float& rcut);
+int build_nlist_gpu(InputNlist& nlist,
+                    int* max_list_size,
+                    int* nlist_data,
+                    const FPTYPE* c_cpy,
+                    const int& nloc,
+                    const int& nall,
+                    const int& mem_size,
+                    const float& rcut);
/**
* @brief Filter the fake atom type.
* @details If >=0, set to 0; if <0, set to -1.
* @param ftype_out The output filtered atom type.
* @param ftype_in The input atom type.
* @param nloc The number of atoms.
*/
-void filter_ftype_gpu_rocm(int* ftype_out, const int* ftype_in, const int nloc);
+void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc);

-void use_nei_info_gpu_rocm(int* nlist,
-                           int* ntype,
-                           bool* nmask,
-                           const int* type,
-                           const int* nlist_map,
-                           const int nloc,
-                           const int nnei,
-                           const int ntypes,
-                           const bool b_nlist_map);
+void use_nei_info_gpu(int* nlist,
+                      int* ntype,
+                      bool* nmask,
+                      const int* type,
+                      const int* nlist_map,
+                      const int nloc,
+                      const int nnei,
+                      const int ntypes,
+                      const bool b_nlist_map);

#endif // TENSORFLOW_USE_ROCM
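
The filter_ftype_gpu contract above ("if >= 0, set to 0; if < 0, set to -1") pins down the semantics exactly; a CPU sketch of that documented behavior (only the loop body is implied by the doc comment, the function name is hypothetical scaffolding):

// Collapse real atom types (>= 0) to 0 and fake/virtual types (< 0) to -1.
void filter_ftype_reference(int* ftype_out, const int* ftype_in, const int nloc) {
  for (int i = 0; i < nloc; ++i) {
    ftype_out[i] = (ftype_in[i] >= 0) ? 0 : -1;
  }
}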
