remove _cuda or _rocm suffix (#2839)
Remove all `_cuda` and `_rocm` suffixes from function names, as proposed in
#2838, so that the now identically named CUDA and ROCm implementations can
be merged in follow-up PRs.

(Replace all: `gpu_cuda` -> `gpu`; `gpu_rocm` -> `gpu`)
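
For illustration (a sketch of the stated follow-up, not code from this commit): once both backends expose the same unsuffixed name, the two per-backend declaration blocks can later collapse into a single guarded one, e.g.:

// Hypothetical follow-up shape, assuming the usual GOOGLE_CUDA /
// TENSORFLOW_USE_ROCM guards: one declaration then serves both backends.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
template <typename FPTYPE>
void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
#endif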

Signed-off-by: Jinzhe Zeng <[email protected]>
njzjz authored Sep 19, 2023
1 parent 0aae8e5 commit 339ce47
Showing 55 changed files with 1,999 additions and 2,049 deletions.
32 changes: 16 additions & 16 deletions source/lib/include/coord.h
@@ -92,9 +92,9 @@ int copy_coord_gpu(FPTYPE* out_c,
 // input:
 // natom, box_info: boxt, rec_boxt
 template <typename FPTYPE>
-void normalize_coord_gpu_rocm(FPTYPE* coord,
-                              const int natom,
-                              const deepmd::Region<FPTYPE>& region);
+void normalize_coord_gpu(FPTYPE* coord,
+                         const int natom,
+                         const deepmd::Region<FPTYPE>& region);
 
 // copy coordinates
 // outputs:
@@ -111,19 +111,19 @@ void normalize_coord_gpu_rocm(FPTYPE* coord,
 // 1: the memory is not large enough to hold all copied coords and types.
 // i.e. nall > mem_nall
 template <typename FPTYPE>
-int copy_coord_gpu_rocm(FPTYPE* out_c,
-                        int* out_t,
-                        int* mapping,
-                        int* nall,
-                        int* int_data,
-                        const FPTYPE* in_c,
-                        const int* in_t,
-                        const int& nloc,
-                        const int& mem_nall,
-                        const int& loc_cellnum,
-                        const int& total_cellnum,
-                        const int* cell_info,
-                        const deepmd::Region<FPTYPE>& region);
+int copy_coord_gpu(FPTYPE* out_c,
+                   int* out_t,
+                   int* mapping,
+                   int* nall,
+                   int* int_data,
+                   const FPTYPE* in_c,
+                   const int* in_t,
+                   const int& nloc,
+                   const int& mem_nall,
+                   const int& loc_cellnum,
+                   const int& total_cellnum,
+                   const int* cell_info,
+                   const deepmd::Region<FPTYPE>& region);
 #endif // TENSORFLOW_USE_ROCM
 
 } // namespace deepmd
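
For context (not part of this diff): a minimal caller-side sketch of the renamed coord API, assuming dev_coord already holds natom * 3 FPTYPE values in device memory and region carries the box info (boxt, rec_boxt):

// Illustrative wrapper only; device allocation and host-device copies
// are omitted.
template <typename FPTYPE>
void wrap_coords_into_box(FPTYPE* dev_coord,
                          const int natom,
                          const deepmd::Region<FPTYPE>& region) {
  deepmd::normalize_coord_gpu(dev_coord, natom, region);
}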
72 changes: 36 additions & 36 deletions source/lib/include/fmt_nlist.h
@@ -20,50 +20,50 @@ void format_nlist_cpu(int* nlist,
 
 #if GOOGLE_CUDA
 template <typename FPTYPE>
-void format_nbor_list_gpu_cuda(int* nlist,
-                               const FPTYPE* coord,
-                               const int* type,
-                               const deepmd::InputNlist& gpu_inlist,
-                               int* array_int,
-                               uint_64* array_longlong,
-                               const int max_nbor_size,
-                               const int nloc,
-                               const int nall,
-                               const float rcut,
-                               const std::vector<int> sec);
+void format_nbor_list_gpu(int* nlist,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          int* array_int,
+                          uint_64* array_longlong,
+                          const int max_nbor_size,
+                          const int nloc,
+                          const int nall,
+                          const float rcut,
+                          const std::vector<int> sec);
 
 template <typename FPTYPE>
-void test_encoding_decoding_nbor_info_gpu_cuda(uint_64* key,
-                                               int* out_type,
-                                               int* out_index,
-                                               const int* in_type,
-                                               const FPTYPE* in_dist,
-                                               const int* in_index,
-                                               const int size_of_array);
+void test_encoding_decoding_nbor_info_gpu(uint_64* key,
+                                          int* out_type,
+                                          int* out_index,
+                                          const int* in_type,
+                                          const FPTYPE* in_dist,
+                                          const int* in_index,
+                                          const int size_of_array);
 #endif // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
 template <typename FPTYPE>
-void format_nbor_list_gpu_rocm(int* nlist,
-                               const FPTYPE* coord,
-                               const int* type,
-                               const deepmd::InputNlist& gpu_inlist,
-                               int* array_int,
-                               uint_64* array_longlong,
-                               const int max_nbor_size,
-                               const int nloc,
-                               const int nall,
-                               const float rcut,
-                               const std::vector<int> sec);
+void format_nbor_list_gpu(int* nlist,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          int* array_int,
+                          uint_64* array_longlong,
+                          const int max_nbor_size,
+                          const int nloc,
+                          const int nall,
+                          const float rcut,
+                          const std::vector<int> sec);
 
 template <typename FPTYPE>
-void test_encoding_decoding_nbor_info_gpu_rocm(uint_64* key,
-                                               int* out_type,
-                                               int* out_index,
-                                               const int* in_type,
-                                               const FPTYPE* in_dist,
-                                               const int* in_index,
-                                               const int size_of_array);
+void test_encoding_decoding_nbor_info_gpu(uint_64* key,
+                                          int* out_type,
+                                          int* out_index,
+                                          const int* in_type,
+                                          const FPTYPE* in_dist,
+                                          const int* in_index,
+                                          const int size_of_array);
 #endif // TENSORFLOW_USE_ROCM
 
 } // namespace deepmd
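
For context (not part of this diff): the test_encoding_decoding_nbor_info_gpu declaration suggests that neighbor (type, distance, index) triples are packed into the uint_64 sort keys passed via array_longlong. A hypothetical packing, not the library's actual encoding, could look like:

#include <cstdint>

// Hypothetical layout: 8-bit type | 40-bit quantized distance | 16-bit
// index. Sorting such keys orders neighbors by type, then by distance.
inline uint64_t encode_nbor_info(int type, double dist, int index) {
  const uint64_t t = static_cast<uint64_t>(type) & 0xFFu;
  const uint64_t d =
      static_cast<uint64_t>(dist * 1.0e6) & 0xFFFFFFFFFFull;  // 40 bits
  const uint64_t i = static_cast<uint64_t>(index) & 0xFFFFu;
  return (t << 56) | (d << 16) | i;
}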
36 changes: 18 additions & 18 deletions source/lib/include/gelu.h
@@ -22,38 +22,38 @@ void gelu_grad_grad_cpu(FPTYPE* out,
 
 #if GOOGLE_CUDA
 template <typename FPTYPE>
-void gelu_gpu_cuda(FPTYPE* out, const FPTYPE* xx, const int_64 size);
+void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
 
 template <typename FPTYPE>
-void gelu_grad_gpu_cuda(FPTYPE* out,
-                        const FPTYPE* xx,
-                        const FPTYPE* dy,
-                        const int_64 size);
+void gelu_grad_gpu(FPTYPE* out,
+                   const FPTYPE* xx,
+                   const FPTYPE* dy,
+                   const int_64 size);
 
 template <typename FPTYPE>
-void gelu_grad_grad_gpu_cuda(FPTYPE* out,
-                             const FPTYPE* xx,
-                             const FPTYPE* dy,
-                             const FPTYPE* dy_2,
-                             const int_64 size);
+void gelu_grad_grad_gpu(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const FPTYPE* dy_2,
+                        const int_64 size);
 #endif // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
 template <typename FPTYPE>
-void gelu_gpu_rocm(FPTYPE* out, const FPTYPE* xx, const int_64 size);
+void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
 
 template <typename FPTYPE>
-void gelu_grad_gpu_rocm(FPTYPE* out,
-                        const FPTYPE* xx,
-                        const FPTYPE* dy,
-                        const int_64 size);
+void gelu_grad_gpu(FPTYPE* out,
+                   const FPTYPE* xx,
+                   const FPTYPE* dy,
+                   const int_64 size);
 
 template <typename FPTYPE>
-void gelu_grad_grad_gpu_rocm(FPTYPE* out,
-                             const FPTYPE* xx,
-                             const FPTYPE* dy,
-                             const FPTYPE* dy_2,
-                             const int_64 size);
+void gelu_grad_grad_gpu(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const FPTYPE* dy_2,
+                        const int_64 size);
 
 #endif // TENSORFLOW_USE_ROCM
 } // namespace deepmd
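
For reference (assumed, not taken from this repository's kernels): the three declarations per backend cover GELU and its first and second derivatives; the activation itself is commonly the tanh approximation, as in this CPU sketch:

#include <cmath>
#include <cstdint>

// Reference-only CPU sketch of tanh-approximated GELU; assumed to mirror
// the math behind gelu_gpu, not copied from the GPU kernels.
template <typename FPTYPE>
void gelu_cpu_ref(FPTYPE* out, const FPTYPE* xx, const int64_t size) {
  const FPTYPE c = std::sqrt(FPTYPE(2.0) / FPTYPE(3.14159265358979));
  for (int64_t i = 0; i < size; ++i) {
    const FPTYPE x = xx[i];
    out[i] = FPTYPE(0.5) * x *
             (FPTYPE(1.0) + std::tanh(c * (x + FPTYPE(0.044715) * x * x * x)));
  }
}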
38 changes: 19 additions & 19 deletions source/lib/include/neighbor_list.h
@@ -150,7 +150,7 @@ int build_nlist_gpu(InputNlist& nlist,
  * @param ftype_in The input atom type.
  * @param nloc The number of atoms.
  */
-void filter_ftype_gpu_cuda(int* ftype_out, const int* ftype_in, const int nloc);
+void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc);
 
 void use_nei_info_gpu(int* nlist,
                       int* ntype,
@@ -177,32 +177,32 @@ void use_nei_info_gpu(int* nlist,
 // 1: the memory is not large enough to hold all neighbors.
 // i.e. max_list_size > mem_nall
 template <typename FPTYPE>
-int build_nlist_gpu_rocm(InputNlist& nlist,
-                         int* max_list_size,
-                         int* nlist_data,
-                         const FPTYPE* c_cpy,
-                         const int& nloc,
-                         const int& nall,
-                         const int& mem_size,
-                         const float& rcut);
+int build_nlist_gpu(InputNlist& nlist,
+                    int* max_list_size,
+                    int* nlist_data,
+                    const FPTYPE* c_cpy,
+                    const int& nloc,
+                    const int& nall,
+                    const int& mem_size,
+                    const float& rcut);
 /**
  * @brief Filter the fake atom type.
  * @details If >=0, set to 0; if <0, set to -1.
  * @param ftype_out The output filtered atom type.
  * @param ftype_in The input atom type.
  * @param nloc The number of atoms.
  */
-void filter_ftype_gpu_rocm(int* ftype_out, const int* ftype_in, const int nloc);
+void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc);
 
-void use_nei_info_gpu_rocm(int* nlist,
-                           int* ntype,
-                           bool* nmask,
-                           const int* type,
-                           const int* nlist_map,
-                           const int nloc,
-                           const int nnei,
-                           const int ntypes,
-                           const bool b_nlist_map);
+void use_nei_info_gpu(int* nlist,
+                      int* ntype,
+                      bool* nmask,
+                      const int* type,
+                      const int* nlist_map,
+                      const int nloc,
+                      const int nnei,
+                      const int ntypes,
+                      const bool b_nlist_map);
 
 #endif // TENSORFLOW_USE_ROCM
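
For context (not part of this diff): the doc comment above fully specifies filter_ftype_gpu; a CPU illustration of the same mapping:

// Mirrors the documented semantics: real atoms (ftype >= 0) map to 0,
// fake/virtual atoms (ftype < 0) map to -1.
void filter_ftype_cpu_ref(int* ftype_out, const int* ftype_in, const int nloc) {
  for (int i = 0; i < nloc; ++i) {
    ftype_out[i] = (ftype_in[i] >= 0) ? 0 : -1;
  }
}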
