From f8a6c8f20ceb4233a8b5cc70ccee009c5df18f80 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 19 Sep 2023 20:59:48 -0400 Subject: [PATCH] merge CUDA and ROCm in header files Signed-off-by: Jinzhe Zeng --- source/lib/include/coord.h | 45 +--------- source/lib/include/fmt_nlist.h | 28 +----- source/lib/include/gelu.h | 23 +---- source/lib/include/neighbor_list.h | 46 +--------- source/lib/include/prod_env_mat.h | 52 +---------- source/lib/include/prod_env_mat_nvnmd.h | 8 +- source/lib/include/prod_force.h | 26 +----- source/lib/include/prod_force_grad.h | 23 +---- source/lib/include/prod_virial.h | 28 +----- source/lib/include/prod_virial_grad.h | 24 +---- source/lib/include/region.h | 19 +--- source/lib/include/tabulate.h | 112 +----------------------- 12 files changed, 24 insertions(+), 410 deletions(-) diff --git a/source/lib/include/coord.h b/source/lib/include/coord.h index fb60f6440b..699a90898c 100644 --- a/source/lib/include/coord.h +++ b/source/lib/include/coord.h @@ -44,7 +44,7 @@ void compute_cell_info(int* cell_info, const float& rcut, const deepmd::Region& region); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // normalize coords // output: // coord @@ -83,47 +83,6 @@ int copy_coord_gpu(FPTYPE* out_c, const int& total_cellnum, const int* cell_info, const deepmd::Region& region); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -// normalize coords -// output: -// coord -// input: -// natom, box_info: boxt, rec_boxt -template -void normalize_coord_gpu(FPTYPE* coord, - const int natom, - const deepmd::Region& region); - -// copy coordinates -// outputs: -// out_c, out_t, mapping, nall, -// int_data(temp cuda -// memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map, -// cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist -// inputs: -// in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info, -// box_info mem_nall is the size of allocated memory for out_c, out_t, -// mapping -// returns -// 0: succssful -// 1: the memory is not large enough to hold all copied coords and types. -// i.e. 
nall > mem_nall -template -int copy_coord_gpu(FPTYPE* out_c, - int* out_t, - int* mapping, - int* nall, - int* int_data, - const FPTYPE* in_c, - const int* in_t, - const int& nloc, - const int& mem_nall, - const int& loc_cellnum, - const int& total_cellnum, - const int* cell_info, - const deepmd::Region& region); -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/fmt_nlist.h b/source/lib/include/fmt_nlist.h index 1e7c6574cc..18cb319304 100644 --- a/source/lib/include/fmt_nlist.h +++ b/source/lib/include/fmt_nlist.h @@ -18,7 +18,7 @@ void format_nlist_cpu(int* nlist, const float rcut, const std::vector sec); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void format_nbor_list_gpu(int* nlist, const FPTYPE* coord, @@ -40,31 +40,7 @@ void test_encoding_decoding_nbor_info_gpu(uint_64* key, const FPTYPE* in_dist, const int* in_index, const int size_of_array); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -template -void format_nbor_list_gpu(int* nlist, - const FPTYPE* coord, - const int* type, - const deepmd::InputNlist& gpu_inlist, - int* array_int, - uint_64* array_longlong, - const int max_nbor_size, - const int nloc, - const int nall, - const float rcut, - const std::vector sec); - -template -void test_encoding_decoding_nbor_info_gpu(uint_64* key, - int* out_type, - int* out_index, - const int* in_type, - const FPTYPE* in_dist, - const int* in_index, - const int size_of_array); -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/gelu.h b/source/lib/include/gelu.h index 946c283c8d..013d4ef02b 100644 --- a/source/lib/include/gelu.h +++ b/source/lib/include/gelu.h @@ -20,7 +20,7 @@ void gelu_grad_grad_cpu(FPTYPE* out, const FPTYPE* dy_2, const int_64 size); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size); @@ -36,24 +36,5 @@ void gelu_grad_grad_gpu(FPTYPE* out, const FPTYPE* dy, const FPTYPE* dy_2, const int_64 size); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -template -void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size); - -template -void gelu_grad_gpu(FPTYPE* out, - const FPTYPE* xx, - const FPTYPE* dy, - const int_64 size); - -template -void gelu_grad_grad_gpu(FPTYPE* out, - const FPTYPE* xx, - const FPTYPE* dy, - const FPTYPE* dy_2, - const int_64 size); - -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h index 5ed2dd4501..eb510eb25b 100644 --- a/source/lib/include/neighbor_list.h +++ b/source/lib/include/neighbor_list.h @@ -121,7 +121,7 @@ void use_nlist_map(int* nlist, #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // build neighbor list. // outputs // nlist, max_list_size @@ -162,49 +162,7 @@ void use_nei_info_gpu(int* nlist, const int ntypes, const bool b_nlist_map); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -// build neighbor list. -// outputs -// nlist, max_list_size -// max_list_size is the maximal size of jlist. -// inputs -// c_cpy, nloc, nall, mem_size, rcut, region -// mem_size is the size of allocated memory for jlist. -// returns -// 0: succssful -// 1: the memory is not large enough to hold all neighbors. -// i.e. 
max_list_size > mem_nall -template -int build_nlist_gpu(InputNlist& nlist, - int* max_list_size, - int* nlist_data, - const FPTYPE* c_cpy, - const int& nloc, - const int& nall, - const int& mem_size, - const float& rcut); -/** - * @brief Filter the fake atom type. - * @details If >=0, set to 0; if <0, set to -1. - * @param ftype_out The output filtered atom type. - * @param ftype_in The input atom type. - * @param nloc The number of atoms. - */ -void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc); - -void use_nei_info_gpu(int* nlist, - int* ntype, - bool* nmask, - const int* type, - const int* nlist_map, - const int nloc, - const int nnei, - const int ntypes, - const bool b_nlist_map); - -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/prod_env_mat.h b/source/lib/include/prod_env_mat.h index 91f09f74e7..60da638d68 100644 --- a/source/lib/include/prod_env_mat.h +++ b/source/lib/include/prod_env_mat.h @@ -42,7 +42,7 @@ void prod_env_mat_r_cpu(FPTYPE *em, const float rcut_smth, const std::vector sec); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void prod_env_mat_a_gpu(FPTYPE *em, FPTYPE *em_deriv, @@ -88,54 +88,6 @@ void env_mat_nbor_update(InputNlist &inlist, int *&nbor_list_dev, const int *mesh, const int size); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -template -void prod_env_mat_a_gpu(FPTYPE *em, - FPTYPE *em_deriv, - FPTYPE *rij, - int *nlist, - const FPTYPE *coord, - const int *type, - const InputNlist &gpu_inlist, - int *array_int, - unsigned long long *array_longlong, - const int max_nbor_size, - const FPTYPE *avg, - const FPTYPE *std, - const int nloc, - const int nall, - const float rcut, - const float rcut_smth, - const std::vector sec, - const int *f_type = NULL); - -template -void prod_env_mat_r_gpu(FPTYPE *em, - FPTYPE *em_deriv, - FPTYPE *rij, - int *nlist, - const FPTYPE *coord, - const int *type, - const InputNlist &gpu_inlist, - int *array_int, - unsigned long long *array_longlong, - const int max_nbor_size, - const FPTYPE *avg, - const FPTYPE *std, - const int nloc, - const int nall, - const float rcut, - const float rcut_smth, - const std::vector sec); - -void env_mat_nbor_update(InputNlist &inlist, - InputNlist &gpu_inlist, - int &max_nbor_size, - int *&nbor_list_dev, - const int *mesh, - const int size); -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/prod_env_mat_nvnmd.h b/source/lib/include/prod_env_mat_nvnmd.h index df70423021..c0a7e32cc4 100644 --- a/source/lib/include/prod_env_mat_nvnmd.h +++ b/source/lib/include/prod_env_mat_nvnmd.h @@ -45,12 +45,8 @@ void prod_env_mat_a_nvnmd_quantize_cpu(FPTYPE* em, const std::vector sec, const int* f_type = NULL); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // UNDEFINE -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -// UNDEFINE -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/prod_force.h b/source/lib/include/prod_force.h index 03c72ba661..b5ae68bdce 100644 --- a/source/lib/include/prod_force.h +++ b/source/lib/include/prod_force.h @@ -67,7 +67,7 @@ void prod_force_r_cpu(FPTYPE* force, const int nnei, const int nframes); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void prod_force_a_gpu(FPTYPE* force, const FPTYPE* net_deriv, @@ -87,28 +87,6 @@ void prod_force_r_gpu(FPTYPE* force, const int nall, 
const int nnei, const int nframes); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -template -void prod_force_a_gpu(FPTYPE* force, - const FPTYPE* net_deriv, - const FPTYPE* in_deriv, - const int* nlist, - const int nloc, - const int nall, - const int nnei, - const int nframes); - -template -void prod_force_r_gpu(FPTYPE* force, - const FPTYPE* net_deriv, - const FPTYPE* in_deriv, - const int* nlist, - const int nloc, - const int nall, - const int nnei, - const int nframes); -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/prod_force_grad.h b/source/lib/include/prod_force_grad.h index 5d0ab50b68..737d54001d 100644 --- a/source/lib/include/prod_force_grad.h +++ b/source/lib/include/prod_force_grad.h @@ -21,7 +21,7 @@ void prod_force_grad_r_cpu(FPTYPE* grad_net, const int nnei, const int nframes); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void prod_force_grad_a_gpu(FPTYPE* grad_net, const FPTYPE* grad, @@ -39,25 +39,6 @@ void prod_force_grad_r_gpu(FPTYPE* grad_net, const int nloc, const int nnei, const int nframes); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_ROCM -template -void prod_force_grad_a_gpu(FPTYPE* grad_net, - const FPTYPE* grad, - const FPTYPE* env_deriv, - const int* nlist, - const int nloc, - const int nnei, - const int nframes); - -template -void prod_force_grad_r_gpu(FPTYPE* grad_net, - const FPTYPE* grad, - const FPTYPE* env_deriv, - const int* nlist, - const int nloc, - const int nnei, - const int nframes); -#endif // TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/prod_virial.h b/source/lib/include/prod_virial.h index 348188874c..d42b547d32 100644 --- a/source/lib/include/prod_virial.h +++ b/source/lib/include/prod_virial.h @@ -25,7 +25,7 @@ void prod_virial_r_cpu(FPTYPE* virial, const int nall, const int nnei); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void prod_virial_a_gpu(FPTYPE* virial, FPTYPE* atom_virial, @@ -47,30 +47,6 @@ void prod_virial_r_gpu(FPTYPE* virial, const int nloc, const int nall, const int nnei); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -template -void prod_virial_a_gpu(FPTYPE* virial, - FPTYPE* atom_virial, - const FPTYPE* net_deriv, - const FPTYPE* env_deriv, - const FPTYPE* rij, - const int* nlist, - const int nloc, - const int nall, - const int nnei); - -template -void prod_virial_r_gpu(FPTYPE* virial, - FPTYPE* atom_virial, - const FPTYPE* net_deriv, - const FPTYPE* env_deriv, - const FPTYPE* rij, - const int* nlist, - const int nloc, - const int nall, - const int nnei); -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/prod_virial_grad.h b/source/lib/include/prod_virial_grad.h index 6e0c232f8a..eda98f9634 100644 --- a/source/lib/include/prod_virial_grad.h +++ b/source/lib/include/prod_virial_grad.h @@ -21,7 +21,7 @@ void prod_virial_grad_r_cpu(FPTYPE* grad_net, const int nloc, const int nnei); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void prod_virial_grad_a_gpu(FPTYPE* grad_net, const FPTYPE* grad, @@ -39,26 +39,6 @@ void prod_virial_grad_r_gpu(FPTYPE* grad_net, const int* nlist, const int nloc, const int nnei); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -template -void prod_virial_grad_a_gpu(FPTYPE* grad_net, - const FPTYPE* grad, - const FPTYPE* env_deriv, - const FPTYPE* rij, - const int* nlist, - const int nloc, - const int 
nnei); - -template -void prod_virial_grad_r_gpu(FPTYPE* grad_net, - const FPTYPE* grad, - const FPTYPE* env_deriv, - const FPTYPE* rij, - const int* nlist, - const int nloc, - const int nnei); -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/region.h b/source/lib/include/region.h index 9db2735462..2f6dbbf4e0 100644 --- a/source/lib/include/region.h +++ b/source/lib/include/region.h @@ -27,7 +27,7 @@ void convert_to_phys_cpu(FPTYPE* rp, const Region& region, const FPTYPE* ri); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // only for unittest template void convert_to_inter_gpu(FPTYPE* ri, @@ -41,21 +41,6 @@ void convert_to_phys_gpu(FPTYPE* rp, template void volume_gpu(FPTYPE* volume, const Region& region); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_ROCM -// only for unittest -template -void convert_to_inter_gpu(FPTYPE* ri, - const Region& region, - const FPTYPE* rp); - -template -void convert_to_phys_gpu(FPTYPE* rp, - const Region& region, - const FPTYPE* ri); - -template -void volume_gpu(FPTYPE* volume, const Region& region); -#endif // TENSORFLOW_USE_ROCM } // namespace deepmd diff --git a/source/lib/include/tabulate.h b/source/lib/include/tabulate.h index 96072e6a33..76a46bbe6c 100644 --- a/source/lib/include/tabulate.h +++ b/source/lib/include/tabulate.h @@ -108,7 +108,7 @@ void tabulate_fusion_se_r_grad_grad_cpu(FPTYPE* dz_dy, const int nnei, const int last_layer_size); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template void tabulate_fusion_se_a_gpu(FPTYPE* out, const FPTYPE* table, @@ -213,113 +213,5 @@ void tabulate_fusion_se_r_grad_grad_gpu(FPTYPE* dz_dy, const int nloc, const int nnei, const int last_layer_size); -#endif // GOOGLE_CUDA - -#if TENSORFLOW_USE_ROCM -template -void tabulate_fusion_se_a_gpu(FPTYPE* out, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em_x, - const FPTYPE* em, - const FPTYPE* two_embed, - const int nloc, - const int nnei, - const int last_layer_size, - const bool is_sorted = true); - -template -void tabulate_fusion_se_a_grad_gpu(FPTYPE* dy_dem_x, - FPTYPE* dy_dem, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em_x, - const FPTYPE* em, - const FPTYPE* two_embed, - const FPTYPE* dy, - const int nloc, - const int nnei, - const int last_layer_size, - const bool is_sorted = true); - -template -void tabulate_fusion_se_a_grad_grad_gpu(FPTYPE* dz_dy, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em_x, - const FPTYPE* em, - const FPTYPE* dz_dy_dem_x, - const FPTYPE* dz_dy_dem, - const int nloc, - const int nnei, - const int last_layer_size, - const bool is_sorted = true); - -template -void tabulate_fusion_se_t_gpu(FPTYPE* out, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em_x, - const FPTYPE* em, - const int nloc, - const int nnei_i, - const int nnei_j, - const int last_layer_size); - -template -void tabulate_fusion_se_t_grad_gpu(FPTYPE* dy_dem_x, - FPTYPE* dy_dem, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em_x, - const FPTYPE* em, - const FPTYPE* dy, - const int nloc, - const int nnei_i, - const int nnei_j, - const int last_layer_size); - -template -void tabulate_fusion_se_t_grad_grad_gpu(FPTYPE* dz_dy, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em_x, - const FPTYPE* em, - const FPTYPE* dz_dy_dem_x, - const FPTYPE* dz_dy_dem, - const int nloc, - const int nnei_i, - const int 
nnei_j, - const int last_layer_size); - -template -void tabulate_fusion_se_r_gpu(FPTYPE* out, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em, - const int nloc, - const int nnei, - const int last_layer_size); - -template -void tabulate_fusion_se_r_grad_gpu(FPTYPE* dy_dem, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em, - const FPTYPE* dy, - const int nloc, - const int nnei, - const int last_layer_size); - -template -void tabulate_fusion_se_r_grad_grad_gpu(FPTYPE* dz_dy, - const FPTYPE* table, - const FPTYPE* table_info, - const FPTYPE* em, - const FPTYPE* dz_dy_dem, - const int nloc, - const int nnei, - const int last_layer_size); - -#endif // TENSORFLOW_USE_ROCM +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace deepmd
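
Note (not part of the patch): the change applied in every header above follows a single pattern, sketched below with a hypothetical `example_op_gpu` declaration rather than a function from the library. Previously each GPU declaration appeared twice, once per backend guard; after this patch one declaration is guarded by both macros, so the CUDA and ROCm builds share the same prototypes.

    // Before: the same declaration duplicated for each backend.
    #if GOOGLE_CUDA
    template <typename FPTYPE>
    void example_op_gpu(FPTYPE* out, const FPTYPE* in, const int size);
    #endif  // GOOGLE_CUDA

    #if TENSORFLOW_USE_ROCM
    template <typename FPTYPE>
    void example_op_gpu(FPTYPE* out, const FPTYPE* in, const int size);
    #endif  // TENSORFLOW_USE_ROCM

    // After: one declaration covers both CUDA and ROCm builds.
    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
    template <typename FPTYPE>
    void example_op_gpu(FPTYPE* out, const FPTYPE* in, const int size);
    #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

Since the declarations in each pair of blocks were identical, only the preprocessor guards change; no function signature is modified, which is why the diff removes roughly 400 lines while adding about 24.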