Skip to content

Commit

Permalink
initializes textures when preparing gpu arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
danielpeter committed Nov 16, 2015
1 parent 1a74bb9 commit bb31d9f
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 144 deletions.
90 changes: 11 additions & 79 deletions src/cuda/compute_forces_acoustic_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,10 @@ template<> __device__ float texfetch_potential_dot_dot<3>(int x) { return tex1Df
#endif

#ifdef USE_TEXTURES_CONSTANTS
realw_texture d_hprime_xx_tex;
//realw_texture d_hprimewgll_xx_tex;
realw_texture d_wxgll_xx_tex;
// already defined in compute_forces_viscoelastic_cuda.cu
extern realw_texture d_hprime_xx_tex;
//extern realw_texture d_hprimewgll_xx_tex;
extern realw_texture d_wxgll_xx_tex;
#endif


Expand Down Expand Up @@ -454,73 +455,6 @@ void FC_FUNC_(compute_forces_acoustic_cuda,
//double start_time = get_time();

Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper


if(mp->simulation_type==3){
#ifdef USE_TEXTURES_FIELDS
{
int size = mp->NGLOB_AB;
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_b_potential_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_potential_tex_ref_ptr, "d_b_potential_tex"), 3001);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_potential_tex_ref_ptr, mp->d_b_potential_acoustic, &channelDesc, sizeof(realw)*size), 3001);

const textureReference* d_b_potential_dot_dot_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_potential_dot_dot_tex_ref_ptr, "d_b_potential_dot_dot_tex"),3003);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_potential_dot_dot_tex_ref_ptr, mp->d_b_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 3003);
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_b_potential_tex, mp->d_b_potential_acoustic, &channelDesc, sizeof(realw)*size), 3001);
print_CUDA_error_if_any(cudaBindTexture(0, &d_b_potential_dot_dot_tex, mp->d_b_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 3003);
#endif
}
#endif

}

#ifdef USE_TEXTURES_FIELDS
{
int size = mp->NGLOB_AB;
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_potential_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_potential_tex_ref_ptr, "d_potential_tex"), 2001);
print_CUDA_error_if_any(cudaBindTexture(0, d_potential_tex_ref_ptr, mp->d_potential_acoustic, &channelDesc, sizeof(realw)*size), 2001);

const textureReference* d_potential_dot_dot_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_potential_dot_dot_tex_ref_ptr, "d_potential_dot_dot_tex"), 2003);
print_CUDA_error_if_any(cudaBindTexture(0, d_potential_dot_dot_tex_ref_ptr, mp->d_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 2003);
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_potential_tex, mp->d_potential_acoustic, &channelDesc, sizeof(realw)*size), 2001);
print_CUDA_error_if_any(cudaBindTexture(0, &d_potential_dot_dot_tex, mp->d_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 2003);
#endif
}
#endif



#ifdef USE_TEXTURES_CONSTANTS
{
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_hprime_xx_tex_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_hprime_xx_tex_ptr, "d_hprime_xx_tex"), 4101);
print_CUDA_error_if_any(cudaBindTexture(0, d_hprime_xx_tex_ptr, mp->d_hprime_xx, &channelDesc, sizeof(realw)*(NGLL2)), 4001);
const textureReference* d_wxgll_xx_tex_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_wxgll_xx_tex_ptr, "d_wxgll_xx_tex"), 4101);
print_CUDA_error_if_any(cudaBindTexture(0, d_wxgll_xx_tex_ptr, mp->d_wxgll, &channelDesc, sizeof(realw)*(NGLL2)), 4001);
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
// print_CUDA_error_if_any(cudaBindTexture(0, &d_hprimewgll_xx_tex, mp->d_hprimewgll_xx, &channelDesc, sizeof(realw)*(NGLL2)), 40010);
print_CUDA_error_if_any(cudaBindTexture(0, &d_hprime_xx_tex, mp->d_hprime_xx, &channelDesc, sizeof(realw)*(NGLL2)), 4002);
print_CUDA_error_if_any(cudaBindTexture(0, &d_wxgll_xx_tex, mp->d_wxgll, &channelDesc, sizeof(realw)*(NGLLX)), 40013);
#endif
}
#endif


int num_elements;

if( *iphase == 1 )
Expand All @@ -530,15 +464,13 @@ if(mp->simulation_type==3){

if( num_elements == 0 ) return;

// no mesh coloring: uses atomic updates
Kernel_2_acoustic(num_elements, mp, *iphase,
mp->d_ibool,
mp->d_xix,mp->d_xiz,
mp->d_gammax,mp->d_gammaz,
mp->d_rhostore,
mp->d_kappastore);


// no mesh coloring: uses atomic updates
Kernel_2_acoustic(num_elements, mp, *iphase,
mp->d_ibool,
mp->d_xix,mp->d_xiz,
mp->d_gammax,mp->d_gammaz,
mp->d_rhostore,
mp->d_kappastore);
}


Expand Down
72 changes: 7 additions & 65 deletions src/cuda/compute_forces_viscoelastic_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@

#ifdef USE_TEXTURES_FIELDS

realw_texture d_displ_tex;
realw_texture d_accel_tex;
// backward/reconstructed
realw_texture d_b_displ_tex;
realw_texture d_b_accel_tex;
realw_texture d_displ_tex;
realw_texture d_accel_tex;
// backward/reconstructed
realw_texture d_b_displ_tex;
realw_texture d_b_accel_tex;

//note: texture variables are implicitly static, and cannot be passed as arguments to cuda kernels;
// thus, 1) we thus use if-statements (FORWARD_OR_ADJOINT) to determine from which texture to fetch from
Expand All @@ -83,7 +83,7 @@ template<> __device__ float texfetch_accel<3>(int x) { return tex1Dfetch(d_b_acc

#ifdef USE_TEXTURES_CONSTANTS
realw_texture d_hprime_xx_tex;
realw_texture d_hprimewgll_xx_tex;
//realw_texture d_hprimewgll_xx_tex;
realw_texture d_wxgll_xx_tex;
#endif

Expand Down Expand Up @@ -1063,64 +1063,6 @@ void FC_FUNC_(compute_forces_viscoelastic_cuda,
//double start_time = get_time();

Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper


#ifdef USE_TEXTURES_FIELDS
{
int size = NDIM * mp->NGLOB_AB;
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_displ_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_displ_tex_ref_ptr, "d_displ_tex"), 4001);
print_CUDA_error_if_any(cudaBindTexture(0, d_displ_tex_ref_ptr, mp->d_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ){
const textureReference* d_accel_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_accel_tex_ref_ptr, "d_accel_tex"), 4003);
print_CUDA_error_if_any(cudaBindTexture(0, d_accel_tex_ref_ptr, mp->d_accel, &channelDesc, sizeof(realw)*size), 4003);
}
if(mp->simulation_type == 3) {
const textureReference* d_b_displ_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_displ_tex_ref_ptr, "d_b_displ_tex"), 4001);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_displ_tex_ref_ptr, mp->d_b_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ){
const textureReference* d_b_accel_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_accel_tex_ref_ptr, "d_b_accel_tex"), 4003);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_accel_tex_ref_ptr, mp->d_b_accel, &channelDesc, sizeof(realw)*size), 4003);
}}
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_displ_tex, mp->d_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ) print_CUDA_error_if_any(cudaBindTexture(0, &d_accel_tex, mp->d_accel, &channelDesc, sizeof(realw)*size), 4003);

if(mp->simulation_type == 3) {
print_CUDA_error_if_any(cudaBindTexture(0, &d_b_displ_tex, mp->d_b_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ) print_CUDA_error_if_any(cudaBindTexture(0, &d_b_accel_tex, mp->d_b_accel, &channelDesc, sizeof(realw)*size), 4003);
}
#endif
}
#endif


#ifdef USE_TEXTURES_CONSTANTS
{
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_hprime_xx_tex_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_hprime_xx_tex_ptr, "d_hprime_xx_tex"), 4101);
print_CUDA_error_if_any(cudaBindTexture(0, d_hprime_xx_tex_ptr, mp->d_hprime_xx, &channelDesc, sizeof(realw)*(NGLL2)), 4001);
const textureReference* d_wxgll_xx_tex_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_wxgll_xx_tex_ptr, "d_wxgll_xx_tex"), 4101);
print_CUDA_error_if_any(cudaBindTexture(0, d_wxgll_xx_tex_ptr, mp->d_wxgll, &channelDesc, sizeof(realw)*(NGLL2)), 4001);
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_hprime_xx_tex, mp->d_hprime_xx, &channelDesc, sizeof(realw)*(NGLL2)), 4001);
print_CUDA_error_if_any(cudaBindTexture(0, &d_wxgll_xx_tex, mp->d_wxgll, &channelDesc, sizeof(realw)*(NGLLX)), 40013);
// print_CUDA_error_if_any(cudaBindTexture(0, &d_hprimewgll_xx_tex, mp->d_hprimewgll_xx, &channelDesc, sizeof(realw)*(NGLL2)), 40010);

#endif
}
#endif

int num_elements;

if( *iphase == 1 )
Expand Down Expand Up @@ -1194,7 +1136,7 @@ void FC_FUNC_(compute_forces_viscoelastic_cuda,

}else{
// no mesh coloring: uses atomic updates
Kernel_2(num_elements,mp,*iphase,*deltat,*ANISOTROPY,
Kernel_2(num_elements,mp,*iphase,*deltat,*ANISOTROPY,
mp->d_ibool,
mp->d_xix,mp->d_xiz,
mp->d_gammax,mp->d_gammaz,
Expand Down
127 changes: 127 additions & 0 deletions src/cuda/prepare_mesh_constants_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,30 @@
#include "prepare_constants_cuda.h"


#ifdef USE_OLDER_CUDA4_GPU
#else
#ifdef USE_TEXTURES_FIELDS
// elastic
extern realw_texture d_displ_tex;
extern realw_texture d_accel_tex;
// backward/reconstructed
extern realw_texture d_b_displ_tex;
extern realw_texture d_b_accel_tex;
// acoustic
extern realw_texture d_potential_tex;
extern realw_texture d_potential_dot_dot_tex;
// backward/reconstructed
extern realw_texture d_b_potential_tex;
extern realw_texture d_b_potential_dot_dot_tex;
#endif
#ifdef USE_TEXTURES_CONSTANTS
extern realw_texture d_hprime_xx_tex;
//extern realw_texture d_hprimewgll_xx_tex;
extern realw_texture d_wxgll_xx_tex;
#endif
#endif


/* ----------------------------------------------------------------------------------------------- */

// helper functions
Expand Down Expand Up @@ -203,6 +227,33 @@ void FC_FUNC_(prepare_constants_device,

//setConst_hprimewgll_zz(h_hprimewgll_zz,mp); // only needed if NGLLX != NGLLY != NGLLZ

// Using texture memory for the hprime-style constants is slower on
// Fermi generation hardware, but *may* be faster on Kepler
// generation. We will reevaluate this again, so might as well leave
// in the code with with #USE_TEXTURES_FIELDS not-defined.
#ifdef USE_TEXTURES_CONSTANTS
{
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();

const textureReference* d_hprime_xx_tex_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_hprime_xx_tex_ptr, "d_hprime_xx_tex"), 4101);
print_CUDA_error_if_any(cudaBindTexture(0, d_hprime_xx_tex_ptr, mp->d_hprime_xx, &channelDesc, sizeof(realw)*(NGLL2)), 4001);

const textureReference* d_wxgll_xx_tex_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_wxgll_xx_tex_ptr, "d_wxgll_xx_tex"), 4102);
print_CUDA_error_if_any(cudaBindTexture(0, d_wxgll_xx_tex_ptr, mp->d_wxgll, &channelDesc, sizeof(realw)*(NGLL2)), 4002);

#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();

print_CUDA_error_if_any(cudaBindTexture(0, &d_hprime_xx_tex, mp->d_hprime_xx, &channelDesc, sizeof(realw)*(NGLL2)), 4001);
print_CUDA_error_if_any(cudaBindTexture(0, &d_wxgll_xx_tex, mp->d_wxgll, &channelDesc, sizeof(realw)*(NGLLX)), 40013);
//print_CUDA_error_if_any(cudaBindTexture(0, &d_hprimewgll_xx_tex, mp->d_hprimewgll_xx, &channelDesc, sizeof(realw)*(NGLL2)), 40010);
#endif
}
#endif


// mesh
// Assuming NGLLX=5. Padded is then 32 (5^2+3)
Expand Down Expand Up @@ -390,6 +441,25 @@ void FC_FUNC_(prepare_fields_acoustic_device,
//print_CUDA_error_if_any(cudaMemset(mp->d_potential_dot_acoustic,0,sizeof(realw)*size),2007);
//print_CUDA_error_if_any(cudaMemset(mp->d_potential_dot_dot_acoustic,0,sizeof(realw)*size),2007);

#ifdef USE_TEXTURES_FIELDS
{
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_potential_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_potential_tex_ref_ptr, "d_potential_tex"), 2001);
print_CUDA_error_if_any(cudaBindTexture(0, d_potential_tex_ref_ptr, mp->d_potential_acoustic, &channelDesc, sizeof(realw)*size), 2001);

const textureReference* d_potential_dot_dot_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_potential_dot_dot_tex_ref_ptr, "d_potential_dot_dot_tex"), 2003);
print_CUDA_error_if_any(cudaBindTexture(0, d_potential_dot_dot_tex_ref_ptr, mp->d_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 2003);
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_potential_tex, mp->d_potential_acoustic, &channelDesc, sizeof(realw)*size), 2001);
print_CUDA_error_if_any(cudaBindTexture(0, &d_potential_dot_dot_tex, mp->d_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 2003);
#endif
}
#endif

// mpi buffer
mp->size_mpi_buffer_potential = (mp->num_interfaces_ext_mesh) * (mp->max_nibool_interfaces_ext_mesh);
if( mp->size_mpi_buffer_potential > 0 ){
Expand Down Expand Up @@ -496,6 +566,24 @@ void FC_FUNC_(prepare_fields_acoustic_adj_dev,
print_CUDA_error_if_any(cudaMemset(mp->d_b_potential_dot_dot_acoustic,0,sizeof(realw)*size),3007);


#ifdef USE_TEXTURES_FIELDS
{
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_b_potential_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_potential_tex_ref_ptr, "d_b_potential_tex"), 3001);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_potential_tex_ref_ptr, mp->d_b_potential_acoustic, &channelDesc, sizeof(realw)*size), 3001);

const textureReference* d_b_potential_dot_dot_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_potential_dot_dot_tex_ref_ptr, "d_b_potential_dot_dot_tex"),3003);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_potential_dot_dot_tex_ref_ptr, mp->d_b_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 3003);
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_b_potential_tex, mp->d_b_potential_acoustic, &channelDesc, sizeof(realw)*size), 3001);
print_CUDA_error_if_any(cudaBindTexture(0, &d_b_potential_dot_dot_tex, mp->d_b_potential_dot_dot_acoustic, &channelDesc, sizeof(realw)*size), 3003);
#endif
}
#endif

// allocates kernels
size = NGLL2*mp->NSPEC_AB;
Expand Down Expand Up @@ -575,6 +663,26 @@ void FC_FUNC_(prepare_fields_elastic_device,
//print_CUDA_error_if_any(cudaMemset(mp->d_veloc,0,sizeof(realw)*size),4007);
//print_CUDA_error_if_any(cudaMemset(mp->d_accel,0,sizeof(realw)*size),4007);

#ifdef USE_TEXTURES_FIELDS
{
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_displ_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_displ_tex_ref_ptr, "d_displ_tex"), 4001);
print_CUDA_error_if_any(cudaBindTexture(0, d_displ_tex_ref_ptr, mp->d_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ){
const textureReference* d_accel_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_accel_tex_ref_ptr, "d_accel_tex"), 4003);
print_CUDA_error_if_any(cudaBindTexture(0, d_accel_tex_ref_ptr, mp->d_accel, &channelDesc, sizeof(realw)*size), 4003);
}
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_displ_tex, mp->d_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ) print_CUDA_error_if_any(cudaBindTexture(0, &d_accel_tex, mp->d_accel, &channelDesc, sizeof(realw)*size), 4003);
#endif
}
#endif


// debug
//synchronize_mpi();
Expand Down Expand Up @@ -775,6 +883,25 @@ void FC_FUNC_(prepare_fields_elastic_adj_dev,
//print_CUDA_error_if_any(cudaMemset(mp->d_b_veloc,0,sizeof(realw)*size),5207);
//print_CUDA_error_if_any(cudaMemset(mp->d_b_accel,0,sizeof(realw)*size),5207);

#ifdef USE_TEXTURES_FIELDS
{
#ifdef USE_OLDER_CUDA4_GPU
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
const textureReference* d_b_displ_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_displ_tex_ref_ptr, "d_b_displ_tex"), 4001);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_displ_tex_ref_ptr, mp->d_b_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ){
const textureReference* d_b_accel_tex_ref_ptr;
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_accel_tex_ref_ptr, "d_b_accel_tex"), 4003);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_accel_tex_ref_ptr, mp->d_b_accel, &channelDesc, sizeof(realw)*size), 4003);
}
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
print_CUDA_error_if_any(cudaBindTexture(0, &d_b_displ_tex, mp->d_b_displ, &channelDesc, sizeof(realw)*size), 4001);
if( mp->use_mesh_coloring_gpu ) print_CUDA_error_if_any(cudaBindTexture(0, &d_b_accel_tex, mp->d_b_accel, &channelDesc, sizeof(realw)*size), 4003);
#endif
}
#endif

// anisotropic/isotropic kernels
// debug
Expand Down

0 comments on commit bb31d9f

Please sign in to comment.