Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fesom tracer sesh #551

Draft
wants to merge 8 commits into
base: lumi_gpu_evatali
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions env/levante.dkrz.de/shell.nvhpc
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ export CPU_MODEL=AMD_EPYC_ZEN3
module --force purge
# module load intel-oneapi-compilers/2022.0.1-gcc-11.2.0
# module load openmpi/4.1.2-intel-2021.5.0
module load nvhpc/22.5-gcc-11.2.0
module load openmpi/.4.1.4-nvhpc-22.5
module load nvhpc/23.9-gcc-11.2.0
module load openmpi/4.1.6-nvhpc-23.9
export FC=mpif90 CC=mpicc CXX=mpicxx;

module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0
Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ elseif(${FESOM_PLATFORM_STRATEGY} STREQUAL albedo )
message(STATUS "multithreading disabled for Albedo") # multithreading suddenly produces an error, disable it until a fix is found. issue #413
option(DISABLE_MULTITHREADING "disable asynchronous operations" ON)
else()
option(DISABLE_MULTITHREADING "disable asynchronous operations" OFF)
option(DISABLE_MULTITHREADING "disable asynchronous operations" ON)
endif()

option(ENABLE_OPENACC "compile with OpenACC support" OFF)
option(DISABLE_OPENACC_ATOMICS "disable kernels using atomic statement for reproducible results" ON)
set(GPU_COMPUTE_CAPABILITY "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)")
set(GPU_FLAGS "cuda11.7,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)")
set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)")

option(ENABLE_OPENMP "build FESOM with OpenMP" OFF)
if(${ENABLE_OPENMP})
Expand Down
2 changes: 2 additions & 0 deletions src/fesom_module.F90
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ subroutine fesom_runloop(current_nsteps)
!$ACC CREATE (f%tracers%work%adv_flux_hor, f%tracers%work%adv_flux_ver, f%tracers%work%fct_LO) &
!$ACC CREATE (f%tracers%work%del_ttf_advvert, f%tracers%work%del_ttf_advhoriz, f%tracers%work%edge_up_dn_grad) &
!$ACC CREATE (f%tracers%work%del_ttf)
!$ACC DATA CREATE(tr_xy, tr_z, relax2clim, Sclim, Tclim)
do n=nstart, ntotal
if (use_global_tides) then
call foreph(f%partit, f%mesh)
Expand Down Expand Up @@ -465,6 +466,7 @@ subroutine fesom_runloop(current_nsteps)
!$ACC EXIT DATA DELETE (f%tracers%work%adv_flux_hor, f%tracers%work%adv_flux_ver, f%tracers%work%fct_LO)
!$ACC EXIT DATA DELETE (f%tracers%work%del_ttf_advvert, f%tracers%work%del_ttf_advhoriz, f%tracers%work%edge_up_dn_grad)
!$ACC EXIT DATA DELETE (f%tracers%work%del_ttf)
!$ACC END DATA
!$ACC EXIT DATA DELETE (f%tracers%data, f%tracers%work)
!$ACC EXIT DATA DELETE (f%dynamics%w, f%dynamics%w_e, f%dynamics%uv)

Expand Down
7 changes: 4 additions & 3 deletions src/oce_ale_tracer.F90
Original file line number Diff line number Diff line change
Expand Up @@ -198,17 +198,18 @@ subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh)
! do tracer AB (Adams-Bashforth) interpolation only for advective part
! needed
if (flag_debug .and. mype==0) print *, achar(27)//'[37m'//' --> call init_tracers_AB'//achar(27)//'[0m'
!$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB)
call init_tracers_AB(tr_num, tracers, partit, mesh)

! advect tracers
if (flag_debug .and. mype==0) print *, achar(27)//'[37m'//' --> call adv_tracers_ale'//achar(27)//'[0m'


!here update only those initialized in the init_tracers. (values, valuesAB, edge_up_dn_grad, ...)
!$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) &
!$ACC DEVICE(tracers%work%edge_up_dn_grad) !!&
!!$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) &
!!$ACC DEVICE(tracers%work%edge_up_dn_grad) !!&
! it will update del_ttf with contributions from horizontal and vertical advection parts (del_ttf_advhoriz and del_ttf_advvert)
!$ACC wait(1)
!!$ACC wait(1)
call do_oce_adv_tra(dt, UV, Wvel, Wvel_i, Wvel_e, tr_num, dynamics, tracers, partit, mesh)


Expand Down
61 changes: 55 additions & 6 deletions src/oce_tracer_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh)
type(t_tracer), intent(inout), target :: tracers
integer :: n,nz

!$ACC parallel loop collapse(2) default(present) async(1)
#ifdef ENABLE_OPENACC
!$ACC parallel loop collapse(2)
#endif
do n=1, partit%myDim_nod2D+partit%eDim_nod2D
do nz=1, mesh%nl-1
! del_ttf will contain all advection / diffusion contributions for this tracer. Set it to 0 at the beginning!
Expand All @@ -34,32 +36,45 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh)
tracers%work%del_ttf_advvert (nz, n) = 0.0_WP
end do
end do
#ifdef ENABLE_OPENACC
!$ACC end parallel loop
#endif

#ifndef ENABLE_OPENACC
!$OMP PARALLEL DO
#else
!$ACC parallel loop collapse(2)
#endif
do n=1, partit%myDim_nod2D+partit%eDim_nod2D
! AB interpolation
tracers%data(tr_num)%valuesAB(:, n) =-(0.5_WP+epsilon)*tracers%data(tr_num)%valuesAB(:, n)+(1.5_WP+epsilon)*tracers%data(tr_num)%values(:, n)
do nz = 1, mesh%nl
tracers%data(tr_num)%valuesAB(nz, n) =-(0.5_WP+epsilon)*tracers%data(tr_num)%valuesAB(nz, n)+(1.5_WP+epsilon)*tracers%data(tr_num)%values(nz, n)
end do
end do
#ifndef ENABLE_OPENACC
!$OMP END PARALLEL DO
#else
!$ACC end parallel loop
#endif

if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call tracer_gradient_elements'//achar(27)//'[0m'
call tracer_gradient_elements(tracers%data(tr_num)%valuesAB, partit, mesh)
call exchange_elem_begin(tr_xy, partit)
call exchange_elem_begin(tr_xy, partit, luse_g2g = .true.)

if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call tracer_gradient_z'//achar(27)//'[0m'
call tracer_gradient_z(tracers%data(tr_num)%values, partit, mesh) !WHY NOT AB HERE? DSIDOREN!
call exchange_elem_end(partit) ! tr_xy used in fill_up_dn_grad
!$OMP BARRIER

call exchange_nod_begin(tr_z, partit) ! not used in fill_up_dn_grad
call exchange_nod_begin(tr_z, partit, luse_g2g = .true.) ! not used in fill_up_dn_grad

if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call fill_up_dn_grad'//achar(27)//'[0m'
call fill_up_dn_grad(tracers%work, partit, mesh)
call exchange_nod_end(partit) ! tr_z halos should have arrived by now.

if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call tracer_gradient_elements'//achar(27)//'[0m'
call tracer_gradient_elements(tracers%data(tr_num)%values, partit, mesh) !redefine tr_arr to the current timestep
call exchange_elem(tr_xy, partit)
call exchange_elem(tr_xy, partit, luse_g2g = .true.)

END SUBROUTINE init_tracers_AB
!
Expand All @@ -85,7 +100,11 @@ SUBROUTINE tracer_gradient_elements(ttf, partit, mesh)
#include "associate_mesh_def.h"
#include "associate_part_ass.h"
#include "associate_mesh_ass.h"
#ifndef ENABLE_OPENACC
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(elem, elnodes, nz, nzmin, nzmax)
#else
!$ACC parallel loop private(elnodes)
#endif
DO elem=1, myDim_elem2D
elnodes=elem2D_nodes(:,elem)
nzmin = ulevels(elem)
Expand All @@ -96,7 +115,11 @@ SUBROUTINE tracer_gradient_elements(ttf, partit, mesh)
tr_xy(2,nz, elem)=sum(gradient_sca(4:6,elem)*ttf(nz,elnodes))
END DO
END DO
#ifndef ENABLE_OPENACC
!$OMP END PARALLEL DO
#else
!$ACC end parallel loop
#endif
END SUBROUTINE tracer_gradient_elements
!
!
Expand All @@ -121,7 +144,11 @@ SUBROUTINE tracer_gradient_z(ttf, partit, mesh)
#include "associate_mesh_def.h"
#include "associate_part_ass.h"
#include "associate_mesh_ass.h"
#ifndef ENABLE_OPENACC
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nz, nzmin, nzmax, dz)
#else
!$ACC parallel loop
#endif
DO n=1, myDim_nod2D+eDim_nod2D
!!PS nlev=nlevels_nod2D(n)
nzmax=nlevels_nod2D(n)
Expand All @@ -136,7 +163,11 @@ SUBROUTINE tracer_gradient_z(ttf, partit, mesh)
tr_z(nzmin, n)=0.0_WP
tr_z(nzmax, n)=0.0_WP
END DO
#ifndef ENABLE_OPENACC
!$OMP END PARALLEL DO
#else
!$ACC end parallel loop
#endif
END SUBROUTINE tracer_gradient_z
!
!
Expand Down Expand Up @@ -164,7 +195,12 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh)
trarr=>tracers%data(tr_num)%values(:,:)

if ((clim_relax>1.0e-8_WP).and.(tracers%data(tr_num)%ID==1)) then
#ifndef ENABLE_OPENACC
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nzmin, nzmax)
#else
!$ACC update device(relax2clim, Tclim)
!$ACC parallel loop
#endif
DO n=1, myDim_nod2D
nzmin = ulevels_nod2D(n)
nzmax = nlevels_nod2D(n)
Expand All @@ -173,17 +209,30 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh)
trarr(nzmin:nzmax-1,n)=trarr(nzmin:nzmax-1,n)+&
relax2clim(n)*dt*(Tclim(nzmin:nzmax-1,n)-trarr(nzmin:nzmax-1,n))
END DO
#ifndef ENABLE_OPENACC
!$OMP END PARALLEL DO
#else
!$ACC end parallel loop
#endif
END if
if ((clim_relax>1.0e-8_WP).and.(tracers%data(tr_num)%ID==2)) then
#ifndef ENABLE_OPENACC
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nzmin, nzmax)
#else
!$ACC update device(Sclim)
!$ACC parallel loop
#endif
DO n=1, myDim_nod2D
nzmin = ulevels_nod2D(n)
nzmax = nlevels_nod2D(n)
trarr(nzmin:nzmax-1,n)=trarr(nzmin:nzmax-1,n)+&
relax2clim(n)*dt*(Sclim(nzmin:nzmax-1,n)-trarr(nzmin:nzmax-1,n))
END DO
#ifndef ENABLE_OPENACC
!$OMP END PARALLEL DO
#else
!$ACC end parallel loop
#endif
END IF
END SUBROUTINE relax_to_clim
END MODULE o_tracers
END MODULE o_tracers
Loading