Skip to content

Commit

Permalink
(*)Parenthesize parameterization squares for FMAs
Browse files Browse the repository at this point in the history
  Added parentheses to 29 sums of squares of velocity or other vector components
used in parameterizations in 9 modules to give rotationally consistent solutions
when fused-multiply-adds are enabled.  All answers are bitwise identical in
cases without FMAs, but answers could change when FMAs are enabled.
  • Loading branch information
Hallberg-NOAA committed Mar 18, 2024
1 parent 5467616 commit 720b81f
Show file tree
Hide file tree
Showing 9 changed files with 30 additions and 30 deletions.
2 changes: 1 addition & 1 deletion src/diagnostics/MOM_sum_output.F90
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ subroutine write_energy(u, v, h, tv, day, n, G, GV, US, CS, tracer_CSp, dt_forci
tmp1(:,:,:) = 0.0
do k=1,nz ; do j=js,je ; do i=is,ie
tmp1(i,j,k) = (0.25 * KE_scale_factor * (areaTm(i,j) * h(i,j,k))) * &
((u(I-1,j,k)**2 + u(I,j,k)**2) + (v(i,J-1,k)**2 + v(i,J,k)**2))
(((u(I-1,j,k)**2) + (u(I,j,k)**2)) + ((v(i,J-1,k)**2) + (v(i,J,k)**2)))
enddo ; enddo ; enddo

KE_tot = reproducing_sum(tmp1, isr, ier, jsr, jer, sums=KE)
Expand Down
26 changes: 13 additions & 13 deletions src/parameterizations/lateral/MOM_MEKE.F90
Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,10 @@ subroutine step_forward_MEKE(MEKE, h, SN_u, SN_v, visc, dt, G, GV, US, CS, hu, h
!$OMP parallel do default(shared)
do j=js,je ; do i=is,ie
drag_rate_visc(i,j) = (0.25*G%IareaT(i,j) * &
((G%areaCu(I-1,j)*drag_vel_u(I-1,j) + &
G%areaCu(I,j)*drag_vel_u(I,j)) + &
(G%areaCv(i,J-1)*drag_vel_v(i,J-1) + &
G%areaCv(i,J)*drag_vel_v(i,J)) ) )
(((G%areaCu(I-1,j)*drag_vel_u(I-1,j)) + &
(G%areaCu(I,j)*drag_vel_u(I,j))) + &
((G%areaCv(i,J-1)*drag_vel_v(i,J-1)) + &
(G%areaCv(i,J)*drag_vel_v(i,J))) ) )
enddo ; enddo
else
!$OMP parallel do default(shared)
Expand Down Expand Up @@ -821,8 +821,8 @@ subroutine MEKE_equilibrium(CS, MEKE, G, GV, US, SN_u, SN_v, drag_rate_visc, I_m
(depth_tot(i,j)-depth_tot(i,j-1)) * G%IdyCv(i,J-1) &
/ max(depth_tot(i,j), depth_tot(i,j-1), h_neglect) )
endif
beta = sqrt((G%dF_dx(i,j) + beta_topo_x)**2 + &
(G%dF_dy(i,j) + beta_topo_y)**2 )
beta = sqrt(((G%dF_dx(i,j) + beta_topo_x)**2) + &
((G%dF_dy(i,j) + beta_topo_y)**2) )

if (KhCoeff*SN*I_mass(i,j)>0.) then
! Solve resid(E) = 0, where resid = Kh(E) * (SN)^2 - damp_rate(E) E
Expand Down Expand Up @@ -1001,8 +1001,8 @@ subroutine MEKE_lengthScales(CS, MEKE, G, GV, US, SN_u, SN_v, EKE, depth_tot, &
(depth_tot(i,j)-depth_tot(i,j-1)) * G%IdyCv(i,J-1) &
/ max(depth_tot(i,j), depth_tot(i,j-1), h_neglect) )
endif
beta = sqrt((G%dF_dx(i,j) + beta_topo_x)**2 + &
(G%dF_dy(i,j) + beta_topo_y)**2 )
beta = sqrt(((G%dF_dx(i,j) + beta_topo_x)**2) + &
((G%dF_dy(i,j) + beta_topo_y)**2) )

else
beta = 0.
Expand Down Expand Up @@ -1618,9 +1618,9 @@ subroutine ML_MEKE_calculate_features(G, GV, US, CS, Rd_dx_h, u, v, tv, h, dt, f
endif

! Calculate mean kinetic energy
u_t = a_e*u(I,j,1)+a_w*u(I-1,j,1)
v_t = a_n*v(i,J,1)+a_s*v(i,J-1,1)
mke(i,j) = 0.5*( u_t*u_t + v_t*v_t )
u_t = (a_e*u(I,j,1)) + (a_w*u(I-1,j,1))
v_t = (a_n*v(i,J,1)) + (a_s*v(i,J-1,1))
mke(i,j) = 0.5*( (u_t*u_t) + (v_t*v_t) )

! Calculate the magnitude of the slope
slope_t = slope_x_vert_avg(I,j)*a_e+slope_x_vert_avg(I-1,j)*a_w
Expand All @@ -1632,8 +1632,8 @@ subroutine ML_MEKE_calculate_features(G, GV, US, CS, Rd_dx_h, u, v, tv, h, dt, f

! Calculate relative vorticity
do J=Jsq-1,Jeq+1 ; do I=Isq-1,Ieq+1
dvdx = (v(i+1,J,1)*G%dyCv(i+1,J) - v(i,J,1)*G%dyCv(i,J))
dudy = (u(I,j+1,1)*G%dxCu(I,j+1) - u(I,j,1)*G%dxCu(I,j))
dvdx = ((v(i+1,J,1)*G%dyCv(i+1,J)) - (v(i,J,1)*G%dyCv(i,J)))
dudy = ((u(I,j+1,1)*G%dxCu(I,j+1)) - (u(I,j,1)*G%dxCu(I,j)))
! Assumed no slip
rv_z(I,J) = (2.0-G%mask2dBu(I,J)) * (dvdx - dudy) * G%IareaBu(I,J)
enddo; enddo
Expand Down
4 changes: 2 additions & 2 deletions src/parameterizations/lateral/MOM_interface_filter.F90
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ subroutine interface_filter(h, uhtr, vhtr, tv, dt, G, GV, US, CDp, CS)
if (CS%isotropic_filter) then
!$OMP parallel do default(shared)
do j=js-hs,je+hs ; do I=is-(hs+1),ie+hs
Lsm2_u(I,j) = (0.25*filter_strength) / (G%IdxCu(I,j)**2 + G%IdyCu(I,j)**2)
Lsm2_u(I,j) = (0.25*filter_strength) / ((G%IdxCu(I,j)**2) + (G%IdyCu(I,j)**2))
enddo ; enddo
!$OMP parallel do default(shared)
do J=js-(hs+1),je+hs ; do i=is-hs,ie+hs
Lsm2_v(i,J) = (0.25*filter_strength) / (G%IdxCv(i,J)**2 + G%IdyCv(i,J)**2)
Lsm2_v(i,J) = (0.25*filter_strength) / ((G%IdxCv(i,J)**2) + (G%IdyCv(i,J)**2))
enddo ; enddo
else
!$OMP parallel do default(shared)
Expand Down
4 changes: 2 additions & 2 deletions src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
absf = 0.5*(abs(G%CoriolisBu(I,J-1)) + abs(G%CoriolisBu(I,J)))
! If needed, res_scaling_fac = min( ds, L_d ) / l_f
if (res_upscale) res_scaling_fac = &
( sqrt( 0.5 * ( G%dxCu(I,j)**2 + G%dyCu(I,j)**2 ) ) * I_LFront ) &
( sqrt( 0.5 * ( (G%dxCu(I,j)**2) + (G%dyCu(I,j)**2) ) ) * I_LFront ) &
* min( 1., 0.5*( VarMix%Rd_dx_h(i,j) + VarMix%Rd_dx_h(i+1,j) ) )

! peak ML visc: u_star * von_Karman * (h_ml*u_star)/(absf*h_ml + 4.0*u_star)
Expand Down Expand Up @@ -609,7 +609,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
absf = 0.5*(abs(G%CoriolisBu(I-1,J)) + abs(G%CoriolisBu(I,J)))
! If needed, res_scaling_fac = min( ds, L_d ) / l_f
if (res_upscale) res_scaling_fac = &
( sqrt( 0.5 * ( (G%dxCv(i,J))**2 + (G%dyCv(i,J))**2 ) ) * I_LFront ) &
( sqrt( 0.5 * ( (G%dxCv(i,J)**2) + (G%dyCv(i,J)**2) ) ) * I_LFront ) &
* min( 1., 0.5*( VarMix%Rd_dx_h(i,j) + VarMix%Rd_dx_h(i,j+1) ) )

! peak ML visc: u_star * von_Karman * (h_ml*u_star)/(absf*h_ml + 4.0*u_star)
Expand Down
2 changes: 1 addition & 1 deletion src/parameterizations/vertical/MOM_CVMix_KPP.F90
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,7 @@ subroutine KPP_compute_BLD(CS, G, GV, US, h, Temp, Salt, u, v, tv, uStar, buoyFl
Vk = Vk + (0.5*(Waves%Us_y(i,j,k)+Waves%Us_y(i,j-1,k)) - surfVs )
endif

deltaU2(k) = US%L_T_to_m_s**2 * (Uk**2 + Vk**2)
deltaU2(k) = US%L_T_to_m_s**2 * ((Uk**2) + (Vk**2))

! pressure, temperature, and salinity for calling the equation of state
! kk+1 = surface fields
Expand Down
2 changes: 1 addition & 1 deletion src/parameterizations/vertical/MOM_CVMix_shear.F90
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ subroutine calculate_CVMix_shear(u_H, v_H, h, tv, kd, kv, G, GV, US, CS )
endif
dz_int = 0.5*(dz(i,km1) + dz(i,k)) + GV%dZ_subroundoff
N2 = DRHO / dz_int
S2 = US%L_to_Z**2*(DU*DU + DV*DV) / (dz_int*dz_int)
S2 = US%L_to_Z**2*((DU*DU) + (DV*DV)) / (dz_int*dz_int)
Ri_Grad(k) = max(0., N2) / max(S2, 1.e-10*US%T_to_s**2)

! fill 3d arrays, if user asks for diagnostics
Expand Down
10 changes: 5 additions & 5 deletions src/parameterizations/vertical/MOM_bulk_mixed_layer.F90
Original file line number Diff line number Diff line change
Expand Up @@ -907,7 +907,7 @@ subroutine convective_adjustment(h, u, v, R0, SpV0, Rcv, T, S, eps, d_eb, &
do k1=min(nzc-1,nkmb),1,-1
do i=is,ie
h_orig_k1(i) = h(i,k1)
KE_orig(i) = 0.5*h(i,k1)*(u(i,k1)**2 + v(i,k1)**2)
KE_orig(i) = 0.5*h(i,k1)*((u(i,k1)**2) + (v(i,k1)**2))
uhtot(i) = h(i,k1)*u(i,k1) ; vhtot(i) = h(i,k1)*v(i,k1)
if (CS%nonBous_energetics) then
SpV0_tot(i) = SpV0(i,k1) * h(i,k1)
Expand Down Expand Up @@ -944,7 +944,7 @@ subroutine convective_adjustment(h, u, v, R0, SpV0, Rcv, T, S, eps, d_eb, &
dKE_CA(i,k1) = dKE_CA(i,k1) + dKE_CA(i,k)
endif
KE_orig(i) = KE_orig(i) + 0.5*h_ent* &
(u(i,k)*u(i,k) + v(i,k)*v(i,k))
((u(i,k)*u(i,k)) + (v(i,k)*v(i,k)))
uhtot(i) = uhtot(i) + h_ent*u(i,k)
vhtot(i) = vhtot(i) + h_ent*v(i,k)

Expand All @@ -969,7 +969,7 @@ subroutine convective_adjustment(h, u, v, R0, SpV0, Rcv, T, S, eps, d_eb, &
endif
u(i,k1) = uhtot(i) * Ih ; v(i,k1) = vhtot(i) * Ih
dKE_CA(i,k1) = dKE_CA(i,k1) + CS%bulk_Ri_convective * &
(KE_orig(i) - 0.5*h(i,k1)*(u(i,k1)**2 + v(i,k1)**2))
(KE_orig(i) - 0.5*h(i,k1)*((u(i,k1)**2) + (v(i,k1)**2)))
Rcv(i,k1) = Rcv_tot(i) * Ih
T(i,k1) = Ttot(i) * Ih ; S(i,k1) = Stot(i) * Ih
endif ; enddo
Expand Down Expand Up @@ -1402,7 +1402,7 @@ subroutine mixedlayer_convection(h, d_eb, htot, Ttot, Stot, uhtot, vhtot, &
if ((h_ent > 0.0) .and. (htot(i) > 0.0)) &
dKE_FC(i) = dKE_FC(i) + CS%bulk_Ri_convective * 0.5 * &
((h_ent) / (htot(i)*(h_ent+htot(i)))) * &
((uhtot(i)-u(i,k)*htot(i))**2 + (vhtot(i)-v(i,k)*htot(i))**2)
(((uhtot(i)-u(i,k)*htot(i))**2) + ((vhtot(i)-v(i,k)*htot(i))**2))

if (h_ent > 0.0) then
htot(i) = htot(i) + h_ent
Expand Down Expand Up @@ -1780,7 +1780,7 @@ subroutine mechanical_entrainment(h, d_eb, htot, Ttot, Stot, uhtot, vhtot, &
dRL = g_H_2Rho0 * (R0(i,k)*htot(i) - R0_tot(i) )
endif
dMKE = CS%bulk_Ri_ML * 0.5 * &
((uhtot(i)-u(i,k)*htot(i))**2 + (vhtot(i)-v(i,k)*htot(i))**2)
(((uhtot(i)-u(i,k)*htot(i))**2) + ((vhtot(i)-v(i,k)*htot(i))**2))

! Find the TKE that would remain if the entire layer were entrained.
kh = Idecay_len_TKE(i)*h_avail ; exp_kh = exp(-kh)
Expand Down
2 changes: 1 addition & 1 deletion src/parameterizations/vertical/MOM_energetic_PBL.F90
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,7 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
! velocities between layer k and the layers above.
dMKE_max = (US%L_to_Z**2*GV%H_to_RZ * CS%MKE_to_TKE_effic) * 0.5 * &
(h(k) / ((htot + h(k))*htot)) * &
((uhtot-u(k)*htot)**2 + (vhtot-v(k)*htot)**2)
(((uhtot-u(k)*htot)**2) + ((vhtot-v(k)*htot)**2))
! A fraction (1-exp(Kddt_h*MKE2_Hharm)) of this energy would be
! extracted by mixing with a finite viscosity.
MKE2_Hharm = (htot + h(k) + 2.0*h_neglect) / &
Expand Down
8 changes: 4 additions & 4 deletions src/parameterizations/vertical/MOM_vert_friction.F90
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ subroutine vertFPmix(ui, vi, uold, vold, hbl_h, h, forces, dt, G, GV, US, CS, OB

do k=1,nz
kp1 = MIN(k+1 , nz)
tau_u(I,j,k+1) = sqrt( tauxDG_u(I,j,k+1)*tauxDG_u(I,j,k+1) + tauyDG_u(I,j,k+1)*tauyDG_u(I,j,k+1))
tau_u(I,j,k+1) = sqrt( (tauxDG_u(I,j,k+1)*tauxDG_u(I,j,k+1)) + (tauyDG_u(I,j,k+1)*tauyDG_u(I,j,k+1)) )
Omega_tau2x = atan2( tauyDG_u(I,j,k+1) , tauxDG_u(I,j,k+1) )
omega_tmp = Omega_tau2x !- omega_w2x_u(I,j)
if ( (omega_tmp > pi ) ) omega_tmp = omega_tmp - 2.*pi
Expand All @@ -407,7 +407,7 @@ subroutine vertFPmix(ui, vi, uold, vold, hbl_h, h, forces, dt, G, GV, US, CS, OB

do k=1,nz-1
kp1 = MIN(k+1 , nz)
tau_v(i,J,k+1) = sqrt ( tauxDG_v(i,J,k+1)*tauxDG_v(i,J,k+1) + tauyDG_v(i,J,k+1)*tauyDG_v(i,J,k+1) )
tau_v(i,J,k+1) = sqrt ( (tauxDG_v(i,J,k+1)*tauxDG_v(i,J,k+1)) + (tauyDG_v(i,J,k+1)*tauyDG_v(i,J,k+1)) )
omega_tau2x = atan2( tauyDG_v(i,J,k+1) , tauxDG_v(i,J,k+1) )
omega_tmp = omega_tau2x !- omega_w2x_v(i,J)
if ( (omega_tmp > pi ) ) omega_tmp = omega_tmp - 2.*pi
Expand Down Expand Up @@ -464,7 +464,7 @@ subroutine vertFPmix(ui, vi, uold, vold, hbl_h, h, forces, dt, G, GV, US, CS, OB

! diagnostics
Omega_tau2s_u(I,j,k+1) = atan2(tauNL_CG , (tau_u(I,j,k+1)+tauNL_DG))
tau_u(I,j,k+1) = sqrt((tauxDG_u(I,j,k+1) + tauNL_X)**2 + (tauyDG_u(I,j,k+1) + tauNL_Y)**2)
tau_u(I,j,k+1) = sqrt(((tauxDG_u(I,j,k+1) + tauNL_X)**2) + ((tauyDG_u(I,j,k+1) + tauNL_Y)**2))
omega_tau2x = atan2((tauyDG_u(I,j,k+1) + tauNL_Y), (tauxDG_u(I,j,k+1) + tauNL_X))
omega_tau2w = omega_tau2x !- omega_w2x_u(I,j)
if (omega_tau2w >= pi ) omega_tau2w = omega_tau2w - 2.*pi
Expand Down Expand Up @@ -521,7 +521,7 @@ subroutine vertFPmix(ui, vi, uold, vold, hbl_h, h, forces, dt, G, GV, US, CS, OB

! diagnostics
Omega_tau2s_v(i,J,k+1) = atan2(tauNL_CG, tau_v(i,J,k+1) + tauNL_DG)
tau_v(i,J,k+1) = sqrt((tauxDG_v(i,J,k+1) + tauNL_X)**2 + (tauyDG_v(i,J,k+1) + tauNL_Y)**2)
tau_v(i,J,k+1) = sqrt(((tauxDG_v(i,J,k+1) + tauNL_X)**2) + ((tauyDG_v(i,J,k+1) + tauNL_Y)**2))
!omega_tau2x = atan2((tauyDG_v(i,J,k+1) + tauNL_Y) , (tauxDG_v(i,J,k+1) + tauNL_X))
!omega_tau2w = omega_tau2x - omega_w2x_v(i,J)
if (omega_tau2w > pi) omega_tau2w = omega_tau2w - 2.*pi
Expand Down

0 comments on commit 720b81f

Please sign in to comment.