From dbf68f572a18b49e3db7f6298b09a3dd7c15f38f Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 17 Sep 2023 11:56:49 -0400 Subject: [PATCH] add noexcept clause to ParallelFor (#2554) also switch the lambdas to use AMREX_GPU_DEVICE --- Source/diffusion/diffusion_util.cpp | 4 +- Source/driver/Castro.cpp | 92 +++++++++++------------ Source/driver/Castro_advance.cpp | 2 +- Source/driver/Castro_io.cpp | 2 +- Source/driver/Derive.cpp | 66 ++++++++-------- Source/driver/MGutils.cpp | 10 +-- Source/gravity/Castro_gravity.cpp | 4 +- Source/gravity/Castro_pointmass.cpp | 4 +- Source/gravity/Gravity.cpp | 34 ++++----- Source/hydro/Castro_ctu.cpp | 4 +- Source/hydro/Castro_ctu_hydro.cpp | 20 ++--- Source/hydro/Castro_ctu_rad.cpp | 6 +- Source/hydro/Castro_hybrid.cpp | 6 +- Source/hydro/Castro_mol.cpp | 14 ++-- Source/hydro/Castro_mol_hydro.cpp | 28 +++---- Source/hydro/advection_util.cpp | 24 +++--- Source/hydro/edge_util.cpp | 4 +- Source/hydro/flatten.cpp | 2 +- Source/hydro/fourth_center_average.cpp | 14 ++-- Source/hydro/fourth_order.cpp | 22 +++--- Source/hydro/riemann.cpp | 2 +- Source/hydro/riemann_util.cpp | 4 +- Source/hydro/trace_plm.cpp | 2 +- Source/hydro/trace_ppm.cpp | 2 +- Source/hydro/trans.cpp | 4 +- Source/mhd/Castro_mhd.cpp | 20 ++--- Source/mhd/ct_upwind.cpp | 8 +- Source/mhd/electric.cpp | 6 +- Source/mhd/hlld.cpp | 2 +- Source/mhd/mhd_plm.cpp | 2 +- Source/mhd/mhd_ppm.cpp | 2 +- Source/mhd/mhd_util.cpp | 6 +- Source/problems/Castro_bc_fill_nd.cpp | 2 +- Source/problems/Castro_problem_source.cpp | 2 +- Source/problems/ambient_fill.cpp | 4 +- Source/problems/hse_fill.cpp | 10 +-- Source/rotation/Rotation.cpp | 2 +- Source/rotation/rotation_sources.cpp | 4 +- Source/scf/scf_relax.cpp | 2 +- Source/sdc/Castro_sdc.cpp | 12 +-- Source/sources/Castro_geom.cpp | 2 +- Source/sources/Castro_sponge.cpp | 2 +- Source/sources/Castro_thermo.cpp | 2 +- 43 files changed, 233 insertions(+), 233 deletions(-) diff --git a/Source/diffusion/diffusion_util.cpp b/Source/diffusion/diffusion_util.cpp index 5105019d70..0dda3a0376 100644 --- a/Source/diffusion/diffusion_util.cpp +++ b/Source/diffusion/diffusion_util.cpp @@ -15,7 +15,7 @@ fill_temp_cond(const Box& bx, Array4 const& coeff_arr) { amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { eos_t eos_state; @@ -64,7 +64,7 @@ fill_temp_diff_coeff(const Box& bx, Array4 const& coeff_arr) { amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { eos_t eos_state; diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp index 7ef73d48e4..44c75144ad 100644 --- a/Source/driver/Castro.cpp +++ b/Source/driver/Castro.cpp @@ -1061,7 +1061,7 @@ Castro::initData () const Box& box_x = mfi.nodaltilebox(0); amrex::ParallelFor(box_x, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // C++ MHD problem initialization; has no effect if not // implemented by a problem setup (defaults to an empty @@ -1072,7 +1072,7 @@ Castro::initData () const Box& box_y = mfi.nodaltilebox(1); amrex::ParallelFor(box_y, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // C++ MHD problem initialization; has no effect if not // implemented by a problem setup (defaults to an empty @@ -1083,7 +1083,7 @@ Castro::initData () const Box& box_z = mfi.nodaltilebox(2); amrex::ParallelFor(box_z, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // C++ MHD problem initialization; has no effect if not // implemented by a problem setup (defaults to an empty @@ -1106,7 +1106,7 @@ Castro::initData () auto geomdata = geom.data(); amrex::ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // problem initialization problem_initialize_state_data(i, j, k, s, geomdata); @@ -1149,7 +1149,7 @@ Castro::initData () Real lsmall_dens = small_dens; reduce_op.eval(bx, reduce_data, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple { // if the problem tried to initialize a thermodynamic // state that is at or below small_temp, then we abort. @@ -1199,7 +1199,7 @@ Castro::initData () auto S_arr = S_new.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real spec_sum = 0.0_rt; for (int n = 0; n < NumSpec; n++) { @@ -1287,7 +1287,7 @@ Castro::initData () auto S_arr = Sborder.array(mfi); amrex::ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rhoInv = 1.0_rt / S_arr(i,j,k,URHO); @@ -1384,7 +1384,7 @@ Castro::initData () #endif amrex::ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // C++ problem initialization; has no effect if not implemented // by a problem setup (defaults to an empty routine). @@ -2764,32 +2764,32 @@ Castro::reflux (int crse_level, int fine_level, bool in_post_timestep) crse_lev.limit_hydro_fluxes_on_small_dens(nbx, idir, U, V, F, A, dt, scale_by_dAdt); #endif amrex::ParallelFor(nbx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - bool zero_fluxes = false; - - Real rho = U(i,j,k,URHO); - Real drhoV = F(i,j,k,URHO) / V(i,j,k); - Real rhoInvNew = 1.0_rt / (rho + drhoV); - - for (int n = 0; n < NumSpec; ++n) { - Real rhoX = U(i,j,k,UFS+n); - Real drhoX = F(i,j,k,UFS+n) / V(i,j,k); - Real XNew = (rhoX + AMREX_SPACEDIM * drhoX) * rhoInvNew; - - if (XNew < -castro::abundance_failure_tolerance || - XNew > 1.0_rt + castro::abundance_failure_tolerance) { - zero_fluxes = true; - break; - } - } - - if (zero_fluxes) { - for (int n = 0; n < NUM_STATE; ++n) { - F(i,j,k,n) = 0.0; - } - } - }); + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + bool zero_fluxes = false; + + Real rho = U(i,j,k,URHO); + Real drhoV = F(i,j,k,URHO) / V(i,j,k); + Real rhoInvNew = 1.0_rt / (rho + drhoV); + + for (int n = 0; n < NumSpec; ++n) { + Real rhoX = U(i,j,k,UFS+n); + Real drhoX = F(i,j,k,UFS+n) / V(i,j,k); + Real XNew = (rhoX + AMREX_SPACEDIM * drhoX) * rhoInvNew; + + if (XNew < -castro::abundance_failure_tolerance || + XNew > 1.0_rt + castro::abundance_failure_tolerance) { + zero_fluxes = true; + break; + } + } + + if (zero_fluxes) { + for (int n = 0; n < NUM_STATE; ++n) { + F(i,j,k,n) = 0.0; + } + } + }); } } @@ -3116,7 +3116,7 @@ Castro::normalize_species (MultiFab& S_new, int ng) // then normalize them so that they sum to 1. reduce_op.eval(bx, reduce_data, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple { Real rhoX_sum = 0.0_rt; Real rhoInv = 1.0_rt / u(i,j,k,URHO); @@ -3193,7 +3193,7 @@ Castro::enforce_consistent_e ( #endif ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rhoInv = 1.0_rt / S_arr(i,j,k,URHO); Real u = S_arr(i,j,k,UMX) * rhoInv; @@ -3369,7 +3369,7 @@ Castro::enforce_speed_limit (MultiFab& state_in, int ng) auto u = state_in[mfi].array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rho = u(i,j,k,URHO); Real rhoInv = 1.0_rt / rho; @@ -3504,7 +3504,7 @@ Castro::apply_problem_tags (TagBoxArray& tags, Real time) const GeometryData& geomdata = geom.data(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { problem_tagging(i, j, k, tag_arr, state_arr, lev, geomdata); }); @@ -3563,7 +3563,7 @@ Castro::apply_tagging_restrictions(TagBoxArray& tags, [[maybe_unused]] Real time auto tag = tags[mfi].array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { bool outer_boundary_test[3] = {false}; @@ -3607,7 +3607,7 @@ Castro::apply_tagging_restrictions(TagBoxArray& tags, [[maybe_unused]] Real time auto tag = tags[mfi].array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { const Real* problo = geomdata.ProbLo(); const Real* probhi = geomdata.ProbHi(); @@ -3700,7 +3700,7 @@ Castro::reset_internal_energy(const Box& bx, Real ldual_energy_eta2 = dual_energy_eta2; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rhoInv = 1.0_rt / u(i,j,k,URHO); Real Up = u(i,j,k,UMX) * rhoInv; @@ -3827,7 +3827,7 @@ Castro::add_magnetic_e( MultiFab& Bx, ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real bx_cell_c = 0.5_rt * (Bx_arr(i,j,k) + Bx_arr(i+1,j,k)); @@ -3872,7 +3872,7 @@ Castro::check_div_B( MultiFab& Bx, const auto dx = geom.CellSizeArray(); reduce_op.eval(box, reduce_data, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple { Real divB = (Bx_arr(i+1,j,k) - Bx_arr(i,j,k))/dx[0] + @@ -4019,7 +4019,7 @@ Castro::computeTemp( Array4 const u = u_fab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rhoInv = 1.0_rt / u(i,j,k,URHO); @@ -4046,7 +4046,7 @@ Castro::computeTemp( if (clamp_ambient_temp == 1) { amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rhoInv = 1.0_rt / u(i,j,k,URHO); @@ -4293,7 +4293,7 @@ Castro::define_new_center(MultiFab& S, Real time) Real cen = data(mi[0], mi[1], mi[2]); amrex::ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) { + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { data(i,j,k) -= cen; }); diff --git a/Source/driver/Castro_advance.cpp b/Source/driver/Castro_advance.cpp index aa3e068748..b87b8fbcf1 100644 --- a/Source/driver/Castro_advance.cpp +++ b/Source/driver/Castro_advance.cpp @@ -429,7 +429,7 @@ Castro::initialize_advance(Real time, Real dt, int amr_iteration) auto geomdata = geom.data(); amrex::ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // redo the problem initialization. We want to preserve // the current velocity though, so save that and then diff --git a/Source/driver/Castro_io.cpp b/Source/driver/Castro_io.cpp index e3bb00a5d2..3e058f2c85 100644 --- a/Source/driver/Castro_io.cpp +++ b/Source/driver/Castro_io.cpp @@ -292,7 +292,7 @@ Castro::restart (Amr& papa, auto geomdata = geom.data(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // C++ problem initialization; has no effect if not implemented // by a problem setup (defaults to an empty routine). diff --git a/Source/driver/Derive.cpp b/Source/driver/Derive.cpp index aaf3786f48..2cc4fedd7c 100644 --- a/Source/driver/Derive.cpp +++ b/Source/driver/Derive.cpp @@ -30,7 +30,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rhoInv = 1.0_rt / dat(i,j,k,URHO); @@ -63,7 +63,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real rhoInv = 1.0_rt/dat(i,j,k,URHO); @@ -85,7 +85,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = dat(i,j,k,UEINT) / dat(i,j,k,URHO); @@ -101,7 +101,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = std::log10(dat(i,j,k,0)); }); @@ -116,7 +116,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -151,7 +151,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -186,7 +186,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -222,7 +222,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -257,7 +257,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -295,7 +295,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -367,7 +367,7 @@ extern "C" const int coord_type = geomdata.Coord(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // (k grad T)_{i+1/2} @@ -455,7 +455,7 @@ extern "C" int enuc_comp = datfab.nComp()-1; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // the nuclear energy (rho H_nuc) is tacked onto the end of @@ -503,7 +503,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // The derive data is (rho, rho_enuc) Real enuc = dat(i,j,k,1) / dat(i,j,k,0); @@ -522,7 +522,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = dat(i,j,k,1) / dat(i,j,k,0); }); @@ -538,7 +538,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real deninv = 1.0_rt/dat(i,j,k,0); @@ -559,7 +559,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = std::sqrt(dat(i,j,k,0)*dat(i,j,k,0) + @@ -583,7 +583,7 @@ extern "C" auto problo = geomdata.ProbLoArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real x = problo[0] + (static_cast(i) + 0.5_rt) * dx[0] - problem::center[0]; @@ -638,7 +638,7 @@ extern "C" auto problo = geomdata.ProbLoArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real x = problo[0] + (static_cast(i) + 0.5_rt) * dx[0] - problem::center[0]; @@ -698,7 +698,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = std::sqrt(dat(i,j,k,0)*dat(i,j,k,0) + @@ -721,7 +721,7 @@ extern "C" auto const L = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real loc[3]; @@ -773,7 +773,7 @@ extern "C" auto const L = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real loc[3]; @@ -821,7 +821,7 @@ extern "C" auto const L = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real loc[3]; @@ -866,7 +866,7 @@ extern "C" auto const kineng = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { kineng(i,j,k,0) = 0.5_rt / dat(i,j,k,0) * ( dat(i,j,k,1)*dat(i,j,k,1) + dat(i,j,k,2)*dat(i,j,k,2) + @@ -897,7 +897,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = dat(i,j,k,1) / dat(i,j,k,0); }); @@ -913,7 +913,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real sum = 0.0_rt; @@ -935,7 +935,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real sum = 0.0_rt; @@ -965,7 +965,7 @@ extern "C" #endif amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // Calculate vorticity. @@ -1062,7 +1062,7 @@ extern "C" const int coord_type = geomdata.Coord(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real uhi = dat(i+1,j,k,1) / dat(i+1,j,k,0); @@ -1122,7 +1122,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // density @@ -1148,7 +1148,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = 0.5_rt * (dat(i,j,k,0) + dat(i+1,j,k,0)); }); @@ -1164,7 +1164,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = 0.5_rt * (dat(i,j,k,0) + dat(i,j+1,k,0)); }); @@ -1180,7 +1180,7 @@ extern "C" auto const der = derfab.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { der(i,j,k,0) = 0.5_rt * (dat(i,j,k,0) + dat(i,j,k+1,0)); }); @@ -1198,7 +1198,7 @@ extern "C" auto dx = geomdata.CellSizeArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real dBx = dat(i+1,j,k,0) - dat(i,j,k,0); der(i,j,k,0) = dBx / dx[0]; diff --git a/Source/driver/MGutils.cpp b/Source/driver/MGutils.cpp index 31e3c58685..62e5bbd85c 100644 --- a/Source/driver/MGutils.cpp +++ b/Source/driver/MGutils.cpp @@ -17,7 +17,7 @@ apply_metric(const Box& bx, if (coord_type == 1) { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { IntVect idx(D_DECL(i, j, k)); @@ -62,7 +62,7 @@ do_weight_cc(const Box& bx, // At centers amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real r = (static_cast(i) + 0.5_rt) * dx[0]; cc(i,j,k) *= r; @@ -86,7 +86,7 @@ do_unweight_cc(const Box& bx, // At centers amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real r = (static_cast(i) + 0.5_rt) * dx[0]; cc(i,j,k) /= r; @@ -113,7 +113,7 @@ do_unweight_edges(const Box& bx, // On x-edges amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { if (i != 0) { Real r = static_cast(i) * dx[0]; @@ -125,7 +125,7 @@ do_unweight_edges(const Box& bx, // On y-edges amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real r = (static_cast(i) + 0.5_rt) * dx[0]; ec(i,j,k) /= r; diff --git a/Source/gravity/Castro_gravity.cpp b/Source/gravity/Castro_gravity.cpp index 094727e6a8..0fd34e046f 100644 --- a/Source/gravity/Castro_gravity.cpp +++ b/Source/gravity/Castro_gravity.cpp @@ -329,7 +329,7 @@ void Castro::construct_old_gravity_source(MultiFab& source, MultiFab& state_in, Array4 const source_arr = source.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // Temporary array for seeing what the new state would be if the update were applied here. @@ -496,7 +496,7 @@ void Castro::construct_new_gravity_source(MultiFab& source, MultiFab& state_old, Array4 const source_arr = source.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray src{}; diff --git a/Source/gravity/Castro_pointmass.cpp b/Source/gravity/Castro_pointmass.cpp index fd5a3ce022..674cc059f5 100644 --- a/Source/gravity/Castro_pointmass.cpp +++ b/Source/gravity/Castro_pointmass.cpp @@ -37,7 +37,7 @@ Castro::pointmass_update(Real time, Real dt) Array4 const vol = volume.array(mfi); reduce_op.eval(bx, reduce_data, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple { // This is just a small number to keep precision issues from making // icen, jcen, kcen one cell too low. @@ -117,7 +117,7 @@ Castro::pointmass_update(Real time, Real dt) Array4 const uout = S_new.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // This is just a small number to keep precision issues from making // icen, jcen, kcen one cell too low. diff --git a/Source/gravity/Gravity.cpp b/Source/gravity/Gravity.cpp index 77d12f68e5..28a5986e8e 100644 --- a/Source/gravity/Gravity.cpp +++ b/Source/gravity/Gravity.cpp @@ -1026,7 +1026,7 @@ Gravity::test_residual (const Box& bx, AMREX_ALWAYS_ASSERT(coord_type >= 0 && coord_type <= 2); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // Cartesian if (coord_type == 0) { @@ -1351,7 +1351,7 @@ Gravity::interpolate_monopole_grav(int level, RealVector& radial_grav, MultiFab& // including the ghost cells. amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray loc; @@ -1497,7 +1497,7 @@ Gravity::compute_radial_mass(const Box& bx, #endif amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real xc = problo[0] + (static_cast(i) + 0.5_rt) * dx[0] - problem::center[0]; Real lo_i = problo[0] + static_cast(i) * dx[0] - problem::center[0]; @@ -1910,7 +1910,7 @@ Gravity::fill_multipole_BCs(int crse_level, int fine_level, const Vector loc, locb; loc[0] = problo[0] + (static_cast(i) + 0.5_rt) * dx[0]; @@ -2801,7 +2801,7 @@ Gravity::fill_direct_sum_BCs(int crse_level, int fine_level, const Vector const phi_arr = phi.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // Compute radial gravity due to a point mass at center[:]. @@ -3147,7 +3147,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) Real* const lev_mass = radial_mass[lev].dataPtr(); amrex::ParallelFor(n1d, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { #ifdef GR_GRAV lev_pres[i] = 0.; @@ -3267,7 +3267,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) // First add the contribution from this level amrex::ParallelFor(n1d, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { mass_summed[i] = level_mass[i]; }); @@ -3285,7 +3285,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) Real* const lev_mass = radial_mass[lev].dataPtr(); amrex::ParallelFor(n1d/ratio, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { for (int n = 0; n < ratio; n++) { @@ -3327,7 +3327,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) // First add the contribution from this level amrex::ParallelFor(n1d, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { vol_summed[i] = level_vol[i]; }); @@ -3345,7 +3345,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) const Real* lev_vol = radial_vol[lev].dataPtr(); amrex::ParallelFor(n1d/ratio, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { for (int n = 0; n < ratio; n++) { @@ -3356,7 +3356,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) } amrex::ParallelFor(n1d, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { den_summed[i] = mass_summed[i]; if (vol_summed[i] > 0.) { @@ -3372,7 +3372,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) // First add the contribution from this level amrex::ParallelFor(n1d, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { pres_summed[i] = level_pres[i]; }); @@ -3388,7 +3388,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) const Real* lev_pres = radial_pres[lev].dataPtr(); amrex::ParallelFor(n1d/ratio, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { for (int n = 0; n < ratio; n++) { pres_summed[ratio*i+n] += 1./double(ratio) * lev_pres[i]; @@ -3398,7 +3398,7 @@ Gravity::make_radial_gravity(int level, Real time, RealVector& radial_grav) } amrex::ParallelFor(n1d, - [=] AMREX_GPU_DEVICE (int i) + [=] AMREX_GPU_DEVICE (int i) noexcept { if (vol_summed[i] > 0.) { pres_summed[i] /= vol_summed[i]; diff --git a/Source/hydro/Castro_ctu.cpp b/Source/hydro/Castro_ctu.cpp index 445cb9bbcf..af5a833f06 100644 --- a/Source/hydro/Castro_ctu.cpp +++ b/Source/hydro/Castro_ctu.cpp @@ -29,7 +29,7 @@ Castro::consup_hydro(const Box& bx, auto geomdata = geom.data(); amrex::ParallelFor(bx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { Real volinv = 1.0 / geometry_util::volume(i, j, k, geomdata); @@ -317,7 +317,7 @@ Castro::add_sdc_source_to_states(const Box& bx, const int idir, const Real dt, { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // of the state variables, only pressure, rhoe, and diff --git a/Source/hydro/Castro_ctu_hydro.cpp b/Source/hydro/Castro_ctu_hydro.cpp index d3215ee6b7..fe1a510c02 100644 --- a/Source/hydro/Castro_ctu_hydro.cpp +++ b/Source/hydro/Castro_ctu_hydro.cpp @@ -216,7 +216,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) Array4 const rho_inv_arr = rho_inv.array(); amrex::ParallelFor(qbx3, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { rho_inv_arr(i,j,k) = 1.0 / U_old_arr(i,j,k,URHO); }); @@ -268,7 +268,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) } else { amrex::ParallelFor(obx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { shk_arr(i,j,k) = 0.0; }); @@ -285,7 +285,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) Array4 const src_corr_arr = source_corrector.array(mfi); amrex::ParallelFor(qbx3, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { hydro::src_to_prim(i, j, k, dt, U_old_arr, q_arr, old_src_arr, src_corr_arr, src_q_arr); }); @@ -1170,7 +1170,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) // Zero out shock and temp fluxes -- these are physically meaningless here amrex::ParallelFor(nbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { flux_arr(i,j,k,UTEMP) = 0.e0; #ifdef SHOCK_VAR @@ -1242,7 +1242,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) auto dx_arr = geom.CellSizeArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray loc; @@ -1321,7 +1321,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) if (!mom_flux_has_p(0, 0, coord)) { #endif amrex::ParallelFor(nbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { pradial_fab(i,j,k) = qex_arr(i,j,k,GDPRES) * dt; }); @@ -1346,7 +1346,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) Array4 fluxes_fab = (*fluxes[idir]).array(mfi); amrex::ParallelFor(mfi.nodaltilebox(idir), NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { fluxes_fab(i,j,k,n) += flux_fab(i,j,k,n); }); @@ -1356,7 +1356,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) Array4 rad_fluxes_fab = (*rad_fluxes[idir]).array(mfi); amrex::ParallelFor(mfi.nodaltilebox(idir), Radiation::nGroups, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { rad_fluxes_fab(i,j,k,n) += rad_flux_fab(i,j,k,n); }); @@ -1373,7 +1373,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) Array4 P_radial_fab = P_radial.array(mfi); amrex::ParallelFor(mfi.nodaltilebox(0), - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { P_radial_fab(i,j,k,0) += pradial_fab(i,j,k,0); }); @@ -1387,7 +1387,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) Array4 mass_fluxes_fab = (*mass_fluxes[idir]).array(mfi); amrex::ParallelFor(mfi.nodaltilebox(idir), - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // This is a copy, not an add, since we need mass_fluxes to be // only this subcycle's data when we evaluate the gravitational diff --git a/Source/hydro/Castro_ctu_rad.cpp b/Source/hydro/Castro_ctu_rad.cpp index b347395858..b97745921b 100644 --- a/Source/hydro/Castro_ctu_rad.cpp +++ b/Source/hydro/Castro_ctu_rad.cpp @@ -63,7 +63,7 @@ Castro::ctu_rad_consup(const Box& bx, // radiation energy update. amrex::ParallelFor(bx, NGROUPS, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int g) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int g) noexcept { Erout(i,j,k,g) = Erin(i,j,k,g) + dt * @@ -81,7 +81,7 @@ Castro::ctu_rad_consup(const Box& bx, // directions amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // radiation contribution -- this is sum{lambda E_r} @@ -169,7 +169,7 @@ Castro::ctu_rad_consup(const Box& bx, using ReduceTuple = typename decltype(reduce_data)::Type; reduce_op.eval(bx, reduce_data, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple { Real ux = 0.5_rt * (qx(i,j,k,GDU) + qx(i+1,j,k,GDU)); diff --git a/Source/hydro/Castro_hybrid.cpp b/Source/hydro/Castro_hybrid.cpp index 13b2260d4c..ca8920ef2f 100644 --- a/Source/hydro/Castro_hybrid.cpp +++ b/Source/hydro/Castro_hybrid.cpp @@ -102,7 +102,7 @@ Castro::fill_hybrid_hydro_source(MultiFab& sources, MultiFab& state_in, Real mul auto src = sources.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray loc; @@ -145,7 +145,7 @@ Castro::linear_to_hybrid_momentum(MultiFab& state_in, int ng) // Convert linear momentum to hybrid momentum. amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray loc; @@ -191,7 +191,7 @@ Castro::hybrid_to_linear_momentum(MultiFab& state_in, int ng) // Convert hybrid momentum to linear momentum. amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray loc; diff --git a/Source/hydro/Castro_mol.cpp b/Source/hydro/Castro_mol.cpp index e60e0a6ac7..907be84528 100644 --- a/Source/hydro/Castro_mol.cpp +++ b/Source/hydro/Castro_mol.cpp @@ -38,7 +38,7 @@ Castro::mol_plm_reconstruct(const Box& bx, // piecewise linear slopes amrex::ParallelFor(bx, NQ, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { bool lo_bc_test = lo_symm && ((idir == 0 && i == domlo[0]) || @@ -63,7 +63,7 @@ Castro::mol_plm_reconstruct(const Box& bx, if (use_pslope == 1) { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real s[nslp]; @@ -92,7 +92,7 @@ Castro::mol_plm_reconstruct(const Box& bx, } amrex::ParallelFor(bx, NQ, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { @@ -153,7 +153,7 @@ Castro::mol_ppm_reconstruct(const Box& bx, Array4 const& qp) { amrex::ParallelFor(bx, NQ, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { Real s[nslp]; @@ -241,7 +241,7 @@ Castro::mol_consup(const Box& bx, #endif amrex::ParallelFor(bx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { #if AMREX_SPACEDIM == 1 @@ -290,7 +290,7 @@ Castro::mol_consup(const Box& bx, // we'll be multiplying that for the update calculation. amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { update(i,j,k,USHK) = shk(i,j,k) / dt; }); @@ -309,7 +309,7 @@ Castro::mol_diffusive_flux(const Box& bx, const auto dx = geom.CellSizeArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real cond_int; diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index 5cfa4741fe..875cd724b2 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ -114,7 +114,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) if (first_order_hydro == 1) { amrex::ParallelFor(obx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { flatn_arr(i,j,k) = 0.0; }); @@ -122,7 +122,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) uflatten(obx, q_arr, flatn_arr, QPRES); } else { amrex::ParallelFor(obx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { flatn_arr(i,j,k) = 1.0; }); @@ -149,7 +149,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) } else { amrex::ParallelFor(obx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { shk_arr(i,j,k) = 0.0; }); @@ -267,7 +267,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) if (do_hydro == 0) { amrex::ParallelFor(nbx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { f_avg_arr(i,j,k,n) = 0.0; }); @@ -296,7 +296,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) Array4 const flux_arr = (flux[0]).array(); amrex::ParallelFor(nbx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { flux_arr(i,j,k,n) = f_avg_arr(i,j,k,n); }); @@ -316,7 +316,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) } amrex::ParallelFor(nbx, NQ, - [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { bool test = (n == QGC) || (n == QTEMP); @@ -337,7 +337,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) if (do_hydro == 0) { amrex::ParallelFor(nbx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { f_arr(i,j,k,n) = 0.0; }); @@ -361,7 +361,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) Array4 const flux_arr = (flux[idir]).array(); amrex::ParallelFor(nbx, NUM_STATE, - [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { Real lap = trans_laplacian(i, j, k, n, idir, f_avg_arr, @@ -395,7 +395,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) Array4 const avis_arr = avis.array(); amrex::ParallelFor(nbx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { if (n == UTEMP) { flux_arr(i,j,k,n) = 0.0; @@ -482,7 +482,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) Array4 const src_q_arr = src_q.array(); amrex::ParallelFor(qbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { hydro::src_to_prim(i, j, k, dt, uin_arr, q_arr, source_in_arr, src_q_arr); }); @@ -520,7 +520,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) Array4 const uin_arr = Sborder.array(mfi); amrex::ParallelFor(nbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { flux_arr(i,j,k,UTEMP) = 0.e0; #ifdef SHOCK_VAR @@ -637,7 +637,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) auto dx_arr = geom.CellSizeArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray loc; @@ -690,7 +690,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) #if AMREX_SPACEDIM == 1 if (!Geom().IsCartesian()) { amrex::ParallelFor(nbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { pradial_fab(i,j,k) = qex_fab(i,j,k,prescomp) * dt; }); @@ -700,7 +700,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) #if AMREX_SPACEDIM == 2 if (!mom_flux_has_p(0, 0, coord)) { amrex::ParallelFor(nbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { pradial_fab(i,j,k) = qex_fab(i,j,k,prescomp) * dt; }); diff --git a/Source/hydro/advection_util.cpp b/Source/hydro/advection_util.cpp index c73d1ac106..7b3beb079c 100644 --- a/Source/hydro/advection_util.cpp +++ b/Source/hydro/advection_util.cpp @@ -42,7 +42,7 @@ Castro::ctoprim(const Box& bx, amrex::ignore_unused(time); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { auto q = [&] (int n) -> Real& { return q_arr(i,j,k,n); }; auto qaux = [&] (int n) -> Real& { return qaux_arr(i,j,k,n); }; @@ -87,7 +87,7 @@ Castro::shock(const Box& bx, #endif amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real div_u = 0.0_rt; @@ -244,7 +244,7 @@ Castro::divu(const Box& bx, #endif amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { #if AMREX_SPACEDIM == 1 @@ -347,7 +347,7 @@ Castro::apply_av(const Box& bx, Real diff_coeff = difmag; amrex::ParallelFor(bx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { if (n == UTEMP) { @@ -406,7 +406,7 @@ Castro::apply_av_rad(const Box& bx, Real diff_coeff = difmag; amrex::ParallelFor(bx, Radiation::nGroups, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { Real div1; @@ -448,7 +448,7 @@ Castro::normalize_species_fluxes(const Box& bx, // defined in Plewa & Muller, 1999, A&A, 342, 179. amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real sum = 0.0_rt; @@ -492,7 +492,7 @@ Castro::scale_flux(const Box& bx, #endif amrex::ParallelFor(bx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { flux(i,j,k,n) = dt * flux(i,j,k,n) * area_arr(i,j,k); @@ -514,7 +514,7 @@ Castro::scale_rad_flux(const Box& bx, const Real dt) { amrex::ParallelFor(bx, Radiation::nGroups, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int g) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int g) noexcept { rflux(i,j,k,g) = dt * rflux(i,j,k,g) * area_arr(i,j,k); }); @@ -555,7 +555,7 @@ Castro::limit_hydro_fluxes_on_small_dens(const Box& bx, Real density_floor = small_dens * density_floor_tolerance; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // Grab the states on either side of the interface we are working with, // depending on which dimension we're currently calling this with. @@ -641,7 +641,7 @@ Castro::do_enforce_minimum_density(const Box& bx, amrex::ignore_unused(verbose); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { if (state_arr(i,j,k,URHO) < small_dens) { @@ -749,7 +749,7 @@ Castro::enforce_reflect_states(const Box& bx, const int idir, if (lo_bc_test) { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // reset the left state at domlo if needed -- it is outside the domain @@ -770,7 +770,7 @@ Castro::enforce_reflect_states(const Box& bx, const int idir, if (hi_bc_test) { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // reset the right state at domhi+1 if needed -- it is outside the domain diff --git a/Source/hydro/edge_util.cpp b/Source/hydro/edge_util.cpp index 0ba055e822..35530e30d3 100644 --- a/Source/hydro/edge_util.cpp +++ b/Source/hydro/edge_util.cpp @@ -14,7 +14,7 @@ Castro::reset_edge_state_thermo(const Box& bx, Real small_p = small_pres; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { #ifdef RADIATION @@ -85,7 +85,7 @@ Castro::edge_state_temp_to_pres(const Box& bx, // use T to define p amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // We just got the extremes corresponding to a particular cell-center, but now diff --git a/Source/hydro/flatten.cpp b/Source/hydro/flatten.cpp index 9571324169..67ab08543b 100644 --- a/Source/hydro/flatten.cpp +++ b/Source/hydro/flatten.cpp @@ -23,7 +23,7 @@ Castro::uflatten(const Box& bx, constexpr Real dzcut = 1.0_rt / (zcut2-zcut1); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // x-direction flattening coef diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp index 942a792b0e..498f37de3c 100644 --- a/Source/hydro/fourth_center_average.cpp +++ b/Source/hydro/fourth_center_average.cpp @@ -40,7 +40,7 @@ Castro::make_cell_center(const Box& bx, } amrex::ParallelFor(bx, U.nComp(), - [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { Real lap = compute_laplacian(i, j, k, n, U, lo_periodic, hi_periodic, domlo, domhi); @@ -75,14 +75,14 @@ Castro::make_cell_center_in_place(const Box& bx, for (int n = 0; n < U.nComp(); n++) { amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { tmp(i,j,k) = compute_laplacian(i, j, k, n, U, lo_periodic, hi_periodic, domlo, domhi); }); amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { U(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * tmp(i,j,k); }); @@ -110,7 +110,7 @@ Castro::compute_lap_term(const Box& bx, } amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { lap(i,j,k) = (1.0_rt/24.0_rt) * compute_laplacian(i, j, k, ncomp, U, @@ -141,7 +141,7 @@ Castro::make_fourth_average(const Box& bx, } amrex::ParallelFor(bx, q.nComp(), - [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { Real lap = compute_laplacian(i, j, k, n, q_bar, lo_periodic, hi_periodic, domlo, domhi); @@ -191,14 +191,14 @@ Castro::make_fourth_in_place_n(const Box& bx, } amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { tmp(i,j,k) = compute_laplacian(i, j, k, ncomp, q, lo_periodic, hi_periodic, domlo, domhi); }); amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { q(i,j,k,ncomp) += (1.0_rt/24.0_rt) * tmp(i,j,k); }); diff --git a/Source/hydro/fourth_order.cpp b/Source/hydro/fourth_order.cpp index 5613cd48ee..bbe8c109d3 100644 --- a/Source/hydro/fourth_order.cpp +++ b/Source/hydro/fourth_order.cpp @@ -31,7 +31,7 @@ Castro::fourth_interfaces(const Box& bx, // this loop is over interfaces amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // interpolate to the edges -- this is a_{i-1/2} @@ -95,7 +95,7 @@ Castro::fourth_interfaces(const Box& bx, // this loop is over interfaces amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // interpolate to the edges @@ -159,7 +159,7 @@ Castro::fourth_interfaces(const Box& bx, // this loop is over interfaces amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // interpolate to the edges @@ -252,7 +252,7 @@ Castro::states(const Box& bx, if (limit_fourth_order == 0) { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { al(i+1,j,k,ncomp) = a_int(i+1,j,k); ar(i,j,k,ncomp) = a_int(i,j,k); @@ -266,7 +266,7 @@ Castro::states(const Box& bx, // i-1/2,R and i+1/2,L amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { al(i+1,j,k,ncomp) = a_int(i+1,j,k); @@ -419,7 +419,7 @@ Castro::states(const Box& bx, if (limit_fourth_order == 0) { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { al(i,j+1,k,ncomp) = a_int(i,j+1,k); ar(i,j,k,ncomp) = a_int(i,j,k); @@ -433,7 +433,7 @@ Castro::states(const Box& bx, // j-1/2,R and j+1/2,L amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { al(i,j+1,k,ncomp) = a_int(i,j+1,k); ar(i,j,k,ncomp) = a_int(i,j,k); @@ -584,7 +584,7 @@ Castro::states(const Box& bx, if (limit_fourth_order == 0) { amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { al(i,j,k+1,ncomp) = a_int(i,j,k+1); ar(i,j,k,ncomp) = a_int(i,j,k); @@ -596,7 +596,7 @@ Castro::states(const Box& bx, // k-1/2,R and k+1/2,L amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { al(i,j,k+1,ncomp) = a_int(i,j,k+1); @@ -775,7 +775,7 @@ Castro::fourth_avisc(const Box& bx, #endif amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real cmin; @@ -858,7 +858,7 @@ Castro::fourth_add_diffusive_flux(const Box& bx, const auto dx = geom.CellSizeArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { eos_t eos_state; diff --git a/Source/hydro/riemann.cpp b/Source/hydro/riemann.cpp index 3133b6e69b..d0a157ac3b 100644 --- a/Source/hydro/riemann.cpp +++ b/Source/hydro/riemann.cpp @@ -67,7 +67,7 @@ Castro::cmpflx_plus_godunov(const Box& bx, const auto domhi = geom.Domain().hiVect3d(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { diff --git a/Source/hydro/riemann_util.cpp b/Source/hydro/riemann_util.cpp index b5ebc6457c..866ba861fe 100644 --- a/Source/hydro/riemann_util.cpp +++ b/Source/hydro/riemann_util.cpp @@ -62,7 +62,7 @@ Castro::compute_flux_from_q(const Box& bx, #endif amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real u_adv = qint(i,j,k,iu); @@ -136,7 +136,7 @@ Castro::store_godunov_state(const Box& bx, // hydro advancement. amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { diff --git a/Source/hydro/trace_plm.cpp b/Source/hydro/trace_plm.cpp index aee8a67b1c..dc1ef06931 100644 --- a/Source/hydro/trace_plm.cpp +++ b/Source/hydro/trace_plm.cpp @@ -98,7 +98,7 @@ Castro::trace_plm(const Box& bx, const int idir, // Compute left and right traced states amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { bool lo_bc_test = lo_symm && ((idir == 0 && i == domlo[0]) || diff --git a/Source/hydro/trace_ppm.cpp b/Source/hydro/trace_ppm.cpp index 222dc582ca..8082f8486e 100644 --- a/Source/hydro/trace_ppm.cpp +++ b/Source/hydro/trace_ppm.cpp @@ -146,7 +146,7 @@ Castro::trace_ppm(const Box& bx, // Trace to left and right edges using upwind PPM amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { diff --git a/Source/hydro/trans.cpp b/Source/hydro/trans.cpp index 4236279315..0e5f4fc8f5 100644 --- a/Source/hydro/trans.cpp +++ b/Source/hydro/trans.cpp @@ -108,7 +108,7 @@ Castro::actual_trans_single(const Box& bx, Real small_p = small_pres; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // We are handling the states at the interface of @@ -544,7 +544,7 @@ Castro::actual_trans_final(const Box& bx, Real small_p = small_pres; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // the normal state diff --git a/Source/mhd/Castro_mhd.cpp b/Source/mhd/Castro_mhd.cpp index a77a1b054d..94d7839d53 100644 --- a/Source/mhd/Castro_mhd.cpp +++ b/Source/mhd/Castro_mhd.cpp @@ -183,7 +183,7 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) q_arr, qaux_arr); amrex::ParallelFor(bx_gc, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { hydro::src_to_prim(i, j, k, dt, u_arr, q_arr, old_src_arr, src_corr_arr, src_q_arr); }); @@ -203,7 +203,7 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) if (use_flattening == 0) { amrex::ParallelFor(bxi, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { flatn_arr(i,j,k) = 1.0; }); @@ -214,7 +214,7 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) uflatten(bxi, q_arr, flatg_arr, QPTOT); amrex::ParallelFor(bxi, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { flatn_arr(i,j,k) = flatn_arr(i,j,k) * flatg_arr(i,j,k); }); @@ -485,19 +485,19 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) // eq. 42 and 43 amrex::ParallelFor(ccbx, NUM_STATE+3, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { flxx1D_arr(i,j,k,n) = 0.5_rt * (flx_xy_arr(i,j,k,n) + flx_xz_arr(i,j,k,n)); }); amrex::ParallelFor(ccby, NUM_STATE+3, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { flxy1D_arr(i,j,k,n) = 0.5_rt * (flx_yx_arr(i,j,k,n) + flx_yz_arr(i,j,k,n)); }); amrex::ParallelFor(ccbz, NUM_STATE+3, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { flxz1D_arr(i,j,k,n) = 0.5_rt * (flx_zx_arr(i,j,k,n) + flx_zy_arr(i,j,k,n)); }); @@ -624,7 +624,7 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) // Zero out shock and temp fluxes -- these are physically meaningless here amrex::ParallelFor(nbox, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { flux_arr(i,j,k,UTEMP) = 0.e0; #ifdef SHOCK_VAR @@ -648,7 +648,7 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) Real dtdx = dt / dx[0]; amrex::ParallelFor(nbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Bxo_arr(i,j,k) = Bx_arr(i,j,k) + dtdx * ((Ey_arr(i,j,k+1) - Ey_arr(i,j,k)) - (Ez_arr(i,j+1,k) - Ez_arr(i,j,k))); @@ -661,7 +661,7 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) #endif amrex::ParallelFor(nby, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Byo_arr(i,j,k) = By_arr(i,j,k) + dtdx * ((Ez_arr(i+1,j,k) - Ez_arr(i,j,k)) - (Ex_arr(i,j,k+1) - Ex_arr(i,j,k))); @@ -674,7 +674,7 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) #endif amrex::ParallelFor(nbz, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Bzo_arr(i,j,k) = Bz_arr(i,j,k) + dtdx * ((Ex_arr(i,j+1,k) - Ex_arr(i,j,k)) - (Ey_arr(i+1,j,k) - Ey_arr(i,j,k))); diff --git a/Source/mhd/ct_upwind.cpp b/Source/mhd/ct_upwind.cpp index 8480ef41f0..3df301edf0 100644 --- a/Source/mhd/ct_upwind.cpp +++ b/Source/mhd/ct_upwind.cpp @@ -89,7 +89,7 @@ Castro::corner_couple(const Box& bx, int UMAGD3 = UMAGX + d3; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // first the conserved state @@ -166,7 +166,7 @@ Castro::corner_couple(const Box& bx, ell[d1] -= 1; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // conservative state @@ -293,7 +293,7 @@ Castro::half_step(const Box& bx, amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // first the conservative state @@ -382,7 +382,7 @@ Castro::half_step(const Box& bx, amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // left interface (e.g., U_{i+1/2,j,k,L} or the "+" state in MM notation) diff --git a/Source/mhd/electric.cpp b/Source/mhd/electric.cpp index 6eba8a94d0..f6ef7f1d97 100644 --- a/Source/mhd/electric.cpp +++ b/Source/mhd/electric.cpp @@ -15,7 +15,7 @@ Castro::electric_edge_x(const Box& bx, // Compute Ex on an edge. This will compute Ex(i, j-1/2, k-1/2) amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real q_zone[NQ]; @@ -174,7 +174,7 @@ Castro::electric_edge_y(const Box& bx, // Compute Ey on an edge. This will compute Ey(i-1/2, j, k-1/2) amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real q_zone[NQ]; @@ -329,7 +329,7 @@ Castro::electric_edge_z(const Box& bx, // Compute Ez on an edge. This will compute Ez(i-1/2, j-1/2, k) amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real q_zone[NQ]; diff --git a/Source/mhd/hlld.cpp b/Source/mhd/hlld.cpp index a80871b4f6..1589f89f08 100644 --- a/Source/mhd/hlld.cpp +++ b/Source/mhd/hlld.cpp @@ -71,7 +71,7 @@ Castro::hlld(const Box& bx, } amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // this is a loop over interfaces, so, e.g., for idir = 0 (x), we are seeing diff --git a/Source/mhd/mhd_plm.cpp b/Source/mhd/mhd_plm.cpp index 059a13fb5c..f04145f746 100644 --- a/Source/mhd/mhd_plm.cpp +++ b/Source/mhd/mhd_plm.cpp @@ -36,7 +36,7 @@ Castro::plm(const Box& bx, Real dtdx = dt/dx[idir]; amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { bool lo_bc_test = lo_symm && ((idir == 0 && i == domlo[0]) || diff --git a/Source/mhd/mhd_ppm.cpp b/Source/mhd/mhd_ppm.cpp index 55f28a1063..1b31f1fa54 100644 --- a/Source/mhd/mhd_ppm.cpp +++ b/Source/mhd/mhd_ppm.cpp @@ -71,7 +71,7 @@ Castro::ppm_mhd(const Box& bx, amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // compute the eigenvectors and eigenvalues for this coordinate direction diff --git a/Source/mhd/mhd_util.cpp b/Source/mhd/mhd_util.cpp index a1788ccfe2..9c1591eefe 100644 --- a/Source/mhd/mhd_util.cpp +++ b/Source/mhd/mhd_util.cpp @@ -28,7 +28,7 @@ Castro::consup_mhd(const Box& bx, const Real dt, #endif amrex::ParallelFor(bx, NUM_STATE, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k, int n) + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { if (n == UTEMP) { @@ -60,7 +60,7 @@ Castro::PrimToCons(const Box& bx, // calculate the conserved variables from the primitive amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { u_arr(i,j,k,URHO) = q_arr(i,j,k,QRHO); @@ -121,7 +121,7 @@ Castro::prim_half(const Box& bx, auto dx = geom.CellSizeArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real divF[NUM_STATE+3]; diff --git a/Source/problems/Castro_bc_fill_nd.cpp b/Source/problems/Castro_bc_fill_nd.cpp index d254911f24..f8ed08ed75 100644 --- a/Source/problems/Castro_bc_fill_nd.cpp +++ b/Source/problems/Castro_bc_fill_nd.cpp @@ -117,7 +117,7 @@ void ca_statefill(Box const& bx, FArrayBox& data, const auto geomdata = geom.data(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { problem_bc_fill(i, j, k, state, time, bcs, geomdata); }); diff --git a/Source/problems/Castro_problem_source.cpp b/Source/problems/Castro_problem_source.cpp index cece897e4a..0001c3578e 100644 --- a/Source/problems/Castro_problem_source.cpp +++ b/Source/problems/Castro_problem_source.cpp @@ -134,7 +134,7 @@ Castro::fill_ext_source (const Real time, const Real dt, Array4 const src = ext_src.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { problem_source(i, j, k, geomdata, snew, src, dt, time); }); diff --git a/Source/problems/ambient_fill.cpp b/Source/problems/ambient_fill.cpp index e618c6095a..14f30ab302 100644 --- a/Source/problems/ambient_fill.cpp +++ b/Source/problems/ambient_fill.cpp @@ -14,7 +14,7 @@ ambient_denfill(const Box& bx, Array4 const& state, const auto domhi = geom.Domain().hiVect3d(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { bool ambient_x_lo = (castro::ambient_fill_dir == 0 || castro::ambient_fill_dir == -1) && (bc.lo(0) == FOEXTRAP || bc.lo(0) == HOEXTRAP); @@ -73,7 +73,7 @@ ambient_fill(const Box& bx, Array4 const& state, const auto domhi = geom.Domain().hiVect3d(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { bool ambient_x_lo = (castro::ambient_fill_dir == 0 || castro::ambient_fill_dir == -1) && (bcs(URHO).lo(0) == FOEXTRAP || bcs(URHO).lo(0) == HOEXTRAP); diff --git a/Source/problems/hse_fill.cpp b/Source/problems/hse_fill.cpp index cbff091621..6d210fdd34 100644 --- a/Source/problems/hse_fill.cpp +++ b/Source/problems/hse_fill.cpp @@ -48,7 +48,7 @@ hse_fill(const Box& bx, Array4 const& adv, IntVect(D_DECL(domlo[0]-1, hi[1], hi[2]))); amrex::ParallelFor(gbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { amrex::ignore_unused(i); @@ -244,7 +244,7 @@ hse_fill(const Box& bx, Array4 const& adv, IntVect(D_DECL(domhi[0]+1, hi[1], hi[2]))); amrex::ParallelFor(gbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { amrex::ignore_unused(i); @@ -440,7 +440,7 @@ hse_fill(const Box& bx, Array4 const& adv, IntVect(D_DECL(hi[0], domlo[1]-1, hi[2]))); amrex::ParallelFor(gbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { amrex::ignore_unused(j); @@ -634,7 +634,7 @@ hse_fill(const Box& bx, Array4 const& adv, IntVect(D_DECL(hi[0], domhi[1]+1, hi[2]))); amrex::ParallelFor(gbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { amrex::ignore_unused(j); @@ -829,7 +829,7 @@ hse_fill(const Box& bx, Array4 const& adv, IntVect(D_DECL(hi[0], hi[1], domlo[2]-1))); amrex::ParallelFor(gbx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { amrex::ignore_unused(k); diff --git a/Source/rotation/Rotation.cpp b/Source/rotation/Rotation.cpp index d89fe13490..4966950201 100644 --- a/Source/rotation/Rotation.cpp +++ b/Source/rotation/Rotation.cpp @@ -27,7 +27,7 @@ Castro::fill_rotational_psi(const Box& bx, auto dx = geom.CellSizeArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { GpuArray r; diff --git a/Source/rotation/rotation_sources.cpp b/Source/rotation/rotation_sources.cpp index fc6410febc..f3af08546d 100644 --- a/Source/rotation/rotation_sources.cpp +++ b/Source/rotation/rotation_sources.cpp @@ -15,7 +15,7 @@ Castro::rsrc(const Box& bx, GeometryData geomdata = geom.data(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real Sr[3] = {}; @@ -227,7 +227,7 @@ Castro::corrrsrc(const Box& bx, } amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real Sr_old[3] = {}; diff --git a/Source/scf/scf_relax.cpp b/Source/scf/scf_relax.cpp index 8d0ff9f792..b45c9a8d3f 100644 --- a/Source/scf/scf_relax.cpp +++ b/Source/scf/scf_relax.cpp @@ -450,7 +450,7 @@ Castro::do_hscf_solve() auto phi_arr = (*phi[lev])[mfi].array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // The Bernoulli equation says that energy is conserved: // enthalpy + gravitational potential + rotational potential = const diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp index cfd7d1b27c..5ef99ed6f7 100644 --- a/Source/sdc/Castro_sdc.cpp +++ b/Source/sdc/Castro_sdc.cpp @@ -189,7 +189,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) auto C_arr = C2.array(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) noexcept + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { sdc_update_o2(i, j, k, k_m, k_n, A_m, A_n, C_arr, dt_m, sdc_iteration, m_start); }); @@ -215,7 +215,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // sometimes the Laplacian can make the species go negative near discontinuities amrex::ParallelFor(bx1, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) noexcept + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { normalize_species_sdc(i, j, k, U_center_arr); }); @@ -240,7 +240,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) make_cell_center(bx1, Sburn.array(mfi), U_new_center_arr, domain_lo, domain_hi); amrex::ParallelFor(bx1, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) noexcept + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { sdc_update_centers_o4(i, j, k, U_center_arr, U_new_center_arr, C_center_arr, dt_m, sdc_iteration); }); @@ -255,7 +255,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // BL_TO_FORTRAN_3D(U_new_center), // BL_TO_FORTRAN_3D(R_new)); amrex::ParallelFor(bx1, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) noexcept + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { instantaneous_react(i, j, k, U_new_center_arr, R_new_arr); }); @@ -373,7 +373,7 @@ Castro::construct_old_react_source(MultiFab& U_state, // BL_TO_FORTRAN_3D(U_center), // BL_TO_FORTRAN_3D(R_center)); amrex::ParallelFor(obx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) noexcept + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { instantaneous_react(i, j, k, U_center_arr, R_center_arr); }); @@ -413,7 +413,7 @@ Castro::construct_old_react_source(MultiFab& U_state, // BL_TO_FORTRAN_3D(U_state[mfi]), // BL_TO_FORTRAN_3D(R_source[mfi])); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) noexcept + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { instantaneous_react(i, j, k, U_state_arr, R_source_arr); }); diff --git a/Source/sources/Castro_geom.cpp b/Source/sources/Castro_geom.cpp index c34492cf1d..2340e93dc3 100644 --- a/Source/sources/Castro_geom.cpp +++ b/Source/sources/Castro_geom.cpp @@ -134,7 +134,7 @@ Castro::fill_geom_source ([[maybe_unused]] Real time, [[maybe_unused]] Real dt, Array4 const src = geom_src.array(mfi); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // radius for non-Cartesian diff --git a/Source/sources/Castro_sponge.cpp b/Source/sources/Castro_sponge.cpp index efd66150cc..8a79f0eea5 100644 --- a/Source/sources/Castro_sponge.cpp +++ b/Source/sources/Castro_sponge.cpp @@ -84,7 +84,7 @@ Castro::apply_sponge(const Box& bx, auto problo = geom.ProbLoArray(); amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { Real src[NSRC] = {0.0}; diff --git a/Source/sources/Castro_thermo.cpp b/Source/sources/Castro_thermo.cpp index 2a941678f3..a81a42e18f 100644 --- a/Source/sources/Castro_thermo.cpp +++ b/Source/sources/Castro_thermo.cpp @@ -145,7 +145,7 @@ Castro::fill_thermo_source (MultiFab& state_in, MultiFab& thermo_src) amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // radius for non-Cartesian