Skip to content

Commit

Permalink
Merge branch 'main' into development
Browse files Browse the repository at this point in the history
  • Loading branch information
zingale committed Feb 10, 2024
2 parents 744521b + 1f3bfff commit 80422fc
Showing 1 changed file with 55 additions and 30 deletions.
85 changes: 55 additions & 30 deletions Source/reactions/Castro_react.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,14 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra
MultiFab tmp_mask_mf;
const MultiFab& mask_mf = mask_covered_zones ? getLevel(level+1).build_fine_mask() : tmp_mask_mf;

ReduceOps<ReduceOpSum> reduce_op;
ReduceData<Real> reduce_data(reduce_op);
using ReduceTuple = typename decltype(reduce_data)::Type;
#if defined(AMREX_USE_GPU)
Gpu::Buffer<int> d_num_failed({0});
auto* p_num_failed = d_num_failed.data();
#endif
int num_failed = 0;

#ifdef _OPENMP
#pragma omp parallel
#pragma omp parallel reduction(+:num_failed)
#endif
for (MFIter mfi(s, TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
Expand All @@ -208,8 +210,11 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra
const auto problo = geom.ProbLoArray();
#endif

reduce_op.eval(bx, reduce_data,
[=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple
#if defined(AMREX_USE_GPU)
ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k)
#else
LoopOnCpu(bx, [&] (int i, int j, int k) mutable
#endif
{

burn_t burn_state;
Expand All @@ -230,7 +235,7 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra

bool do_burn = true;
burn_state.success = true;
Real burn_failed = 0.0_rt;
int burn_failed = 0;

// Don't burn on zones inside shock regions, if the relevant option is set.

Expand Down Expand Up @@ -329,7 +334,7 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra
// If we were unsuccessful, update the failure count.

if (!burn_state.success) {
burn_failed = 1.0_rt;
burn_failed = 1;
}

// Add burning rates to reactions MultiFab, but be
Expand Down Expand Up @@ -399,19 +404,25 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra

}


return {burn_failed};

#if defined(AMREX_USE_GPU)
if (burn_failed) {
Gpu::Atomic::Add(p_num_failed, burn_failed);
}
#else
num_failed += burn_failed;
#endif
});

#if defined(AMREX_USE_HIP)
Gpu::streamSynchronize(); // otherwise HIP may faile to allocate the necessary resources.
#endif
}

ReduceTuple hv = reduce_data.value();
Real burn_failed = amrex::get<0>(hv);
#if defined(AMREX_USE_GPU)
num_failed = *(d_num_failed.copyToHost());
#endif

if (burn_failed != 0.0) {
burn_success = 0;
}
burn_success = !num_failed;

ParallelDescriptor::ReduceIntMin(burn_success);

Expand Down Expand Up @@ -516,11 +527,13 @@ Castro::react_state(Real time, Real dt)

int burn_success = 1;

ReduceOps<ReduceOpSum> reduce_op;
ReduceData<Real> reduce_data(reduce_op);

using ReduceTuple = typename decltype(reduce_data)::Type;
#if defined(AMREX_USE_GPU)
Gpu::Buffer<int> d_num_failed({0});
auto* p_num_failed = d_num_failed.data();
#endif
int num_failed = 0;

// why no omp here?
for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
const Box& bx = mfi.growntilebox(ng);
Expand All @@ -542,8 +555,11 @@ Castro::react_state(Real time, Real dt)
const auto dx = geom.CellSizeArray();
const auto problo = geom.ProbLoArray();

reduce_op.eval(bx, reduce_data,
[=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple
#if defined(AMREX_USE_GPU)
ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k)
#else
LoopOnCpu(bx, [&] (int i, int j, int k) mutable
#endif
{
burn_t burn_state;

Expand All @@ -563,7 +579,7 @@ Castro::react_state(Real time, Real dt)

bool do_burn = true;
burn_state.success = true;
Real burn_failed = 0.0_rt;
int burn_failed = 0;

// Don't burn on zones inside shock regions, if the
// relevant option is set.
Expand Down Expand Up @@ -687,7 +703,7 @@ Castro::react_state(Real time, Real dt)
// If we were unsuccessful, update the failure count.

if (!burn_state.success) {
burn_failed = 1.0_rt;
burn_failed = 1;
}

// update the state data.
Expand Down Expand Up @@ -780,16 +796,25 @@ Castro::react_state(Real time, Real dt)
}
}

return {burn_failed};
#if defined(AMREX_USE_GPU)
if (burn_failed) {
Gpu::Atomic::Add(p_num_failed, burn_failed);
}
#else
num_failed += burn_failed;
#endif
});

#if defined(AMREX_USE_HIP)
Gpu::streamSynchronize(); // otherwise HIP may faile to allocate the necessary resources.
#endif
}

ReduceTuple hv = reduce_data.value();
Real burn_failed = amrex::get<0>(hv);
#if defined(AMREX_USE_GPU)
num_failed = *(d_num_failed.copyToHost());
#endif

if (burn_failed != 0.0) {
burn_success = 0;
}
burn_success = !num_failed;

ParallelDescriptor::ReduceIntMin(burn_success);

Expand Down

0 comments on commit 80422fc

Please sign in to comment.