Merge pull request #6 from E3SM-Project/develop
Develop
ambrad authored Nov 14, 2018
2 parents b00f916 + 83cbb95 commit 1d81b05
Showing 15 changed files with 312 additions and 189 deletions.
39 changes: 29 additions & 10 deletions CMakeLists.txt
@@ -7,10 +7,10 @@ function (prc var)
message ("${var}: ${${var}}")
endfunction ()

find_package (MPI REQUIRED)
set (COMPOSE_DEBUG_MPI FALSE CACHE LOGICAL "If true, insert debugging code into MPI wrappers.")

if (Kokkos_DIR)
include (${Kokkos_DIR}/kokkos.cmake)
include (${Kokkos_DIR}/kokkos_generated_settings.cmake)
set (Kokkos_INCLUDE ${Kokkos_DIR}/include)
else ()
message (FATAL_ERROR "COMPOSE requires Kokkos_DIR")
@@ -47,23 +47,42 @@ set (HEADERS
siqk/siqk_intersect.hpp
siqk/siqk_quadrature.hpp
siqk/siqk_search.hpp
siqk/siqk_sqr.hpp)
siqk/siqk_sqr.hpp
share/compose_config.hpp)

if (NOT COMPOSE_TEST_MPIRUN)
set (COMPOSE_TEST_MPIRUN mpirun)
set (COMPOSE_TEST_MPIFLAGS)
endif ()
if (NOT COMPOSE_TEST_NRANK)
set (COMPOSE_TEST_NRANK 8)
endif ()

set (COMPOSE_COMPILE_FLAGS "${MPI_COMPILE_FLAGS} ${KOKKOS_CXXFLAGS} ${CMAKE_CXX_FLAGS}")
set (COMPOSE_LINK_FLAGS "${MPI_LINK_FLAGS} ${KOKKOS_LDFLAGS}")
set (COMPOSE_INCLUDES "${Kokkos_INCLUDE}")
set (COMPOSE_LIBRARIES ${MPI_LIBRARIES} ${KOKKOS_LIBS})
string (TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_str)
set (DEBUG_BUILD FALSE)
if ("${cmake_build_type_str}" STREQUAL "debug")
set (DEBUG_BUILD TRUE)
endif ()

if (DEBUG_BUILD)
message ("Enable COMPOSE_DEBUG_MPI because build type is DEBUG.")
set (COMPOSE_DEBUG_MPI TRUE)
endif ()

set (COMPOSE_COMPILE_FLAGS "-g ${KOKKOS_LINK_FLAGS} ${CMAKE_CXX_FLAGS}")
set (COMPOSE_LINK_FLAGS "-L${Kokkos_DIR}/lib ${KOKKOS_LINK_FLAGS}")
set (COMPOSE_INCLUDES ${Kokkos_INCLUDE} ${CMAKE_CURRENT_SOURCE_DIR}/share ${CMAKE_BINARY_DIR}/config)
set (COMPOSE_LIBRARIES ${KOKKOS_LIBS_LIST})

prc (COMPOSE_COMPILE_FLAGS)
prc (COMPOSE_LINK_FLAGS)
prc (COMPOSE_INCLUDES)
prc (COMPOSE_LIBRARIES)
prc (COMPOSE_DEBUG_MPI)

add_definitions (-DCOMPOSE_CONFIG_IS_CMAKE)
configure_file (${CMAKE_CURRENT_SOURCE_DIR}/compose_config.h.in ${CMAKE_BINARY_DIR}/config/compose_config.h)

prc(MPI_COMPILE_FLAGS)
prc(MPI_LINK_FLAGS)
prc(MPI_LIBRARIES)
add_library (${PROJECT_NAME} ${SOURCES})
set_target_properties (${PROJECT_NAME} PROPERTIES
COMPILE_FLAGS ${COMPOSE_COMPILE_FLAGS}
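
The build now generates config/compose_config.h from compose_config.h.in and puts share/ and the generated config/ directory on the include path. A plausible sketch of how share/compose_config.hpp might consume the generated header (an assumption for illustration; that file's contents are not shown in this diff):

// Sketch only: COMPOSE_CONFIG_IS_CMAKE comes from add_definitions above.
#ifdef COMPOSE_CONFIG_IS_CMAKE
# include "compose_config.h" // generated; defines COMPOSE_DEBUG_MPI when enabled
#endif
#ifdef COMPOSE_DEBUG_MPI
// MPI wrappers compile in the request-tracking code shown in cedr_mpi.cpp below.
#endif
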
2 changes: 1 addition & 1 deletion cedr/CMakeLists.txt
@@ -9,7 +9,7 @@ target_link_libraries (cedr_test ${PROJECT_NAME} ${COMPOSE_LIBRARIES})
add_test (NAME cedr-test-unit
COMMAND $<TARGET_FILE:cedr_test> -t)
add_test (NAME cedr-test-unit-mpi
COMMAND ${COMPOSE_TEST_MPIRUN} -np ${COMPOSE_TEST_NRANK}
COMMAND ${COMPOSE_TEST_MPIRUN} ${COMPOSE_TEST_MPIFLAGS} -np ${COMPOSE_TEST_NRANK}
$<TARGET_FILE:cedr_test> -t --proc-random -nc 111 -nt 11)
add_test (NAME cedr-test-t1d
COMMAND $<TARGET_FILE:cedr_test> -t -t1d -nc 111)
77 changes: 58 additions & 19 deletions cedr/cedr_caas.cpp
@@ -6,8 +6,10 @@ namespace cedr {
namespace caas {

template <typename ES>
CAAS<ES>::CAAS (const mpi::Parallel::Ptr& p, const Int nlclcells)
: p_(p), nlclcells_(nlclcells), nrhomidxs_(0), need_conserve_(false)
CAAS<ES>::CAAS (const mpi::Parallel::Ptr& p, const Int nlclcells,
const typename UserAllReducer::Ptr& uar)
: p_(p), user_reducer_(uar), nlclcells_(nlclcells), nrhomidxs_(0),
need_conserve_(false)
{
cedr_throw_if(nlclcells == 0, "CAAS does not support 0 cells on a rank.");
tracer_decls_ = std::make_shared<std::vector<Decl> >();
@@ -40,7 +42,7 @@ void CAAS<ES>::end_tracer_declarations () {
d_ = RealList("CAAS data", nlclcells_ * ((3+e)*probs_.size() + 1));
const auto nslots = 4*probs_.size();
// (e'Qm_clip, e'Qm, e'Qm_min, e'Qm_max, [e'Qm_prev])
send_ = RealList("CAAS send", nslots);
send_ = RealList("CAAS send", nslots*(user_reducer_ ? nlclcells_ : 1));
recv_ = RealList("CAAS recv", nslots);
}
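
With a user-supplied reducer, send_ now holds one value per (cell, slot) pair instead of one rank-local sum per slot. A minimal indexing sketch (hypothetical helper, mirroring the subscripts in reduce_locally below; the cell index varies fastest, matching the Fortran-style sendbuf(nlocal, nfld) layout documented in cedr_caas.hpp further down):

// Hypothetical helper, not in the commit: position of slot k, local cell i.
inline Int send_index (const bool user_reduces, const Int nlcl,
                       const Int k, const Int i) {
  return user_reduces ? nlcl*k + i // per-cell value for the user's reducer
                      : k;         // one pre-summed value per slot
}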

@@ -57,6 +59,7 @@ Int CAAS<ES>::get_num_tracers () const {

template <typename ES>
void CAAS<ES>::reduce_locally () {
const bool user_reduces = user_reducer_ != nullptr;
const Int nt = probs_.size();
Int k = 0;
Int os = nlclcells_;
@@ -65,33 +68,47 @@ void CAAS<ES>::reduce_locally () {
Real Qm_sum = 0, Qm_clip_sum = 0;
for (Int i = 0; i < nlclcells_; ++i) {
const Real Qm = d_(os+i);
Qm_sum += (probs_(k) & ProblemType::conserve ?
d_(os + nlclcells_*3*nt + i) /* Qm_prev */ :
Qm);
const Real Qm_term = (probs_(k) & ProblemType::conserve ?
d_(os + nlclcells_*3*nt + i) /* Qm_prev */ :
Qm);
const Real Qm_min = d_(os + nlclcells_* nt + i);
const Real Qm_max = d_(os + nlclcells_*2*nt + i);
const Real Qm_clip = cedr::impl::min(Qm_max, cedr::impl::max(Qm_min, Qm));
Qm_clip_sum += Qm_clip;
d_(os+i) = Qm_clip;
if (user_reduces) {
send_(nlclcells_* k + i) = Qm_clip;
send_(nlclcells_*(nt + k) + i) = Qm_term;
} else {
Qm_clip_sum += Qm_clip;
Qm_sum += Qm_term;
}
}
if ( ! user_reduces) {
send_( k) = Qm_clip_sum;
send_(nt + k) = Qm_sum;
}
send_( k) = Qm_clip_sum;
send_(nt + k) = Qm_sum;
os += nlclcells_;
}
k += nt;
// Qm_min, Qm_max
for ( ; k < 4*nt; ++k) {
Real accum = 0;
for (Int i = 0; i < nlclcells_; ++i)
accum += d_(os+i);
send_(k) = accum;
if (user_reduces) {
for (Int i = 0; i < nlclcells_; ++i)
send_(nlclcells_*k + i) = d_(os+i);
} else {
Real accum = 0;
for (Int i = 0; i < nlclcells_; ++i)
accum += d_(os+i);
send_(k) = accum;
}
os += nlclcells_;
}
}
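
The first loop above is the local half of CAAS: clip each cell's tracer mass into its bounds and accumulate both the clipped mass and the target mass. A serial sketch of that step (a hypothetical free function; the real code reads and writes the packed d_ array):

#include <algorithm>

// For one tracer over n local cells: clip Qm into [Qm_min, Qm_max] and form
// the two sums the subsequent reduction combines across ranks. Qm_prev is
// read only when mass conservation was requested for this tracer.
void clip_and_sum (const int n, const bool conserve,
                   const double* Qm, const double* Qm_min, const double* Qm_max,
                   const double* Qm_prev, double* Qm_clip,
                   double& Qm_sum, double& Qm_clip_sum) {
  Qm_sum = Qm_clip_sum = 0;
  for (int i = 0; i < n; ++i) {
    Qm_clip[i] = std::min(Qm_max[i], std::max(Qm_min[i], Qm[i]));
    Qm_clip_sum += Qm_clip[i];
    Qm_sum += conserve ? Qm_prev[i] : Qm[i];
  }
}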

template <typename ES>
void CAAS<ES>::reduce_globally () {
int err = mpi::all_reduce(*p_, send_.data(), recv_.data(), send_.size(), MPI_SUM);
const int err = mpi::all_reduce(*p_, send_.data(), recv_.data(),
send_.size(), MPI_SUM);
cedr_throw_if(err != MPI_SUCCESS,
"CAAS::reduce_globally MPI_Allreduce returned " << err);
}
@@ -110,7 +127,7 @@ void CAAS<ES>::finish_locally () {
if (fac > 0) {
fac = m/fac;
for (Int i = 0; i < nlclcells_; ++i) {
const Real Qm_min = d_(os + nlclcells_* nt + i);
const Real Qm_min = d_(os + nlclcells_ * nt + i);
Real& Qm = d_(os+i);
Qm += fac*(Qm - Qm_min);
}
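
The accumulation of fac happens above this excerpt; presumably it holds the sum over cells of (Qm - Qm_min), the total slack above the lower bounds, while m is the global mass discrepancy left after clipping. A worked example under that assumption: with two cells, Qm = (2, 4), Qm_min = (1, 1), and m = -0.6 of mass to remove, the slack is (1, 3) with total 4, so fac = -0.6/4 = -0.15, and Qm += fac*(Qm - Qm_min) gives (1.85, 3.55). Exactly 0.6 units of mass are removed, each cell stays at or above its minimum, and this holds whenever |m| does not exceed the total slack.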
@@ -134,23 +151,44 @@
template <typename ES>
void CAAS<ES>::run () {
reduce_locally();
reduce_globally();
if (user_reducer_)
(*user_reducer_)(*p_, send_.data(), recv_.data(),
nlclcells_, recv_.size(), MPI_SUM);
else
reduce_globally();
finish_locally();
}

namespace test {
struct TestCAAS : public cedr::test::TestRandomized {
typedef CAAS<Kokkos::DefaultExecutionSpace> CAAST;

TestCAAS (const mpi::Parallel::Ptr& p, const Int& ncells, const bool verbose)
struct TestAllReducer : public CAAST::UserAllReducer {
int operator() (const mpi::Parallel& p, Real* sendbuf, Real* rcvbuf,
int nlcl, int count, MPI_Op op) const override {
for (int i = 1; i < nlcl; ++i)
sendbuf[0] += sendbuf[i];
for (int k = 1; k < count; ++k) {
sendbuf[k] = sendbuf[nlcl*k];
for (int i = 1; i < nlcl; ++i)
sendbuf[k] += sendbuf[nlcl*k + i];
}
return mpi::all_reduce(p, sendbuf, rcvbuf, count, op);
}
};

TestCAAS (const mpi::Parallel::Ptr& p, const Int& ncells,
const bool use_own_reducer, const bool verbose)
: TestRandomized("CAAS", p, ncells, verbose),
p_(p)
{
const auto np = p->size(), rank = p->rank();
nlclcells_ = ncells / np;
const Int todo = ncells - nlclcells_ * np;
if (rank < todo) ++nlclcells_;
caas_ = std::make_shared<CAAST>(p, nlclcells_);
caas_ = std::make_shared<CAAST>(
p, nlclcells_,
use_own_reducer ? std::make_shared<TestAllReducer>() : nullptr);
init();
}

@@ -205,7 +243,8 @@ Int unittest (const mpi::Parallel::Ptr& p) {
for (Int nlclcells : {1, 2, 4, 11}) {
Long ncells = np*nlclcells;
if (ncells > np) ncells -= np/2;
nerr += TestCAAS(p, ncells, false).run(1, false);
nerr += TestCAAS(p, ncells, false, false).run(1, false);
nerr += TestCAAS(p, ncells, true, false).run(1, false);
}
return nerr;
}
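
Putting the pieces together, a plausible driver-side sketch of the CAAS interface (only calls visible in this diff are used; the construction of p, the data arrays, and the choice of ProblemType flag are assumptions, and a real driver may need additional setup):

typedef cedr::caas::CAAS<Kokkos::DefaultExecutionSpace> CAAST;
// Passing nullptr (the default) selects the plain MPI_Allreduce path.
const auto cdr = std::make_shared<CAAST>(p, nlclcells, nullptr);
cdr->declare_tracer(cedr::ProblemType::conserve, 0);
cdr->end_tracer_declarations();
for (cedr::Int i = 0; i < nlclcells; ++i)
  cdr->set_Qm(i, 0, Qm[i], Qm_min[i], Qm_max[i], Qm_prev[i]);
cdr->run(); // reduce_locally, user or MPI all-reduce, finish_locally
for (cedr::Int i = 0; i < nlclcells; ++i)
  Qm[i] = cdr->get_Qm(i, 0);
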
24 changes: 20 additions & 4 deletions cedr/cedr_caas.hpp
@@ -15,7 +15,22 @@ class CAAS : public CDR {
typedef std::shared_ptr<Me> Ptr;

public:
CAAS(const mpi::Parallel::Ptr& p, const Int nlclcells);
struct UserAllReducer {
typedef std::shared_ptr<const UserAllReducer> Ptr;
virtual int operator()(const mpi::Parallel& p,
// In Fortran, these are formatted as
// sendbuf(nlocal, nfld)
// rcvbuf(nfld)
// The implementation is permitted to modify sendbuf.
Real* sendbuf, Real* rcvbuf,
// nlocal is number of values to reduce in this rank.
// nfld is number of fields.
int nlocal, int nfld,
MPI_Op op) const = 0;
};

CAAS(const mpi::Parallel::Ptr& p, const Int nlclcells,
const typename UserAllReducer::Ptr& r = nullptr);

void declare_tracer(int problem_type, const Int& rhomidx) override;

@@ -32,14 +47,14 @@
KOKKOS_INLINE_FUNCTION
void set_Qm(const Int& lclcellidx, const Int& tracer_idx,
const Real& Qm, const Real& Qm_min, const Real& Qm_max,
const Real Qm_prev = -1) override;
const Real Qm_prev = std::numeric_limits<Real>::infinity()) override;

void run() override;

KOKKOS_INLINE_FUNCTION
Real get_Qm(const Int& lclcellidx, const Int& tracer_idx) override;

private:
protected:
typedef Kokkos::View<Real*, Kokkos::LayoutLeft, Device> RealList;
typedef cedr::impl::Unmanaged<RealList> UnmanagedRealList;
typedef Kokkos::View<Int*, Kokkos::LayoutLeft, Device> IntList;
@@ -52,7 +67,8 @@
};

mpi::Parallel::Ptr p_;

typename UserAllReducer::Ptr user_reducer_;

Int nlclcells_, nrhomidxs_;
std::shared_ptr<std::vector<Decl> > tracer_decls_;
bool need_conserve_;
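
TestAllReducer in cedr_caas.cpp above compacts each field's per-cell values in place and then calls the standard all-reduce; the interface exists so a host model can own this step, for example to control summation order. As an illustration (not part of the commit), a reducer that applies compensated (Kahan) summation to each field before reducing:

// Illustrative only; assumes the cedr headers above are included.
typedef cedr::caas::CAAS<Kokkos::DefaultExecutionSpace> CAAST;

struct KahanAllReducer : public CAAST::UserAllReducer {
  int operator() (const cedr::mpi::Parallel& p, cedr::Real* sendbuf,
                  cedr::Real* rcvbuf, int nlocal, int nfld,
                  MPI_Op op) const override {
    for (int k = 0; k < nfld; ++k) {
      cedr::Real sum = 0, c = 0;
      for (int i = 0; i < nlocal; ++i) { // compensated sum of field k
        const cedr::Real y = sendbuf[nlocal*k + i] - c;
        const cedr::Real t = sum + y;
        c = (t - sum) - y;
        sum = t;
      }
      sendbuf[k] = sum; // compact in place; the interface permits modification
    }
    return cedr::mpi::all_reduce(p, sendbuf, rcvbuf, nfld, op);
  }
};
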
2 changes: 1 addition & 1 deletion cedr/cedr_cdr.hpp
@@ -51,7 +51,7 @@ struct CDR {
const Real& Qm_min, const Real& Qm_max,
// If mass conservation is requested, provide the previous Qm, which will be
// summed to give the desired global mass.
const Real Qm_prev = -1) = 0;
const Real Qm_prev = std::numeric_limits<Real>::infinity()) = 0;

// Run the QLT algorithm with the values set by set_{rho,Q}. It is an error to
// call this function from a parallel region.
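
Both set_Qm signatures change the Qm_prev default from -1 to positive infinity. A plausible reading: -1 could in principle be a legitimate previous mass, whereas infinity cannot, so it is an unambiguous "not provided" sentinel. A sketch of the guard this presumably enables (an assumption; no such check appears in these hunks):

// Hypothetical check in an implementation of set_Qm:
if (problem_type & cedr::ProblemType::conserve)
  cedr_throw_if(Qm_prev == std::numeric_limits<cedr::Real>::infinity(),
                "set_Qm: Qm_prev is required when conservation is requested.");
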
55 changes: 49 additions & 6 deletions cedr/cedr_mpi.cpp
@@ -1,4 +1,5 @@
#include "cedr_mpi.hpp"
#include "cedr_util.hpp"

namespace cedr {
namespace mpi {
@@ -19,16 +20,58 @@ Int Parallel::rank () const {
return pid;
}

#ifdef COMPOSE_DEBUG_MPI
Request::Request () : unfreed(0) {}
Request::~Request () {
if (unfreed) {
std::stringstream ss;
ss << "Request is being deleted with unfreed = " << unfreed;
int fin;
MPI_Finalized(&fin);
if (fin) {
ss << "\n";
std::cerr << ss.str();
} else {
pr(ss.str());
}
}
}
#endif

template <> MPI_Datatype get_type<int>() { return MPI_INT; }
template <> MPI_Datatype get_type<double>() { return MPI_DOUBLE; }
template <> MPI_Datatype get_type<long>() { return MPI_LONG_INT; }

int waitany (int count, MPI_Request* reqs, int* index, MPI_Status* stats) {
return MPI_Waitany(count, reqs, index, stats ? stats : MPI_STATUS_IGNORE);
int waitany (int count, Request* reqs, int* index, MPI_Status* stats) {
#ifdef COMPOSE_DEBUG_MPI
std::vector<MPI_Request> vreqs(count);
for (int i = 0; i < count; ++i) vreqs[i] = reqs[i].request;
const auto out = MPI_Waitany(count, vreqs.data(), index,
stats ? stats : MPI_STATUS_IGNORE);
for (int i = 0; i < count; ++i) reqs[i].request = vreqs[i];
reqs[*index].unfreed--;
return out;
#else
return MPI_Waitany(count, reinterpret_cast<MPI_Request*>(reqs), index,
stats ? stats : MPI_STATUS_IGNORE);
#endif
}

int waitall (int count, MPI_Request* reqs, MPI_Status* stats) {
return MPI_Waitall(count, reqs, stats ? stats : MPI_STATUS_IGNORE);
int waitall (int count, Request* reqs, MPI_Status* stats) {
#ifdef COMPOSE_DEBUG_MPI
std::vector<MPI_Request> vreqs(count);
for (int i = 0; i < count; ++i) vreqs[i] = reqs[i].request;
const auto out = MPI_Waitall(count, vreqs.data(),
stats ? stats : MPI_STATUS_IGNORE);
for (int i = 0; i < count; ++i) {
reqs[i].request = vreqs[i];
reqs[i].unfreed--;
}
return out;
#else
return MPI_Waitall(count, reinterpret_cast<MPI_Request*>(reqs),
stats ? stats : MPI_STATUS_IGNORE);
#endif
}
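
The matching increments of unfreed are not in this hunk; presumably the nonblocking send/receive wrappers bump the counter whenever they hand out a request. A sketch under that assumption (also assuming Parallel exposes its communicator as p.comm(); get_type is the trait specialized above):

// Hypothetical wrapper; not part of this hunk.
template <typename T>
int isend (const Parallel& p, const T* buf, int count, int dest, int tag,
           Request* ireq) {
  const int ret = MPI_Isend(const_cast<T*>(buf), count, get_type<T>(),
                            dest, tag, p.comm(), &ireq->request);
#ifdef COMPOSE_DEBUG_MPI
  ++ireq->unfreed; // balanced by the decrements in waitany/waitall above
#endif
  return ret;
}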

bool all_ok (const Parallel& p, bool im_ok) {
@@ -37,5 +80,5 @@ bool all_ok (const Parallel& p, bool im_ok) {
return static_cast<bool>(msg);
}

}
}
} // namespace mpi
} // namespace cedr
