diff --git a/src/exchangeAMRDonors.C b/src/exchangeAMRDonors.C index e6a6af5..eba12c0 100644 --- a/src/exchangeAMRDonors.C +++ b/src/exchangeAMRDonors.C @@ -48,9 +48,6 @@ void tioga::exchangeAMRDonors(void) // // setup communicator for all to all now // since the receiver side is unknown - // FIXME: - // add sophisticated code later to fix the all_to_all - // using MPI-2 standard // pc_cart->getMap(&nsend_sav,&nrecv_sav,&sndMap,&rcvMap); sndMapAll=(int *)malloc(sizeof(int)*pc_cart->numprocs); @@ -158,6 +155,18 @@ void tioga::exchangeAMRDonors(void) } } } + + // Remove sends that won't be used. The first 2 ints are obdonors + // and obreceptors. If nothing else is being sent, then these won't + // be used so we can safely remove them and avoid unnecessary + // communication + for(int i=0;i +#include #include "parallelComm.h" #define REAL double @@ -26,16 +28,11 @@ void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack) { int i; int *sint,*sreal,*rint,*rreal; - int tag,irnum; - MPI_Request *request; - MPI_Status *status; // sint=(int *)malloc(sizeof(int)*numprocs); sreal=(int *) malloc(sizeof(int)*numprocs); rint=(int *)malloc(sizeof(int)*numprocs); rreal=(int *) malloc(sizeof(int)*numprocs); - request=(MPI_Request *) malloc(sizeof(MPI_Request)*4*numprocs); - status=(MPI_Status *) malloc(sizeof(MPI_Status)*4*numprocs); // for(i=0;i 0) { - tag=1; - rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); - MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints, - MPI_INT,i, - tag,scomm,&request[irnum++]); - } - if (rcvPack[i].nreals > 0) { - tag=2; - rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); - MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals, - MPI_DOUBLE,i, - tag,scomm,&request[irnum++]); - } + + int all_snd_nints = std::accumulate(sint, sint + numprocs, 0); + int all_rcv_nints = std::accumulate(rint, rint + numprocs, 0); + int *all_snd_intData, *all_rcv_intData; + all_snd_intData=(int *) malloc(sizeof(int)*all_snd_nints); + all_rcv_intData=(int *) malloc(sizeof(int)*all_rcv_nints); + std::vector snd_int_displs(numprocs+1, 0); + std::vector rcv_int_displs(numprocs+1, 0); + for (int i=1; i <= numprocs; i++) { + snd_int_displs[i] = snd_int_displs[i-1] + sint[i-1]; + rcv_int_displs[i] = rcv_int_displs[i-1] + rint[i-1]; + } + for (int i=0; i < numprocs; i++) { + int displ = snd_int_displs[i]; + for(int j=0; j < sint[i]; j++){ + all_snd_intData[displ+j] = sndPack[i].intData[j]; } - for(i=0;i 0){ - tag=1; - MPI_Isend(sndPack[i].intData,sndPack[i].nints, - MPI_INT,i, - tag,scomm,&request[irnum++]); - } - if (sndPack[i].nreals > 0){ - tag=2; - MPI_Isend(sndPack[i].realData,sndPack[i].nreals, - MPI_DOUBLE,i, - tag,scomm,&request[irnum++]); - } + } + MPI_Request int_request; + MPI_Ialltoallv(all_snd_intData, + sint, + snd_int_displs.data(), + MPI_INT, + all_rcv_intData, + rint, + rcv_int_displs.data(), + MPI_INT, + scomm, + &int_request); + + int all_snd_nreals = std::accumulate(sreal, sreal + numprocs, 0); + int all_rcv_nreals = std::accumulate(rreal, rreal + numprocs, 0); + REAL *all_snd_realData, *all_rcv_realData; + all_snd_realData=(REAL *) malloc(sizeof(REAL)*all_snd_nreals); + all_rcv_realData=(REAL *) malloc(sizeof(REAL)*all_rcv_nreals); + std::vector snd_real_displs(numprocs+1, 0); + std::vector rcv_real_displs(numprocs+1, 0); + for (int i=1; i <= numprocs; i++) { + snd_real_displs[i] = snd_real_displs[i-1] + sreal[i-1]; + rcv_real_displs[i] = rcv_real_displs[i-1] + rreal[i-1]; + } + for (int i=0; i < numprocs; i++) { + int displ = snd_real_displs[i]; + for(int j=0; j < sreal[i]; j++){ + all_snd_realData[displ+j] = sndPack[i].realData[j]; } - MPI_Waitall(irnum,request,status); - + } + MPI_Request real_request; + MPI_Ialltoallv(all_snd_realData, + sreal, + snd_real_displs.data(), + MPI_DOUBLE, + all_rcv_realData, + rreal, + rcv_real_displs.data(), + MPI_DOUBLE, + scomm, + &real_request); + + MPI_Wait(&int_request, MPI_STATUS_IGNORE); + for(i=0;i 0) { + rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); + } + if (rcvPack[i].nreals > 0) { + rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); + } + } + + MPI_Wait(&real_request, MPI_STATUS_IGNORE); + for (int i=0; i < numprocs; i++) { + int displ = rcv_int_displs[i]; + for(int j=0; j < rint[i]; j++){ + rcvPack[i].intData[j] = all_rcv_intData[displ+j]; + } + } + for (int i=0; i < numprocs; i++) { + int displ = rcv_real_displs[i]; + for(int j=0; j < rreal[i]; j++){ + rcvPack[i].realData[j] = all_rcv_realData[displ+j]; + } + } + + TIOGA_FREE(all_snd_intData); + TIOGA_FREE(all_rcv_intData); + TIOGA_FREE(all_snd_realData); + TIOGA_FREE(all_rcv_realData); TIOGA_FREE(sint); TIOGA_FREE(sreal); TIOGA_FREE(rint); TIOGA_FREE(rreal); - TIOGA_FREE(request); - TIOGA_FREE(status); } +// Old version that does not use ialltoallv +// void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack) +// { +// int i; +// int *sint,*sreal,*rint,*rreal; +// int tag,irnum; +// MPI_Request *request; +// MPI_Status *status; +// // +// sint=(int *)malloc(sizeof(int)*numprocs); +// sreal=(int *) malloc(sizeof(int)*numprocs); +// rint=(int *)malloc(sizeof(int)*numprocs); +// rreal=(int *) malloc(sizeof(int)*numprocs); +// request=(MPI_Request *) malloc(sizeof(MPI_Request)*4*numprocs); +// status=(MPI_Status *) malloc(sizeof(MPI_Status)*4*numprocs); +// // +// for(i=0;i 0) { +// tag=1; +// rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); +// MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints, +// MPI_INT,i, +// tag,scomm,&request[irnum++]); +// } +// if (rcvPack[i].nreals > 0) { +// tag=2; +// rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); +// MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals, +// MPI_DOUBLE,i, +// tag,scomm,&request[irnum++]); +// } +// } +// for(i=0;i 0){ +// tag=1; +// MPI_Isend(sndPack[i].intData,sndPack[i].nints, +// MPI_INT,i, +// tag,scomm,&request[irnum++]); +// } +// if (sndPack[i].nreals > 0){ +// tag=2; +// MPI_Isend(sndPack[i].realData,sndPack[i].nreals, +// MPI_DOUBLE,i, +// tag,scomm,&request[irnum++]); +// } +// } +// MPI_Waitall(irnum,request,status); + +// TIOGA_FREE(sint); +// TIOGA_FREE(sreal); +// TIOGA_FREE(rint); +// TIOGA_FREE(rreal); +// TIOGA_FREE(request); +// TIOGA_FREE(status); +// } + void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) { int i;