diff --git a/.gitignore b/.gitignore index 4de7fb76e..88b83a749 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ CMakeFiles CMakeCache.txt cmake_install.cmake Makefile.local +git-state.txt +Kokkos_Version_info.* + # test artifacts test/**/*.o @@ -36,6 +39,8 @@ test/**/KokkosCore* test/**/*.csv test/**/*.pyc test/**/*.dat +test/**/cmake_packages* +test/**/ # machine specific cache and hidden files .* diff --git a/CMakeLists.txt b/CMakeLists.txt index 364ee4ef3..a7c856ddd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,10 +99,7 @@ if(Idefix_MPI) add_compile_definitions("WITH_MPI") find_package(MPI REQUIRED) target_link_libraries(idefix MPI::MPI_CXX) - target_sources(idefix - PUBLIC src/mpi.cpp - PUBLIC src/mpi.hpp - ) + add_subdirectory(src/mpi) endif() if(Idefix_HDF5) @@ -234,6 +231,7 @@ target_include_directories(idefix PUBLIC src/gravity src/utils src/utils/iterativesolver + src/mpi src ) diff --git a/reference b/reference index c4082b99a..1028989b3 160000 --- a/reference +++ b/reference @@ -1 +1 @@ -Subproject commit c4082b99a4c542def3177c96cb35b1c9d9002f18 +Subproject commit 1028989b3847200da9b54ed74df0d5c0b0d13045 diff --git a/src/dataBlock/dataBlock.cpp b/src/dataBlock/dataBlock.cpp index 81874cef6..2877a8346 100644 --- a/src/dataBlock/dataBlock.cpp +++ b/src/dataBlock/dataBlock.cpp @@ -367,6 +367,7 @@ real DataBlock::ComputeTimestep() { void DataBlock::DeriveVectorPotential() { if constexpr(DefaultPhysics::mhd) { #ifdef EVOLVE_VECTOR_POTENTIAL + hydro->emf->EnforceVectorPotentialBoundary(hydro->Ve); hydro->emf->ComputeMagFieldFromA(hydro->Ve, hydro->Vs); #endif } diff --git a/src/dataBlock/fargo.cpp b/src/dataBlock/fargo.cpp index a509ef886..7aa2d26a5 100644 --- a/src/dataBlock/fargo.cpp +++ b/src/dataBlock/fargo.cpp @@ -138,10 +138,15 @@ Fargo::Fargo(Input &input, int nmax, DataBlock *data) { for(int i=0 ; i < nvar ; i++) { vars.push_back(i); } + #if GEOMETRY == CARTESIAN || GEOMETRY == POLAR + const int dirShift = JDIR; + #elif GEOMETRY == SPHERICAL + const int dirShift = KDIR; + #endif #if MHD == YES - this->mpi.Init(data->mygrid, vars, this->nghost.data(), data->np_int.data(), true); + this->mpiExchanger.Init(data->mygrid, dirShift, vars, this->nghost, data->np_int, true); #else - this->mpi.Init(data->mygrid, vars, this->nghost.data(), data->np_int.data()); + this->mpiExchanger.Init(data->mygrid, dirShift, vars, this->nghost, data->np_int, false); #endif } #endif diff --git a/src/dataBlock/fargo.hpp b/src/dataBlock/fargo.hpp index 099c99350..a1efcb2b6 100644 --- a/src/dataBlock/fargo.hpp +++ b/src/dataBlock/fargo.hpp @@ -11,7 +11,7 @@ #include #include "idefix.hpp" #ifdef WITH_MPI - #include "mpi.hpp" + #include "exchanger.hpp" #endif #include "physics.hpp" @@ -63,7 +63,7 @@ class Fargo { IdefixArray4D scrhVs; #ifdef WITH_MPI - Mpi mpi; // Fargo-specific MPI layer + Exchanger mpiExchanger; // Fargo-specific MPI layer #endif std::array beg; @@ -290,11 +290,7 @@ void Fargo::StoreToScratch(Fluid* hydro) { } #if WITH_MPI if(haveDomainDecomposition) { - #if GEOMETRY == CARTESIAN || GEOMETRY == POLAR - this->mpi.ExchangeX2(scrhUc, scrhVs); - #elif GEOMETRY == SPHERICAL - this->mpi.ExchangeX3(scrhUc, scrhVs); - #endif + this->mpiExchanger.Exchange(scrhUc, scrhVs); } #endif } diff --git a/src/fluid/boundary/axis.cpp b/src/fluid/boundary/axis.cpp index afdeb8655..47f0c2954 100644 --- a/src/fluid/boundary/axis.cpp +++ b/src/fluid/boundary/axis.cpp @@ -22,10 +22,9 @@ void Axis::ShowConfig() { } -void Axis::SymmetrizeEx1Side(int jref) { +void Axis::SymmetrizeEx1Side(int 
jref, IdefixArray3D Ex1) { #if DIMENSIONS == 3 - IdefixArray3D Ex1 = this->ex; IdefixArray1D Ex1Avg = this->Ex1Avg; idefix_for("Ex1_ini",0,data->np_tot[IDIR], @@ -63,9 +62,7 @@ void Axis::SymmetrizeEx1Side(int jref) { // Hence, we enforce a regularisation of Ex3 for consistancy. -void Axis::RegularizeEx3side(int jref) { - IdefixArray3D Ex3 = this->ez; - +void Axis::RegularizeEx3side(int jref, IdefixArray3D Ex3) { idefix_for("Ex3_Regularise",0,data->np_tot[KDIR],0,data->np_tot[IDIR], KOKKOS_LAMBDA(int k,int i) { Ex3(k,jref,i) = 0.0; @@ -133,18 +130,20 @@ void Axis::RegularizeCurrentSide(int side) { // Average the Emf component along the axis -void Axis::RegularizeEMFs() { +void Axis::RegularizeEMFs(IdefixArray3D ex, + IdefixArray3D ey, + IdefixArray3D ez) { idfx::pushRegion("Axis::RegularizeEMFs"); if(this->axisLeft) { int jref = data->beg[JDIR]; - SymmetrizeEx1Side(jref); - RegularizeEx3side(jref); + SymmetrizeEx1Side(jref, ex); + RegularizeEx3side(jref, ez); } if(this->axisRight) { int jref = data->end[JDIR]; - SymmetrizeEx1Side(jref); - RegularizeEx3side(jref); + SymmetrizeEx1Side(jref, ex); + RegularizeEx3side(jref, ez); } idfx::popRegion(); diff --git a/src/fluid/boundary/axis.hpp b/src/fluid/boundary/axis.hpp index b74697862..1a1227b61 100644 --- a/src/fluid/boundary/axis.hpp +++ b/src/fluid/boundary/axis.hpp @@ -26,15 +26,16 @@ class Axis { public: template explicit Axis(Boundary *); // Initialisation - void RegularizeEMFs(); // Regularize the EMF sitting on the axis + void RegularizeEMFs(IdefixArray3D, IdefixArray3D, IdefixArray3D); + // Regularize the EMF sitting on the axis void RegularizeCurrent(); // Regularize the currents along the axis void EnforceAxisBoundary(int side); // Enforce the boundary conditions (along X2) void RegularizeBX2s(); // Regularize BX2s on the axis void ShowConfig(); - - void SymmetrizeEx1Side(int); // Symmetrize on a specific side (internal method) - void RegularizeEx3side(int); // Regularize Ex3 along the axis (internal method) + // Internal methods + void SymmetrizeEx1Side(int, IdefixArray3D); // Symmetrize on a specific side + void RegularizeEx3side(int side, IdefixArray3D ex3); // Regularize Ex3 along the axis void RegularizeCurrentSide(int); // Regularize J along the axis (internal method) void FixBx2sAxis(int side); // Fix BX2s on the axis using the field around it (internal) void FixBx2sAxisGhostAverage(int side); //Fix BX2s on the axis using the average of neighbouring @@ -76,9 +77,6 @@ class Axis { IdefixArray1D symmetryVc; IdefixArray1D symmetryVs; - IdefixArray3D ex; - IdefixArray3D ey; - IdefixArray3D ez; IdefixArray4D J; IdefixArray4D Vc; @@ -94,11 +92,6 @@ Axis::Axis(Boundary *boundary) { Vc = boundary->Vc; Vs = boundary->Vs; J = boundary->fluid->J; - if constexpr(Phys::mhd) { - ex = boundary->fluid->emf->ex; - ey = boundary->fluid->emf->ey; - ez = boundary->fluid->emf->ez; - } data = boundary->data; haveMHD = Phys::mhd; diff --git a/src/fluid/boundary/boundary.hpp b/src/fluid/boundary/boundary.hpp index 897c1ce10..4065cc60d 100644 --- a/src/fluid/boundary/boundary.hpp +++ b/src/fluid/boundary/boundary.hpp @@ -34,6 +34,8 @@ template using InternalBoundaryFunc = void (*) (Fluid *, const real t); using InternalBoundaryFuncOld = void (*) (DataBlock &, const real t); // DEPRECATED +using BoundingBox = std::array,3>; + template class Boundary { public: @@ -83,12 +85,22 @@ class Boundary { const BoundarySide &, Function ); + template + void BoundaryFor(const std::string &, + BoundingBox box, + Function ); + template void BoundaryForAll(const 
std::string &, const int &, const BoundarySide &, Function ); + template + void BoundaryForAll(const std::string &, + BoundingBox box, + Function ); + template void BoundaryForX1s(const std::string &, const int &, @@ -115,6 +127,10 @@ class Boundary { bool haveLeftAxis{false}; ///< True if the left boundary is an axis bool haveRightAxis{false}; ///< True if the right boundary is an axis + std::array,3> GhostBoxVc; ///< A bounding box for each ghost regions + std::array,3>,3> + GhostBoxVs; ///< A bounding box each Vs component + private: friend class Axis; Fluid *fluid; // pointer to parent hydro object @@ -154,6 +170,49 @@ Boundary::Boundary(Fluid* fluid) { data->nghost[IDIR]); } + // Initialise the Bounding Boxes for cell-centered variables + for(int dir = 0 ; dir < 3 ; dir++) { + // dir=direction along which we plan to apply the boundary conditions + for(int side = 0; side < 2 ; side++) { + // Side on which we apply the boundaries + for(int dim = 0 ; dim < 3 ; dim++) { + // Dimension of the datacube + if(dim != dir) { + GhostBoxVc[dir][side][dim][0] = 0; + GhostBoxVc[dir][side][dim][1] = data->np_tot[dim]; + } else { + GhostBoxVc[dir][side][dim][0] = side*(data->end[dim]); + GhostBoxVc[dir][side][dim][1] = side*(data->end[dim])+data->nghost[dim]; + } + } + } + } + + // Initialise the Bounding Boxes for face-centered variables (NB: we need one for each component) + for(int component = 0 ; component < DIMENSIONS ; component++) { + // Initialise the boxes for face-centered variables with the same bounding box + GhostBoxVs[component] = GhostBoxVc; + for(int dir = 0 ; dir < 3 ; dir++) { + for(int side = 0 ; side < 2 ; side++) { + // Add one element in the normal direction since we're staggered + GhostBoxVs[component][dir][side][component][1] += 1; + // Do not overwrite last active BXs normal if not serial+periodic + if(dir == component) { + if(side==left) { + if(data->mygrid->nproc[dir] > 1 || data->lbound[dir] != BoundaryType::periodic) { + GhostBoxVs[component][dir][side][component][1] -= 1; + } + } + if(side==right) { + if(data->mygrid->nproc[dir] > 1 || data->rbound[dir] != BoundaryType::periodic) { + GhostBoxVs[component][dir][side][component][0] += 1; + } + } + } + } + } + } + // Init MPI stack when needed #ifdef WITH_MPI //////////////////////////////////////////////////////////////////////////// @@ -185,7 +244,8 @@ Boundary::Boundary(Fluid* fluid) { } } - mpi.Init(data->mygrid, mapVars, data->nghost.data(), data->np_int.data(), Phys::mhd); + mpi.Init(data->mygrid, mapVars, data->nghost, data->np_int, + data->lbound, data->rbound, Phys::mhd); #endif // MPI idfx::popRegion(); @@ -970,54 +1030,47 @@ template template inline void Boundary::BoundaryForAll( const std::string & name, - const int &dir, - const BoundarySide &side, + BoundingBox box, Function function) { - const int nxi = data->np_int[IDIR]; - const int nxj = data->np_int[JDIR]; - const int nxk = data->np_int[KDIR]; - - const int ighost = data->nghost[IDIR]; - const int jghost = data->nghost[JDIR]; - const int kghost = data->nghost[KDIR]; - - // Boundaries of the loop - const int ibeg = (dir == IDIR) ? side*(ighost+nxi) : 0; - const int iend = (dir == IDIR) ? ighost + side*(ighost+nxi) : data->np_tot[IDIR]; - const int jbeg = (dir == JDIR) ? side*(jghost+nxj) : 0; - const int jend = (dir == JDIR) ? jghost + side*(jghost+nxj) : data->np_tot[JDIR]; - const int kbeg = (dir == KDIR) ? side*(kghost+nxk) : 0; - const int kend = (dir == KDIR) ? 
kghost + side*(kghost+nxk) : data->np_tot[KDIR]; - - idefix_for(name, 0, this->nVar, kbeg, kend, jbeg, jend, ibeg, iend, function); + idefix_for(name, 0, this->nVar, + box[KDIR][0], box[KDIR][1], + box[JDIR][0], box[JDIR][1], + box[IDIR][0], box[IDIR][1], + function); } template template -inline void Boundary::BoundaryFor( +inline void Boundary::BoundaryForAll( const std::string & name, const int &dir, const BoundarySide &side, Function function) { - const int nxi = data->np_int[IDIR]; - const int nxj = data->np_int[JDIR]; - const int nxk = data->np_int[KDIR]; - - const int ighost = data->nghost[IDIR]; - const int jghost = data->nghost[JDIR]; - const int kghost = data->nghost[KDIR]; - - // Boundaries of the loop - const int ibeg = (dir == IDIR) ? side*(ighost+nxi) : 0; - const int iend = (dir == IDIR) ? ighost + side*(ighost+nxi) : data->np_tot[IDIR]; - const int jbeg = (dir == JDIR) ? side*(jghost+nxj) : 0; - const int jend = (dir == JDIR) ? jghost + side*(jghost+nxj) : data->np_tot[JDIR]; - const int kbeg = (dir == KDIR) ? side*(kghost+nxk) : 0; - const int kend = (dir == KDIR) ? kghost + side*(kghost+nxk) : data->np_tot[KDIR]; + BoundaryForAll(name,GhostBoxVc[dir][side],function); +} +template +template +inline void Boundary::BoundaryFor( + const std::string & name, + BoundingBox box, + Function function) { + idefix_for(name, + box[KDIR][0], box[KDIR][1], + box[JDIR][0], box[JDIR][1], + box[IDIR][0], box[IDIR][1], + function); +} - idefix_for(name, kbeg, kend, jbeg, jend, ibeg, iend, function); +template +template +inline void Boundary::BoundaryFor( + const std::string & name, + const int &dir, + const BoundarySide &side, + Function function) { + BoundaryFor(name,GhostBoxVc[dir][side],function); } template @@ -1027,23 +1080,7 @@ inline void Boundary::BoundaryForX1s( const int &dir, const BoundarySide &side, Function function) { - const int nxi = data->np_int[IDIR]+1; - const int nxj = data->np_int[JDIR]; - const int nxk = data->np_int[KDIR]; - - const int ighost = data->nghost[IDIR]; - const int jghost = data->nghost[JDIR]; - const int kghost = data->nghost[KDIR]; - - // Boundaries of the loop - const int ibeg = (dir == IDIR) ? side*(ighost+nxi) : 0; - const int iend = (dir == IDIR) ? ighost + side*(ighost+nxi) : data->np_tot[IDIR]+1; - const int jbeg = (dir == JDIR) ? side*(jghost+nxj) : 0; - const int jend = (dir == JDIR) ? jghost + side*(jghost+nxj) : data->np_tot[JDIR]; - const int kbeg = (dir == KDIR) ? side*(kghost+nxk) : 0; - const int kend = (dir == KDIR) ? kghost + side*(kghost+nxk) : data->np_tot[KDIR]; - - idefix_for(name, kbeg, kend, jbeg, jend, ibeg, iend, function); + BoundaryFor(name,GhostBoxVs[BX1s][dir][side],function); } template @@ -1053,23 +1090,7 @@ inline void Boundary::BoundaryForX2s( const int &dir, const BoundarySide &side, Function function) { - const int nxi = data->np_int[IDIR]; - const int nxj = data->np_int[JDIR]+1; - const int nxk = data->np_int[KDIR]; - - const int ighost = data->nghost[IDIR]; - const int jghost = data->nghost[JDIR]; - const int kghost = data->nghost[KDIR]; - - // Boundaries of the loop - const int ibeg = (dir == IDIR) ? side*(ighost+nxi) : 0; - const int iend = (dir == IDIR) ? ighost + side*(ighost+nxi) : data->np_tot[IDIR]; - const int jbeg = (dir == JDIR) ? side*(jghost+nxj) : 0; - const int jend = (dir == JDIR) ? jghost + side*(jghost+nxj) : data->np_tot[JDIR]+1; - const int kbeg = (dir == KDIR) ? side*(kghost+nxk) : 0; - const int kend = (dir == KDIR) ? 
kghost + side*(kghost+nxk) : data->np_tot[KDIR]; - - idefix_for(name, kbeg, kend, jbeg, jend, ibeg, iend, function); + BoundaryFor(name,GhostBoxVs[BX2s][dir][side],function); } template @@ -1079,23 +1100,7 @@ inline void Boundary::BoundaryForX3s( const int &dir, const BoundarySide &side, Function function) { - const int nxi = data->np_int[IDIR]; - const int nxj = data->np_int[JDIR]; - const int nxk = data->np_int[KDIR]+1; - - const int ighost = data->nghost[IDIR]; - const int jghost = data->nghost[JDIR]; - const int kghost = data->nghost[KDIR]; - - // Boundaries of the loop - const int ibeg = (dir == IDIR) ? side*(ighost+nxi) : 0; - const int iend = (dir == IDIR) ? ighost + side*(ighost+nxi) : data->np_tot[IDIR]; - const int jbeg = (dir == JDIR) ? side*(jghost+nxj) : 0; - const int jend = (dir == JDIR) ? jghost + side*(jghost+nxj) : data->np_tot[JDIR]; - const int kbeg = (dir == KDIR) ? side*(kghost+nxk) : 0; - const int kend = (dir == KDIR) ? kghost + side*(kghost+nxk) : data->np_tot[KDIR]+1; - - idefix_for(name, kbeg, kend, jbeg, jend, ibeg, iend, function); + BoundaryFor(name,GhostBoxVs[BX3s][dir][side],function); } diff --git a/src/fluid/constrainedTransport/CMakeLists.txt b/src/fluid/constrainedTransport/CMakeLists.txt index 06efc782b..7d99b805c 100644 --- a/src/fluid/constrainedTransport/CMakeLists.txt +++ b/src/fluid/constrainedTransport/CMakeLists.txt @@ -5,6 +5,7 @@ target_sources(idefix PUBLIC ${CMAKE_CURRENT_LIST_DIR}/constrainedTransport.hpp PUBLIC ${CMAKE_CURRENT_LIST_DIR}/EMFexchange.hpp PUBLIC ${CMAKE_CURRENT_LIST_DIR}/enforceEMFBoundary.hpp + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/enforceVectorPotentialBoundary.hpp PUBLIC ${CMAKE_CURRENT_LIST_DIR}/evolveMagField.hpp PUBLIC ${CMAKE_CURRENT_LIST_DIR}/evolveVectorPotential.hpp ) diff --git a/src/fluid/constrainedTransport/EMFexchange.hpp b/src/fluid/constrainedTransport/EMFexchange.hpp index 187186271..f6e90f75a 100644 --- a/src/fluid/constrainedTransport/EMFexchange.hpp +++ b/src/fluid/constrainedTransport/EMFexchange.hpp @@ -14,16 +14,18 @@ #ifdef WITH_MPI template -void ConstrainedTransport::ExchangeAll() { - if(data->mygrid->nproc[IDIR]>1) this->ExchangeX1(); - if(data->mygrid->nproc[JDIR]>1) this->ExchangeX2(); - if(data->mygrid->nproc[KDIR]>1) this->ExchangeX3(); +void ConstrainedTransport::ExchangeAll(IdefixArray3D ex, + IdefixArray3D ey, + IdefixArray3D ez) { + if(data->mygrid->nproc[IDIR]>1) this->ExchangeX1(ey,ez); + if(data->mygrid->nproc[JDIR]>1) this->ExchangeX2(ex,ez); + if(data->mygrid->nproc[KDIR]>1) this->ExchangeX3(ex,ey); } // Exchange EMFs in X1 template -void ConstrainedTransport::ExchangeX1() { +void ConstrainedTransport::ExchangeX1(IdefixArray3D ey, IdefixArray3D ez) { idfx::pushRegion("Emf::ExchangeX1"); @@ -34,8 +36,6 @@ void ConstrainedTransport::ExchangeX1() { IdefixArray1D BufferLeft=BufferSendX1[faceLeft]; IdefixArray1D BufferRight=BufferSendX1[faceRight]; - IdefixArray3D ey=this->ey; - IdefixArray3D ez=this->ez; // If MPI Persistent, start receiving even before the buffers are filled @@ -61,7 +61,6 @@ void ConstrainedTransport::ExchangeX1() { idefix_for("LoadBufferX1Emfz",kbeg,kend,jbeg,jend+1, KOKKOS_LAMBDA (int k, int j) { - BufferLeft( (j-jbeg) + (k-kbeg)*(ny+1) ) = ez(k,j,ileft); BufferRight( (j-jbeg) + (k-kbeg)*(ny+1) ) = ez(k,j,iright); } ); @@ -70,7 +69,6 @@ void ConstrainedTransport::ExchangeX1() { idefix_for("LoadBufferX1Emfy",kbeg,kend+1,jbeg,jend, KOKKOS_LAMBDA (int k, int j) { - BufferLeft( (j-jbeg) + (k-kbeg)*ny + Vsindex ) = ey(k,j,ileft); BufferRight( (j-jbeg) + (k-kbeg)*ny + Vsindex ) = 
ey(k,j,iright); } ); @@ -91,29 +89,21 @@ void ConstrainedTransport::ExchangeX1() { BufferLeft=BufferRecvX1[faceLeft]; BufferRight=BufferRecvX1[faceRight]; - // We average the edge emfs zones + // Erase the emf with the one coming from the left process + idefix_for("StoreBufferX1Emfz",kbeg,kend,jbeg,jend+1, KOKKOS_LAMBDA (int k, int j) { if(lbound == internal || lbound == periodic) { - ez(k,j,ileft) = HALF_F*( - BufferLeft( (j-jbeg) + (k-kbeg)*(ny+1) ) + ez(k,j,ileft) ); - } - if(rbound == internal || rbound == periodic) { - ez(k,j,iright) = HALF_F*( - BufferRight( (j-jbeg) + (k-kbeg)*(ny+1) ) + ez(k,j,iright) ); + ez(k,j,ileft) = BufferLeft( (j-jbeg) + (k-kbeg)*(ny+1)); } }); + #if DIMENSIONS == 3 Vsindex = (ny+1)*nz; idefix_for("StoreBufferX1Emfy",kbeg,kend+1,jbeg,jend, KOKKOS_LAMBDA (int k, int j) { if(lbound == internal || lbound == periodic) { - ey(k,j,ileft) = HALF_F*( - BufferLeft( (j-jbeg) + (k-kbeg)*ny +Vsindex) + ey(k,j,ileft) ); - } - if(rbound == internal || rbound == periodic) { - ey(k,j,iright) = HALF_F*( - BufferRight( (j-jbeg) + (k-kbeg)*ny +Vsindex) + ey(k,j,iright) ); + ey(k,j,ileft) = BufferLeft( (j-jbeg) + (k-kbeg)*ny +Vsindex); } }); #endif @@ -124,7 +114,7 @@ void ConstrainedTransport::ExchangeX1() { // Exchange EMFs in X2 template -void ConstrainedTransport::ExchangeX2() { +void ConstrainedTransport::ExchangeX2(IdefixArray3D ex, IdefixArray3D ez) { idfx::pushRegion("Emf::ExchangeX2"); // Load the buffers with data @@ -133,8 +123,6 @@ void ConstrainedTransport::ExchangeX2() { [[maybe_unused]] int nz; IdefixArray1D BufferLeft=BufferSendX2[faceLeft]; IdefixArray1D BufferRight=BufferSendX2[faceRight]; - IdefixArray3D ex=this->ex; - IdefixArray3D ez=this->ez; // If MPI Persistent, start receiving even before the buffers are filled double tStart = MPI_Wtime(); @@ -158,7 +146,6 @@ void ConstrainedTransport::ExchangeX2() { idefix_for("LoadBufferX2Emfz",kbeg,kend,ibeg,iend+1, KOKKOS_LAMBDA (int k, int i) { - BufferLeft( (i-ibeg) + (k-kbeg)*(nx+1) ) = ez(k,jleft,i); BufferRight( (i-ibeg) + (k-kbeg)*(nx+1) ) = ez(k,jright,i); } ); @@ -167,7 +154,6 @@ void ConstrainedTransport::ExchangeX2() { idefix_for("LoadBufferX1Emfx",kbeg,kend+1,ibeg,iend, KOKKOS_LAMBDA (int k, int i) { - BufferLeft( (i-ibeg) + (k-kbeg)*nx + Vsindex ) = ex(k,jleft,i); BufferRight( (i-ibeg) + (k-kbeg)*nx + Vsindex ) = ex(k,jright,i); } ); @@ -191,12 +177,7 @@ void ConstrainedTransport::ExchangeX2() { idefix_for("StoreBufferX2Emfz",kbeg,kend,ibeg,iend+1, KOKKOS_LAMBDA (int k, int i) { if(lbound == internal || lbound == periodic) { - ez(k,jleft,i) = HALF_F*( - BufferLeft( (i-ibeg) + (k-kbeg)*(nx+1) ) + ez(k,jleft,i) ); - } - if(rbound == internal || rbound == periodic) { - ez(k,jright,i) = HALF_F*( - BufferRight( (i-ibeg) + (k-kbeg)*(nx+1) ) + ez(k,jright,i) ); + ez(k,jleft,i) = BufferLeft( (i-ibeg) + (k-kbeg)*(nx+1) ); } }); #if DIMENSIONS == 3 @@ -204,12 +185,7 @@ void ConstrainedTransport::ExchangeX2() { idefix_for("StoreBufferX1Emfy",kbeg,kend+1,ibeg,iend, KOKKOS_LAMBDA (int k, int i) { if(lbound == internal || lbound == periodic) { - ex(k,jleft,i) = HALF_F*( - BufferLeft( (i-ibeg) + (k-kbeg)*nx +Vsindex) + ex(k,jleft,i) ); - } - if(rbound == internal || rbound == periodic) { - ex(k,jright,i) = HALF_F*( - BufferRight( (i-ibeg) + (k-kbeg)*nx +Vsindex) + ex(k,jright,i) ); + ex(k,jleft,i) = BufferLeft( (i-ibeg) + (k-kbeg)*nx +Vsindex); } }); #endif @@ -220,7 +196,7 @@ void ConstrainedTransport::ExchangeX2() { // Exchange EMFs in X3 template -void ConstrainedTransport::ExchangeX3() { +void 
ConstrainedTransport::ExchangeX3(IdefixArray3D ex, IdefixArray3D ey) { idfx::pushRegion("Emf::ExchangeX3"); @@ -229,8 +205,6 @@ void ConstrainedTransport::ExchangeX3() { int nx,ny; IdefixArray1D BufferLeft=BufferSendX3[faceLeft]; IdefixArray1D BufferRight=BufferSendX3[faceRight]; - IdefixArray3D ex=this->ex; - IdefixArray3D ey=this->ey; int Vsindex = 0; @@ -259,7 +233,6 @@ void ConstrainedTransport::ExchangeX3() { idefix_for("LoadBufferX3Emfx",jbeg,jend+1,ibeg,iend, KOKKOS_LAMBDA (int j, int i) { - BufferLeft( (i-ibeg) + (j-jbeg)*nx ) = ex(kleft,j,i); BufferRight( (i-ibeg) + (j-jbeg)*nx ) = ex(kright,j,i); } ); @@ -267,7 +240,6 @@ void ConstrainedTransport::ExchangeX3() { idefix_for("LoadBufferX3Emfy",jbeg,jend,ibeg,iend+1, KOKKOS_LAMBDA (int j, int i) { - BufferLeft( (i-ibeg) + (j-jbeg)*(nx+1) + Vsindex ) = ey(kleft,j,i); BufferRight( (i-ibeg) + (j-jbeg)*(nx+1) + Vsindex ) = ey(kright,j,i); } ); @@ -290,12 +262,7 @@ void ConstrainedTransport::ExchangeX3() { idefix_for("StoreBufferX3Emfx",jbeg,jend+1,ibeg,iend, KOKKOS_LAMBDA (int j, int i) { if(lbound == internal || lbound == periodic) { - ex(kleft,j,i) = HALF_F*( - BufferLeft( (i-ibeg) + (j-jbeg)*nx ) + ex(kleft,j,i) ); - } - if(rbound == internal || rbound == periodic) { - ex(kright,j,i) = HALF_F*( - BufferRight( (i-ibeg) + (j-jbeg)*nx ) + ex(kright,j,i) ); + ex(kleft,j,i) = BufferLeft( (i-ibeg) + (j-jbeg)*nx ); } }); @@ -303,16 +270,9 @@ void ConstrainedTransport::ExchangeX3() { idefix_for("StoreBufferX3Emfy",jbeg,jend,ibeg,iend+1, KOKKOS_LAMBDA (int j, int i) { if(lbound == internal || lbound == periodic) { - ey(kleft,j,i) = HALF_F*( - BufferLeft( (i-ibeg) + (j-jbeg)*(nx+1) + Vsindex ) + ey(kleft,j,i) ); - } - if(rbound == internal || rbound == periodic) { - ey(kright,j,i) = HALF_F*( - BufferRight( (i-ibeg) + (j-jbeg)*(nx+1) + Vsindex ) + ey(kright,j,i) ); + ey(kleft,j,i) = BufferLeft( (i-ibeg) + (j-jbeg)*(nx+1) + Vsindex ); } }); - - idfx::popRegion(); } diff --git a/src/fluid/constrainedTransport/constrainedTransport.hpp b/src/fluid/constrainedTransport/constrainedTransport.hpp index c32e4ec96..24c317c09 100644 --- a/src/fluid/constrainedTransport/constrainedTransport.hpp +++ b/src/fluid/constrainedTransport/constrainedTransport.hpp @@ -101,13 +101,17 @@ class ConstrainedTransport { // Routines for evolving the magnetic potential (only available when EVOLVE_VECTOR_POTENTIAL) void EvolveVectorPotential(real, IdefixArray4D &); void ComputeMagFieldFromA(IdefixArray4D &Vein, IdefixArray4D &Vsout); + void EnforceVectorPotentialBoundary(IdefixArray4D &Vein); // Enforce BCs on A + void EnforceEMFBoundaryPeriodic(IdefixArray3D ex, + IdefixArray3D ey, + IdefixArray3D ez); #ifdef WITH_MPI // Exchange surface EMFs to remove interprocess round off errors - void ExchangeAll(); - void ExchangeX1(); - void ExchangeX2(); - void ExchangeX3(); + void ExchangeAll(IdefixArray3D ex, IdefixArray3D ey, IdefixArray3D ez); + void ExchangeX1(IdefixArray3D ey, IdefixArray3D ez); + void ExchangeX2(IdefixArray3D ex, IdefixArray3D ez); + void ExchangeX3(IdefixArray3D ex, IdefixArray3D ey); #endif private: @@ -449,6 +453,7 @@ void ConstrainedTransport::ShowConfig() { #include "calcRiemannEmf.hpp" #include "EMFexchange.hpp" #include "enforceEMFBoundary.hpp" +#include "enforceVectorPotentialBoundary.hpp" #include "evolveMagField.hpp" #include "evolveVectorPotential.hpp" diff --git a/src/fluid/constrainedTransport/enforceEMFBoundary.hpp b/src/fluid/constrainedTransport/enforceEMFBoundary.hpp index 192ec12e4..9d3d6913f 100644 --- 
a/src/fluid/constrainedTransport/enforceEMFBoundary.hpp +++ b/src/fluid/constrainedTransport/enforceEMFBoundary.hpp @@ -19,8 +19,9 @@ // This is because in some specific cases involving curvilinear coordinates, roundoff errors // can accumulate, leading to a small drift of face-values that should be strictly equal. // This behaviour is enabled using the flag below. - -//#define ENFORCE_EMF_CONSISTENCY +#ifdef EVOLVE_VECTOR_POTENTIAL + #define ENFORCE_EMF_CONSISTENCY +#endif template void ConstrainedTransport::EnforceEMFBoundary() { @@ -30,81 +31,73 @@ void ConstrainedTransport::EnforceEMFBoundary() { this->data->hydro->emfBoundaryFunc(*data, data->t); if(this->data->hydro->haveAxis) { - this->data->hydro->boundary->axis->RegularizeEMFs(); + this->data->hydro->boundary->axis->RegularizeEMFs(this->ex, this->ey, this->ez); } #ifdef ENFORCE_EMF_CONSISTENCY #ifdef WITH_MPI // This average the EMFs at the domain surface with immediate neighbours // to ensure the EMFs exactly match - this->ExchangeAll(); + this->ExchangeAll(this->ex, this->ey, this->ez); #endif #endif - IdefixArray3D ex = this->ex; - IdefixArray3D ey = this->ey; - IdefixArray3D ez = this->ez; - // Enforce specific EMF regularisation for(int dir=0 ; dir < DIMENSIONS ; dir++ ) { if(data->lbound[dir] == shearingbox || data->rbound[dir] == shearingbox) { SymmetrizeEMFShearingBox(); } - #ifdef ENFORCE_EMF_CONSISTENCY - if(data->lbound[dir] == periodic && data->rbound[dir] == periodic) { - // If domain decomposed, periodicity is already enforced by ExchangeAll - if(data->mygrid->nproc[dir] == 1) { - int ioffset = (dir == IDIR) ? data->np_int[IDIR] : 0; - int joffset = (dir == JDIR) ? data->np_int[JDIR] : 0; - int koffset = (dir == KDIR) ? data->np_int[KDIR] : 0; - - int ibeg = (dir == IDIR) ? data->beg[IDIR] : 0; - int iend = (dir == IDIR) ? data->beg[IDIR]+1 : data->np_tot[IDIR]; - int jbeg = (dir == JDIR) ? data->beg[JDIR] : 0; - int jend = (dir == JDIR) ? data->beg[JDIR]+1 : data->np_tot[JDIR]; - int kbeg = (dir == KDIR) ? data->beg[KDIR] : 0; - int kend = (dir == KDIR) ? data->beg[KDIR]+1 : data->np_tot[KDIR]; - idefix_for("BoundaryEMFPeriodic",kbeg,kend,jbeg,jend,ibeg,iend, - KOKKOS_LAMBDA (int k, int j, int i) { - real em; - - if(dir==IDIR) { - em = HALF_F*(ez(k,j,i)+ez(k,j,i+ioffset)); - ez(k,j,i) = em; - ez(k,j,i+ioffset) = em; - - #if DIMENSIONS == 3 - em = HALF_F*(ey(k,j,i)+ey(k,j,i+ioffset)); - ey(k,j,i) = em; - ey(k,j,i+ioffset) = em; - #endif - } - - if(dir==JDIR) { - em = HALF_F*(ez(k,j,i)+ez(k,j+joffset,i)); - ez(k,j,i) = em; - ez(k,j+joffset,i) = em; - - #if DIMENSIONS == 3 - em = HALF_F*(ex(k,j,i)+ex(k,j+joffset,i)); - ex(k,j,i) = em; - ex(k,j+joffset,i) = em; - #endif - } - - if(dir==KDIR) { - em = HALF_F*(ex(k,j,i)+ex(k+koffset,j,i)); - ex(k,j,i) = em; - ex(k+koffset,j,i) = em; - - em = HALF_F*(ey(k,j,i)+ey(k+koffset,j,i)); - ey(k,j,i) = em; - ey(k+koffset,j,i) = em; - } - }); - } + } + #ifdef ENFORCE_EMF_CONSISTENCY + EnforceEMFBoundaryPeriodic(this->ex, this->ey, this->ez); + #endif //ENFORCE_EMF_CONSISTENCY +#endif // MHD==YES + idfx::popRegion(); +} + +template +void ConstrainedTransport::EnforceEMFBoundaryPeriodic(IdefixArray3D ex, + IdefixArray3D ey, + IdefixArray3D ez) { + idfx::pushRegion("Emf::EnforceEMFBoundaryPeriodic"); + #if MHD == YES + for(int dir=0 ; dir < DIMENSIONS ; dir++ ) { + if(data->lbound[dir] == periodic && data->rbound[dir] == periodic) { + // If domain decomposed, periodicity is already enforced by ExchangeAll + if(data->mygrid->nproc[dir] == 1) { + int ioffset = (dir == IDIR) ? 
data->np_int[IDIR] : 0; + int joffset = (dir == JDIR) ? data->np_int[JDIR] : 0; + int koffset = (dir == KDIR) ? data->np_int[KDIR] : 0; + + int ibeg = (dir == IDIR) ? data->beg[IDIR] : 0; + int iend = (dir == IDIR) ? data->beg[IDIR]+1 : data->np_tot[IDIR]; + int jbeg = (dir == JDIR) ? data->beg[JDIR] : 0; + int jend = (dir == JDIR) ? data->beg[JDIR]+1 : data->np_tot[JDIR]; + int kbeg = (dir == KDIR) ? data->beg[KDIR] : 0; + int kend = (dir == KDIR) ? data->beg[KDIR]+1 : data->np_tot[KDIR]; + idefix_for("BoundaryEMFPeriodic",kbeg,kend,jbeg,jend,ibeg,iend, + KOKKOS_LAMBDA (int k, int j, int i) { + if(dir==IDIR) { + ez(k,j,i+ioffset) = ez(k,j,i); + #if DIMENSIONS == 3 + ey(k,j,i+ioffset) = ey(k,j,i); + #endif + } + + if(dir==JDIR) { + ez(k,j+joffset,i) = ez(k,j,i); + #if DIMENSIONS == 3 + ex(k,j+joffset,i) = ex(k,j,i); + #endif + } + + if(dir==KDIR) { + ex(k+koffset,j,i) = ex(k,j,i); + ey(k+koffset,j,i) = ey(k,j,i); + } + }); } - #endif //ENFORCE_EMF_CONSISTENCY + } } #endif // MHD==YES idfx::popRegion(); @@ -148,11 +141,13 @@ void ConstrainedTransport::SymmetrizeEMFShearingBox() { if(data->lbound[IDIR]==shearingbox) { // We send to our left (which, by periodicity, is the right end of the domain) // our value of sbEyL and get + Kokkos::fence(); MPI_Sendrecv(sbEyL.data(), size, realMPI, procLeft, 2001, sbEyR.data(), size, realMPI, procLeft, 2002, data->mygrid->CartComm, &status ); } if(data->rbound[IDIR]==shearingbox) { + Kokkos::fence(); // We send to our right (which, by periodicity, is the left end (=beginning) // of the domain) our value of sbEyR and get sbEyL MPI_Sendrecv(sbEyR.data(), size, realMPI, procRight, 2002, diff --git a/src/fluid/constrainedTransport/enforceVectorPotentialBoundary.hpp b/src/fluid/constrainedTransport/enforceVectorPotentialBoundary.hpp new file mode 100644 index 000000000..fea9b47b9 --- /dev/null +++ b/src/fluid/constrainedTransport/enforceVectorPotentialBoundary.hpp @@ -0,0 +1,36 @@ +// *********************************************************************************** +// Idefix MHD astrophysical code +// Copyright(C) Geoffroy R. J. 
Lesur +// and other code contributors +// Licensed under CeCILL 2.1 License, see COPYING for more information +// *********************************************************************************** + +#ifndef FLUID_CONSTRAINEDTRANSPORT_ENFORCEVECTORPOTENTIALBOUNDARY_HPP_ +#define FLUID_CONSTRAINEDTRANSPORT_ENFORCEVECTORPOTENTIALBOUNDARY_HPP_ +#include "constrainedTransport.hpp" + +template +void ConstrainedTransport::EnforceVectorPotentialBoundary(IdefixArray4D &Vein) { + idfx::pushRegion("Emf::EnforceVectorPotentialBoundary"); + + auto Ax1 = Kokkos::subview(Vein, IDIR, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto Ax2 = Kokkos::subview(Vein, JDIR, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto Ax3 = Kokkos::subview(Vein, KDIR, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + + if(this->hydro->haveAxis) { + this->hydro->boundary->axis->RegularizeEMFs(Ax1, Ax2, Ax3); + } + + #ifdef ENFORCE_EMF_CONSISTENCY + #ifdef WITH_MPI + // This average the vector potential at the domain surface with immediate neighbours + // to ensure the vector potentials exactly match + + this->ExchangeAll(Ax1, Ax2, Ax3); + #endif + EnforceEMFBoundaryPeriodic(Ax1, Ax2, Ax3); + #endif + + idfx::popRegion(); +} +#endif // FLUID_CONSTRAINEDTRANSPORT_ENFORCEVECTORPOTENTIALBOUNDARY_HPP_ diff --git a/src/gravity/laplacian.cpp b/src/gravity/laplacian.cpp index 2300db82d..7ac329ea3 100644 --- a/src/gravity/laplacian.cpp +++ b/src/gravity/laplacian.cpp @@ -103,7 +103,7 @@ Laplacian::Laplacian(DataBlock *datain, std::array left std::vector mapVars; mapVars.push_back(ntarget); - this->mpi.Init(data->mygrid, mapVars, this->nghost.data(), this->np_int.data()); + this->mpi.Init(data->mygrid, mapVars, nghost, np_int, datain->lbound, datain->rbound, false); #endif idfx::popRegion(); diff --git a/src/macros.hpp b/src/macros.hpp index 2c7bda127..0df5f9430 100644 --- a/src/macros.hpp +++ b/src/macros.hpp @@ -25,7 +25,6 @@ #if COMPONENTS == 3 #define EXPAND(a,b,c) a b c #define SELECT(a,b,c) c - #endif #if DIMENSIONS == 1 diff --git a/src/main.cpp b/src/main.cpp index 164c134ee..7b2c00c85 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -192,7 +192,9 @@ int main( int argc, char* argv[] ) { try { Tint.Cycle(data); } catch(std::exception &e) { - idfx::cout << "Main: WARNING! Caught an exception in TimeIntegrator." << std::endl; + idfx::cout << std::endl + << "Main: WARNING! Caught an exception in TimeIntegrator." + << std::endl; #ifdef WITH_MPI if(!Mpi::CheckSync(5)) { std::stringstream message; diff --git a/src/mpi.cpp b/src/mpi.cpp deleted file mode 100644 index 4de7739cb..000000000 --- a/src/mpi.cpp +++ /dev/null @@ -1,992 +0,0 @@ -// *********************************************************************************** -// Idefix MHD astrophysical code -// Copyright(C) Geoffroy R. J. Lesur -// and other code contributors -// Licensed under CeCILL 2.1 License, see COPYING for more information -// *********************************************************************************** - - -#include "mpi.hpp" -#include -#include -#include // NOLINT [build/c++11] -#include // NOLINT [build/c++11] -#include -#include -#include "idefix.hpp" -#include "dataBlock.hpp" - - -#if defined(OPEN_MPI) && OPEN_MPI -#include "mpi-ext.h" // Needed for CUDA-aware check */ -#endif - - -//#define MPI_NON_BLOCKING -#define MPI_PERSISTENT - -// init the number of instances -int Mpi::nInstances = 0; - -// MPI Routines exchange -void Mpi::ExchangeAll() { - IDEFIX_ERROR("Not Implemented"); -} - -/// -/// Initialise an instance of the MPI class. 
-/// @param grid: pointer to the grid object (needed to get the MPI neighbours) -/// @param inputMap: 1st indices of inputVc which are to be exchanged (i.e, the list of variables) -/// @param nghost: size of the ghost region in each direction -/// @param nint: size of the internal region in each direction -/// @param inputHaveVs: whether the instance should also treat face-centered variable -/// (optional, default false) -/// - -void Mpi::Init(Grid *grid, std::vector inputMap, - int nghost[3], int nint[3], - bool inputHaveVs) { - idfx::pushRegion("Mpi::Init"); - this->mygrid = grid; - - // increase the number of instances - nInstances++; - thisInstance=nInstances; - - // Transfer the vector of indices as an IdefixArray on the target - - // Allocate mapVars on target and copy it from the input argument list - this->mapVars = idfx::ConvertVectorToIdefixArray(inputMap); - this->mapNVars = inputMap.size(); - this->haveVs = inputHaveVs; - - // Compute indices of arrays we will be working with - for(int dir = 0 ; dir < 3 ; dir++) { - this->nghost[dir] = nghost[dir]; - this->nint[dir] = nint[dir]; - this->ntot[dir] = nint[dir]+2*nghost[dir]; - this->beg[dir] = nghost[dir]; - this->end[dir] = nghost[dir]+nint[dir]; - } - - ///////////////////////////////////////////////////////////////////////////// - // Init exchange datasets - bufferSizeX1 = 0; - bufferSizeX2 = 0; - bufferSizeX3 = 0; - - // Number of cells in X1 boundary condition: - bufferSizeX1 = nghost[IDIR] * nint[JDIR] * nint[KDIR] * mapNVars; - - if(haveVs) { - bufferSizeX1 += nghost[IDIR] * nint[JDIR] * nint[KDIR]; - #if DIMENSIONS>=2 - bufferSizeX1 += nghost[IDIR] * (nint[JDIR]+1) * nint[KDIR]; - #endif - - #if DIMENSIONS==3 - bufferSizeX1 += nghost[IDIR] * nint[JDIR] * (nint[KDIR]+1); - #endif // DIMENSIONS - } - - - BufferRecvX1[faceLeft ] = Buffer(bufferSizeX1); - BufferRecvX1[faceRight] = Buffer(bufferSizeX1); - BufferSendX1[faceLeft ] = Buffer(bufferSizeX1); - BufferSendX1[faceRight] = Buffer(bufferSizeX1); - - // Number of cells in X2 boundary condition (only required when problem >2D): -#if DIMENSIONS >= 2 - bufferSizeX2 = ntot[IDIR] * nghost[JDIR] * nint[KDIR] * mapNVars; - if(haveVs) { - // IDIR - bufferSizeX2 += (ntot[IDIR]+1) * nghost[JDIR] * nint[KDIR]; - #if DIMENSIONS>=2 - bufferSizeX2 += ntot[IDIR] * nghost[JDIR] * nint[KDIR]; - #endif - #if DIMENSIONS==3 - bufferSizeX2 += ntot[IDIR] * nghost[JDIR] * (nint[KDIR]+1); - #endif // DIMENSIONS - } - - BufferRecvX2[faceLeft ] = Buffer(bufferSizeX2); - BufferRecvX2[faceRight] = Buffer(bufferSizeX2); - BufferSendX2[faceLeft ] = Buffer(bufferSizeX2); - BufferSendX2[faceRight] = Buffer(bufferSizeX2); - -#endif -// Number of cells in X3 boundary condition (only required when problem is 3D): -#if DIMENSIONS ==3 - bufferSizeX3 = ntot[IDIR] * ntot[JDIR] * nghost[KDIR] * mapNVars; - - if(haveVs) { - // IDIR - bufferSizeX3 += (ntot[IDIR]+1) * ntot[JDIR] * nghost[KDIR]; - // JDIR - bufferSizeX3 += ntot[IDIR] * (ntot[JDIR]+1) * nghost[KDIR]; - // KDIR - bufferSizeX3 += ntot[IDIR] * ntot[JDIR] * nghost[KDIR]; - } - - BufferRecvX3[faceLeft ] = Buffer(bufferSizeX3); - BufferRecvX3[faceRight] = Buffer(bufferSizeX3); - BufferSendX3[faceLeft ] = Buffer(bufferSizeX3); - BufferSendX3[faceRight] = Buffer(bufferSizeX3); -#endif // DIMENSIONS - -#ifdef MPI_PERSISTENT - // Init persistent MPI communications - int procSend, procRecv; - - // X1-dir exchanges - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,0,1,&procRecv,&procSend )); - - 
MPI_SAFE_CALL(MPI_Send_init(BufferSendX1[faceRight].data(), bufferSizeX1, realMPI, procSend, - thisInstance*1000, mygrid->CartComm, &sendRequestX1[faceRight])); - - MPI_SAFE_CALL(MPI_Recv_init(BufferRecvX1[faceLeft].data(), bufferSizeX1, realMPI, procRecv, - thisInstance*1000, mygrid->CartComm, &recvRequestX1[faceLeft])); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,0,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Send_init(BufferSendX1[faceLeft].data(), bufferSizeX1, realMPI, procSend, - thisInstance*1000+1,mygrid->CartComm, &sendRequestX1[faceLeft])); - - MPI_SAFE_CALL(MPI_Recv_init(BufferRecvX1[faceRight].data(), bufferSizeX1, realMPI, procRecv, - thisInstance*1000+1,mygrid->CartComm, &recvRequestX1[faceRight])); - - #if DIMENSIONS >= 2 - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,1,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Send_init(BufferSendX2[faceRight].data(), bufferSizeX2, realMPI, procSend, - thisInstance*1000+10, mygrid->CartComm, &sendRequestX2[faceRight])); - - MPI_SAFE_CALL(MPI_Recv_init(BufferRecvX2[faceLeft].data(), bufferSizeX2, realMPI, procRecv, - thisInstance*1000+10, mygrid->CartComm, &recvRequestX2[faceLeft])); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,1,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Send_init(BufferSendX2[faceLeft].data(), bufferSizeX2, realMPI, procSend, - thisInstance*1000+11, mygrid->CartComm, &sendRequestX2[faceLeft])); - - MPI_SAFE_CALL(MPI_Recv_init(BufferRecvX2[faceRight].data(), bufferSizeX2, realMPI, procRecv, - thisInstance*1000+11, mygrid->CartComm, &recvRequestX2[faceRight])); - #endif - - #if DIMENSIONS == 3 - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,2,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Send_init(BufferSendX3[faceRight].data(), bufferSizeX3, realMPI, procSend, - thisInstance*1000+20, mygrid->CartComm, &sendRequestX3[faceRight])); - - MPI_SAFE_CALL(MPI_Recv_init(BufferRecvX3[faceLeft].data(), bufferSizeX3, realMPI, procRecv, - thisInstance*1000+20, mygrid->CartComm, &recvRequestX3[faceLeft])); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,2,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Send_init(BufferSendX3[faceLeft].data(), bufferSizeX3, realMPI, procSend, - thisInstance*1000+21, mygrid->CartComm, &sendRequestX3[faceLeft])); - - MPI_SAFE_CALL(MPI_Recv_init(BufferRecvX3[faceRight].data(), bufferSizeX3, realMPI, procRecv, - thisInstance*1000+21, mygrid->CartComm, &recvRequestX3[faceRight])); - #endif - -#endif // MPI_Persistent - - // say this instance is initialized. 
- isInitialized = true; - - idfx::popRegion(); -} - -// Destructor (clean up persistent communication channels) -Mpi::~Mpi() { - idfx::pushRegion("Mpi::~Mpi"); - if(isInitialized) { - // Properly clean up the mess - #ifdef MPI_PERSISTENT - idfx::cout << "Mpi(" << thisInstance - << "): Cleaning up MPI persistent communication channels" << std::endl; - for(int i=0 ; i< 2; i++) { - MPI_Request_free( &sendRequestX1[i]); - MPI_Request_free( &recvRequestX1[i]); - - #if DIMENSIONS >= 2 - MPI_Request_free( &sendRequestX2[i]); - MPI_Request_free( &recvRequestX2[i]); - #endif - - #if DIMENSIONS == 3 - MPI_Request_free( &sendRequestX3[i]); - MPI_Request_free( &recvRequestX3[i]); - #endif - } - #endif - if(thisInstance==1) { - idfx::cout << "Mpi(" << thisInstance << "): measured throughput is " - << bytesSentOrReceived/myTimer/1024.0/1024.0 << " MB/s" << std::endl; - idfx::cout << "Mpi(" << thisInstance << "): message sizes were " << std::endl; - idfx::cout << " X1: " << bufferSizeX1*sizeof(real)/1024.0/1024.0 << " MB" << std::endl; - idfx::cout << " X2: " << bufferSizeX2*sizeof(real)/1024.0/1024.0 << " MB" << std::endl; - idfx::cout << " X3: " << bufferSizeX3*sizeof(real)/1024.0/1024.0 << " MB" << std::endl; - } - isInitialized = false; - } - idfx::popRegion(); -} - -void Mpi::ExchangeX1(IdefixArray4D Vc, IdefixArray4D Vs) { - idfx::pushRegion("Mpi::ExchangeX1"); - - // Load the buffers with data - int ibeg,iend,jbeg,jend,kbeg,kend,offset,nx; - Buffer BufferLeft = BufferSendX1[faceLeft]; - Buffer BufferRight = BufferSendX1[faceRight]; - IdefixArray1D map = this->mapVars; - - // If MPI Persistent, start receiving even before the buffers are filled - myTimer -= MPI_Wtime(); - double tStart = MPI_Wtime(); -#ifdef MPI_PERSISTENT - MPI_Status sendStatus[2]; - MPI_Status recvStatus[2]; - - MPI_SAFE_CALL(MPI_Startall(2, recvRequestX1)); - idfx::mpiCallsTimer += MPI_Wtime() - tStart; -#endif - myTimer += MPI_Wtime(); - - // Coordinates of the ghost region which needs to be transfered - ibeg = 0; - iend = nghost[IDIR]; - nx = nghost[IDIR]; // Number of points in x - offset = end[IDIR]; // Distance between beginning of left and right ghosts - jbeg = beg[JDIR]; - jend = end[JDIR]; - - kbeg = beg[KDIR]; - kend = end[KDIR]; - - - BufferLeft.ResetPointer(); - BufferRight.ResetPointer(); - - BufferLeft.Pack(Vc, map, std::make_pair(ibeg+nx, iend+nx), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Pack(Vc, map, std::make_pair(ibeg+offset-nx, iend+offset-nx), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - // Load face-centered field in the buffer - if(haveVs) { - BufferLeft.Pack(Vs, BX1s,std::make_pair(ibeg+nx+1, iend+nx+1), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Pack(Vs, BX1s, std::make_pair(ibeg+offset-nx, iend+offset-nx), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - #if DIMENSIONS >= 2 - - BufferLeft.Pack(Vs, BX2s,std::make_pair(ibeg+nx, iend+nx), - std::make_pair(jbeg , jend+1), - std::make_pair(kbeg , kend)); - - BufferRight.Pack(Vs, BX2s, std::make_pair(ibeg+offset-nx, iend+offset-nx), - std::make_pair(jbeg , jend+1), - std::make_pair(kbeg , kend)); - - #endif - - #if DIMENSIONS == 3 - - BufferLeft.Pack(Vs, BX3s,std::make_pair(ibeg+nx, iend+nx), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend+1)); - - BufferRight.Pack(Vs, BX3s, std::make_pair(ibeg+offset-nx, iend+offset-nx), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend+1)); - - #endif - } - - // Wait for completion before 
sending out everything - Kokkos::fence(); - myTimer -= MPI_Wtime(); - tStart = MPI_Wtime(); -#ifdef MPI_PERSISTENT - MPI_SAFE_CALL(MPI_Startall(2, sendRequestX1)); - // Wait for buffers to be received - MPI_Waitall(2,recvRequestX1,recvStatus); - -#else - int procSend, procRecv; - - #ifdef MPI_NON_BLOCKING - MPI_Status sendStatus[2]; - MPI_Status recvStatus[2]; - MPI_Request sendRequest[2]; - MPI_Request recvRequest[2]; - - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,0,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Isend(BufferSendX1[faceRight].data(), bufferSizeX1, realMPI, procSend, 100, - mygrid->CartComm, &sendRequest[0])); - - MPI_SAFE_CALL(MPI_Irecv(BufferRecvX1[faceLeft].data(), bufferSizeX1, realMPI, procRecv, 100, - mygrid->CartComm, &recvRequest[0])); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,0,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Isend(BufferSendX1[faceLeft].data(), bufferSizeX1, realMPI, procSend, 101, - mygrid->CartComm, &sendRequest[1])); - - MPI_SAFE_CALL(MPI_Irecv(BufferRecvX1[faceRight].data(), bufferSizeX1, realMPI, procRecv, 101, - mygrid->CartComm, &recvRequest[1])); - - // Wait for recv to complete (we don't care about the sends) - MPI_Waitall(2, recvRequest, recvStatus); - - #else - MPI_Status status; - // Send to the right - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,0,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Sendrecv(BufferSendX1[faceRight].data(), bufferSizeX1, realMPI, procSend, 100, - BufferRecvX1[faceLeft].data(), bufferSizeX1, realMPI, procRecv, 100, - mygrid->CartComm, &status)); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,0,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Sendrecv(BufferSendX1[faceLeft].data(), bufferSizeX1, realMPI, procSend, 101, - BufferRecvX1[faceRight].data(), bufferSizeX1, realMPI, procRecv, 101, - mygrid->CartComm, &status)); - #endif -#endif - myTimer += MPI_Wtime(); - idfx::mpiCallsTimer += MPI_Wtime() - tStart; - // Unpack - BufferLeft=BufferRecvX1[faceLeft]; - BufferRight=BufferRecvX1[faceRight]; - - BufferLeft.ResetPointer(); - BufferRight.ResetPointer(); - - BufferLeft.Unpack(Vc, map,std::make_pair(ibeg, iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vc, map,std::make_pair(ibeg+offset, iend+offset), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - // We fill the ghost zones - - if(haveVs) { - BufferLeft.Unpack(Vs, BX1s, std::make_pair(ibeg, iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vs, BX1s, std::make_pair(ibeg+offset+1, iend+offset+1), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - #if DIMENSIONS >= 2 - BufferLeft.Unpack(Vs, BX2s, std::make_pair(ibeg, iend), - std::make_pair(jbeg , jend+1), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vs, BX2s, std::make_pair(ibeg+offset, iend+offset), - std::make_pair(jbeg , jend+1), - std::make_pair(kbeg , kend)); - #endif - - #if DIMENSIONS == 3 - BufferLeft.Unpack(Vs, BX3s, std::make_pair(ibeg, iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend+1)); - - BufferRight.Unpack(Vs, BX3s, std::make_pair(ibeg+offset, iend+offset), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend+1)); - #endif - } - -myTimer -= MPI_Wtime(); -#ifdef 
MPI_NON_BLOCKING - // Wait for the sends if they have not yet completed - MPI_Waitall(2, sendRequest, sendStatus); -#endif - -#ifdef MPI_PERSISTENT - MPI_Waitall(2, sendRequestX1, sendStatus); -#endif - myTimer += MPI_Wtime(); - bytesSentOrReceived += 4*bufferSizeX1*sizeof(real); - - idfx::popRegion(); -} - - -void Mpi::ExchangeX2(IdefixArray4D Vc, IdefixArray4D Vs) { - idfx::pushRegion("Mpi::ExchangeX2"); - - // Load the buffers with data - int ibeg,iend,jbeg,jend,kbeg,kend,offset,ny; - Buffer BufferLeft=BufferSendX2[faceLeft]; - Buffer BufferRight=BufferSendX2[faceRight]; - IdefixArray1D map = this->mapVars; - -// If MPI Persistent, start receiving even before the buffers are filled - myTimer -= MPI_Wtime(); - double tStart = MPI_Wtime(); -#ifdef MPI_PERSISTENT - MPI_Status sendStatus[2]; - MPI_Status recvStatus[2]; - - MPI_SAFE_CALL(MPI_Startall(2, recvRequestX2)); - idfx::mpiCallsTimer += MPI_Wtime() - tStart; -#endif - myTimer += MPI_Wtime(); - - // Coordinates of the ghost region which needs to be transfered - ibeg = 0; - iend = ntot[IDIR]; - - jbeg = 0; - jend = nghost[JDIR]; - offset = end[JDIR]; // Distance between beginning of left and right ghosts - ny = nghost[JDIR]; - - kbeg = beg[KDIR]; - kend = end[KDIR]; - - BufferLeft.ResetPointer(); - BufferRight.ResetPointer(); - - BufferLeft.Pack(Vc, map, std::make_pair(ibeg , iend), - std::make_pair(jbeg+ny , jend+ny), - std::make_pair(kbeg , kend)); - - BufferRight.Pack(Vc, map, std::make_pair(ibeg , iend), - std::make_pair(jbeg+offset-ny , jend+offset-ny), - std::make_pair(kbeg , kend)); - - // Load face-centered field in the buffer - if(haveVs) { - BufferLeft.Pack(Vs, BX1s,std::make_pair(ibeg , iend+1), - std::make_pair(jbeg+ny , jend+ny), - std::make_pair(kbeg , kend)); - - BufferRight.Pack(Vs, BX1s, std::make_pair(ibeg , iend+1), - std::make_pair(jbeg+offset-ny , jend+offset-ny), - std::make_pair(kbeg , kend)); - #if DIMENSIONS >= 2 - BufferLeft.Pack(Vs, BX2s,std::make_pair(ibeg , iend), - std::make_pair(jbeg+ny+1 , jend+ny+1), - std::make_pair(kbeg , kend)); - - BufferRight.Pack(Vs, BX2s, std::make_pair(ibeg , iend), - std::make_pair(jbeg+offset-ny , jend+offset-ny), - std::make_pair(kbeg , kend)); - #endif - #if DIMENSIONS == 3 - - BufferLeft.Pack(Vs, BX3s,std::make_pair(ibeg , iend), - std::make_pair(jbeg+ny , jend+ny), - std::make_pair(kbeg , kend+1)); - - BufferRight.Pack(Vs, BX3s, std::make_pair(ibeg , iend), - std::make_pair(jbeg+offset-ny , jend+offset-ny), - std::make_pair(kbeg , kend+1)); - - #endif - } - - // Send to the right - Kokkos::fence(); - - myTimer -= MPI_Wtime(); - tStart = MPI_Wtime(); -#ifdef MPI_PERSISTENT - MPI_SAFE_CALL(MPI_Startall(2, sendRequestX2)); - MPI_Waitall(2,recvRequestX2,recvStatus); - -#else - int procSend, procRecv; - - #ifdef MPI_NON_BLOCKING - MPI_Status sendStatus[2]; - MPI_Status recvStatus[2]; - MPI_Request sendRequest[2]; - MPI_Request recvRequest[2]; - - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,1,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Isend(BufferSendX2[faceRight].data(), bufferSizeX2, realMPI, procSend, 100, - mygrid->CartComm, &sendRequest[0])); - - MPI_SAFE_CALL(MPI_Irecv(BufferRecvX2[faceLeft].data(), bufferSizeX2, realMPI, procRecv, 100, - mygrid->CartComm, &recvRequest[0])); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,1,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Isend(BufferSendX2[faceLeft].data(), bufferSizeX2, realMPI, 
procSend, 101, - mygrid->CartComm, &sendRequest[1])); - - MPI_SAFE_CALL(MPI_Irecv(BufferRecvX2[faceRight].data(), bufferSizeX2, realMPI, procRecv, 101, - mygrid->CartComm, &recvRequest[1])); - - // Wait for recv to complete (we don't care about the sends) - MPI_Waitall(2, recvRequest, recvStatus); - - #else - MPI_Status status; - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,1,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Sendrecv(BufferSendX2[faceRight].data(), bufferSizeX2, realMPI, procSend, 200, - BufferRecvX2[faceLeft].data(), bufferSizeX2, realMPI, procRecv, 200, - mygrid->CartComm, &status)); - - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,1,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Sendrecv(BufferSendX2[faceLeft].data(), bufferSizeX2, realMPI, procSend, 201, - BufferRecvX2[faceRight].data(), bufferSizeX2, realMPI, procRecv, 201, - mygrid->CartComm, &status)); - #endif -#endif - myTimer += MPI_Wtime(); - idfx::mpiCallsTimer += MPI_Wtime() - tStart; - // Unpack - BufferLeft=BufferRecvX2[faceLeft]; - BufferRight=BufferRecvX2[faceRight]; - - BufferLeft.ResetPointer(); - BufferRight.ResetPointer(); - - // We fill the ghost zones - BufferLeft.Unpack(Vc, map,std::make_pair(ibeg, iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vc, map,std::make_pair(ibeg , iend), - std::make_pair(jbeg+offset , jend+offset), - std::make_pair(kbeg , kend)); - // We fill the ghost zones - - if(haveVs) { - BufferLeft.Unpack(Vs, BX1s, std::make_pair(ibeg, iend+1), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vs, BX1s, std::make_pair(ibeg, iend+1), - std::make_pair(jbeg+offset , jend+offset), - std::make_pair(kbeg , kend)); - #if DIMENSIONS >= 2 - BufferLeft.Unpack(Vs, BX2s, std::make_pair(ibeg, iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vs, BX2s, std::make_pair(ibeg, iend), - std::make_pair(jbeg+offset+1, jend+offset+1), - std::make_pair(kbeg , kend)); - #endif - #if DIMENSIONS == 3 - BufferLeft.Unpack(Vs, BX3s, std::make_pair(ibeg, iend), - std::make_pair(jbeg, jend), - std::make_pair(kbeg, kend+1)); - - BufferRight.Unpack(Vs, BX3s,std::make_pair(ibeg , iend), - std::make_pair(jbeg+offset, jend+offset), - std::make_pair(kbeg , kend+1)); - #endif - } - - myTimer -= MPI_Wtime(); -#ifdef MPI_NON_BLOCKING - // Wait for the sends if they have not yet completed - MPI_Waitall(2, sendRequest, sendStatus); -#endif - -#ifdef MPI_PERSISTENT - MPI_Waitall(2, sendRequestX2, sendStatus); -#endif - myTimer += MPI_Wtime(); - bytesSentOrReceived += 4*bufferSizeX2*sizeof(real); - - idfx::popRegion(); -} - - -void Mpi::ExchangeX3(IdefixArray4D Vc, IdefixArray4D Vs) { - idfx::pushRegion("Mpi::ExchangeX3"); - - - // Load the buffers with data - int ibeg,iend,jbeg,jend,kbeg,kend,offset,nz; - Buffer BufferLeft=BufferSendX3[faceLeft]; - Buffer BufferRight=BufferSendX3[faceRight]; - IdefixArray1D map = this->mapVars; - - // If MPI Persistent, start receiving even before the buffers are filled - myTimer -= MPI_Wtime(); - - double tStart = MPI_Wtime(); -#ifdef MPI_PERSISTENT - MPI_Status sendStatus[2]; - MPI_Status recvStatus[2]; - - MPI_SAFE_CALL(MPI_Startall(2, recvRequestX3)); - idfx::mpiCallsTimer += MPI_Wtime() - tStart; -#endif - myTimer += MPI_Wtime(); - // Coordinates of the ghost region which needs to be transfered - ibeg = 0; - iend = ntot[IDIR]; - - 
jbeg = 0; - jend = ntot[JDIR]; - - kbeg = 0; - kend = nghost[KDIR]; - offset = end[KDIR]; // Distance between beginning of left and right ghosts - nz = nghost[KDIR]; - - BufferLeft.ResetPointer(); - BufferRight.ResetPointer(); - - BufferLeft.Pack(Vc, map, std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg+nz, kend+nz)); - - BufferRight.Pack(Vc, map, std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg + offset-nz, kend+ offset-nz)); - - // Load face-centered field in the buffer - if(haveVs) { - BufferLeft.Pack(Vs, BX1s,std::make_pair(ibeg , iend+1), - std::make_pair(jbeg , jend), - std::make_pair(kbeg+nz , kend+nz)); - - BufferRight.Pack(Vs, BX1s, std::make_pair(ibeg , iend+1), - std::make_pair(jbeg , jend), - std::make_pair(kbeg + offset-nz, kend+ offset-nz)); - - #if DIMENSIONS >= 2 - - BufferLeft.Pack(Vs, BX2s,std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend+1), - std::make_pair(kbeg+nz , kend+nz)); - - BufferRight.Pack(Vs, BX2s, std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend+1), - std::make_pair(kbeg + offset-nz, kend+ offset-nz)); - - #endif - - #if DIMENSIONS == 3 - BufferLeft.Pack(Vs, BX3s,std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg+nz+1 , kend+nz+1)); - - BufferRight.Pack(Vs, BX3s, std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg + offset-nz, kend+ offset-nz)); - #endif - } - - // Send to the right - Kokkos::fence(); - - myTimer -= MPI_Wtime(); - tStart = MPI_Wtime(); -#ifdef MPI_PERSISTENT - MPI_SAFE_CALL(MPI_Startall(2, sendRequestX3)); - MPI_Waitall(2,recvRequestX3,recvStatus); - idfx::mpiCallsTimer += MPI_Wtime() - tStart; - -#else - int procSend, procRecv; - - #ifdef MPI_NON_BLOCKING - MPI_Status sendStatus[2]; - MPI_Status recvStatus[2]; - MPI_Request sendRequest[2]; - MPI_Request recvRequest[2]; - - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,2,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Isend(BufferSendX3[faceRight].data(), bufferSizeX3, realMPI, procSend, 100, - mygrid->CartComm, &sendRequest[0])); - - MPI_SAFE_CALL(MPI_Irecv(BufferRecvX3[faceLeft].data(), bufferSizeX3, realMPI, procRecv, 100, - mygrid->CartComm, &recvRequest[0])); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,2,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Isend(BufferSendX3[faceLeft].data(), bufferSizeX3, realMPI, procSend, 101, - mygrid->CartComm, &sendRequest[1])); - - MPI_SAFE_CALL(MPI_Irecv(BufferRecvX3[faceRight].data(), bufferSizeX3, realMPI, procRecv, 101, - mygrid->CartComm, &recvRequest[1])); - - // Wait for recv to complete (we don't care about the sends) - MPI_Waitall(2, recvRequest, recvStatus); - - #else - MPI_Status status; - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,2,1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Sendrecv(BufferSendX3[faceRight].data(), bufferSizeX3, realMPI, procSend, 300, - BufferRecvX3[faceLeft].data(), bufferSizeX3, realMPI, procRecv, 300, - mygrid->CartComm, &status)); - - // Send to the left - // We receive from procRecv, and we send to procSend - MPI_SAFE_CALL(MPI_Cart_shift(mygrid->CartComm,2,-1,&procRecv,&procSend )); - - MPI_SAFE_CALL(MPI_Sendrecv(BufferSendX3[faceLeft].data(), bufferSizeX3, realMPI, procSend, 301, - BufferRecvX3[faceRight].data(), bufferSizeX3, realMPI, procRecv, 301, - mygrid->CartComm, 
&status)); - #endif -#endif - myTimer += MPI_Wtime(); - idfx::mpiCallsTimer += MPI_Wtime() - tStart; - // Unpack - BufferLeft=BufferRecvX3[faceLeft]; - BufferRight=BufferRecvX3[faceRight]; - - BufferLeft.ResetPointer(); - BufferRight.ResetPointer(); - - - // We fill the ghost zones - BufferLeft.Unpack(Vc, map,std::make_pair(ibeg, iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vc, map,std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg+offset , kend+offset)); - // We fill the ghost zones - - if(haveVs) { - BufferLeft.Unpack(Vs, BX1s, std::make_pair(ibeg, iend+1), - std::make_pair(jbeg , jend), - std::make_pair(kbeg , kend)); - - BufferRight.Unpack(Vs, BX1s, std::make_pair(ibeg, iend+1), - std::make_pair(jbeg , jend), - std::make_pair(kbeg+offset , kend+offset)); - - #if DIMENSIONS >=2 - BufferLeft.Unpack(Vs, BX2s, std::make_pair(ibeg, iend), - std::make_pair(jbeg, jend+1), - std::make_pair(kbeg, kend)); - - BufferRight.Unpack(Vs, BX2s,std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend+1), - std::make_pair(kbeg+offset, kend+offset)); - #endif - - #if DIMENSIONS == 3 - BufferLeft.Unpack(Vs, BX3s, std::make_pair(ibeg, iend), - std::make_pair(jbeg, jend), - std::make_pair(kbeg, kend)); - - BufferRight.Unpack(Vs, BX3s,std::make_pair(ibeg , iend), - std::make_pair(jbeg , jend), - std::make_pair(kbeg+offset+1, kend+offset+1)); - #endif - } - - myTimer -= MPI_Wtime(); -#ifdef MPI_NON_BLOCKING - // Wait for the sends if they have not yet completed - MPI_Waitall(2, sendRequest, sendStatus); -#endif - -#ifdef MPI_PERSISTENT - MPI_Waitall(2, sendRequestX3, sendStatus); -#endif - myTimer += MPI_Wtime(); - bytesSentOrReceived += 4*bufferSizeX3*sizeof(real); - - idfx::popRegion(); -} - - - -void Mpi::CheckConfig() { - idfx::pushRegion("Mpi::CheckConfig"); - // compile time check - #ifdef KOKKOS_ENABLE_CUDA - #if defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT - #error Your MPI library is not CUDA Aware (check Idefix requirements). 
- #endif - #endif /* MPIX_CUDA_AWARE_SUPPORT */ - - // Run-time check that we can do a reduce on device arrays - IdefixArray1D src("MPIChecksrc",1); - IdefixArray1D::HostMirror srcHost = Kokkos::create_mirror_view(src); - - if(idfx::prank == 0) { - srcHost(0) = 0; - Kokkos::deep_copy(src, srcHost); - } - - if(idfx::psize > 1) { - MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); - - // Capture segfaults - struct sigaction newHandler; - struct sigaction oldHandler; - memset(&newHandler, 0, sizeof(newHandler)); - newHandler.sa_flags = SA_SIGINFO; - newHandler.sa_sigaction = Mpi::SigErrorHandler; - sigaction(SIGSEGV, &newHandler, &oldHandler); - try { - // We next circulate the info round-robin accross all the nodes to check that - // MPI can exchange buffers in idefix arrays - - MPI_Status status; - int ierrSend, ierrRecv; - if(idfx::prank == 0) { - ierrSend = MPI_Send(src.data(), 1, MPI_INT64_T, idfx::prank+1, 1, MPI_COMM_WORLD); - ierrRecv = MPI_Recv(src.data(), 1, MPI_INT64_T, idfx::psize-1, 1, MPI_COMM_WORLD, &status); - } else { - ierrRecv = MPI_Recv(src.data(), 1, MPI_INT64_T, idfx::prank-1, 1, MPI_COMM_WORLD, &status); - // Add our own rank to the data - Kokkos::deep_copy(srcHost, src); - srcHost(0) += idfx::prank; - Kokkos::deep_copy(src, srcHost); - ierrSend = MPI_Send(src.data(), 1, MPI_INT64_T, (idfx::prank+1)%idfx::psize, 1, - MPI_COMM_WORLD); - } - - if(ierrSend != 0) { - char MPImsg[MPI_MAX_ERROR_STRING]; - int MPImsgLen; - MPI_Error_string(ierrSend, MPImsg, &MPImsgLen); - throw std::runtime_error(std::string(MPImsg, MPImsgLen)); - } - if(ierrRecv != 0) { - char MPImsg[MPI_MAX_ERROR_STRING]; - int MPImsgLen; - MPI_Error_string(ierrSend, MPImsg, &MPImsgLen); - throw std::runtime_error(std::string(MPImsg, MPImsgLen)); - } - } catch(std::exception &e) { - std::stringstream errmsg; - errmsg << "Your MPI library is unable to perform Send/Recv on Idefix arrays."; - errmsg << std::endl; - #ifdef KOKKOS_ENABLE_CUDA - errmsg << "Check that your MPI library is CUDA aware." << std::endl; - #elif defined(KOKKOS_ENABLE_HIP) - errmsg << "Check that your MPI library is RocM aware." << std::endl; - #else - errmsg << "Check your MPI library configuration." << std::endl; - #endif - errmsg << "Error: " << e.what() << std::endl; - IDEFIX_ERROR(errmsg); - } - // Restore old handlers - sigaction(SIGSEGV, &oldHandler, NULL ); - MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL); - } - - // Check that we have the proper end result - Kokkos::deep_copy(srcHost, src); - int64_t size = static_cast(idfx::psize); - int64_t rank = static_cast(idfx::prank); - int64_t result = rank == 0 ? size*(size-1)/2 : rank*(rank+1)/2; - - if(srcHost(0) != result) { - idfx::cout << "got " << srcHost(0) << " expected " << result << std::endl; - std::stringstream errmsg; - errmsg << "Your MPI library managed to perform MPI exchanges on Idefix Arrays, but the result "; - errmsg << "is incorrect. " << std::endl; - errmsg << "Check your MPI library configuration." << std::endl; - IDEFIX_ERROR(errmsg); - } - idfx::popRegion(); -} - -void Mpi::SigErrorHandler(int nSignum, siginfo_t* si, void* vcontext) { - std::stringstream errmsg; - errmsg << "A segmentation fault was triggered while attempting to test your MPI library."; - errmsg << std::endl; - errmsg << "Your MPI library is unable to perform reductions on Idefix arrays."; - errmsg << std::endl; - #ifdef KOKKOS_ENABLE_CUDA - errmsg << "Check that your MPI library is CUDA aware." 
<< std::endl; - #elif defined(KOKKOS_ENABLE_HIP) - errmsg << "Check that your MPI library is RocM aware." << std::endl; - #else - errmsg << "Check your MPI library configuration." << std::endl; - #endif - IDEFIX_ERROR(errmsg); -} - -// This routine check that all of the processes are synced. -// Returns true if this is the case, false otherwise - -bool Mpi::CheckSync(real timeout) { - // If no parallelism, then we're in sync! - if(idfx::psize == 1) return(true); - - int send = idfx::prank; - int recv = 0; - MPI_Request request; - - MPI_Iallreduce(&send, &recv, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &request); - - double start = MPI_Wtime(); - int flag = 0; - MPI_Status status; - - while((MPI_Wtime()-start < timeout) && !flag) { - MPI_Test(&request, &flag, &status); - // sleep for 10 ms - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } - if(!flag) { - // We did not managed to do an allreduce, so this is a failure. - return(false); - } - if(recv != idfx::psize*(idfx::psize-1)/2) { - IDEFIX_ERROR("wrong result for synchronisation"); - } - - return(true); -} diff --git a/src/mpi.hpp b/src/mpi.hpp deleted file mode 100644 index a4250a17c..000000000 --- a/src/mpi.hpp +++ /dev/null @@ -1,269 +0,0 @@ -// *********************************************************************************** -// Idefix MHD astrophysical code -// Copyright(C) Geoffroy R. J. Lesur -// and other code contributors -// Licensed under CeCILL 2.1 License, see COPYING for more information -// *********************************************************************************** - -#ifndef MPI_HPP_ -#define MPI_HPP_ - -#include -#include -#include -#include "idefix.hpp" -#include "grid.hpp" - - -class DataBlock; -class Buffer { - public: - Buffer() = default; - explicit Buffer(size_t size): pointer{0}, array{IdefixArray1D("BufferArray",size)} { }; - - void* data() { - return(array.data()); - } - - int Size() { - return(array.size()); - } - - void ResetPointer() { - this->pointer = 0; - } - - void Pack(IdefixArray3D& in, - std::pair ib, - std::pair jb, - std::pair kb) { - const int ni = ib.second-ib.first; - const int ninj = (jb.second-jb.first)*ni; - const int ninjnk = (kb.second-kb.first)*ninj; - const int ibeg = ib.first; - const int jbeg = jb.first; - const int kbeg = kb.first; - const int offset = this->pointer; - - auto arr = this->array; - idefix_for("LoadBuffer3D",kb.first,kb.second,jb.first,jb.second,ib.first,ib.second, - KOKKOS_LAMBDA (int k, int j, int i) { - arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ) = in(k,j,i); - }); - - // Update pointer - this->pointer += ninjnk; - } - - void Pack(IdefixArray4D& in, - const int var, - std::pair ib, - std::pair jb, - std::pair kb) { - const int ni = ib.second-ib.first; - const int ninj = (jb.second-jb.first)*ni; - const int ninjnk = (kb.second-kb.first)*ninj; - const int ibeg = ib.first; - const int jbeg = jb.first; - const int kbeg = kb.first; - const int offset = this->pointer; - - auto arr = this->array; - idefix_for("LoadBuffer4D",kb.first,kb.second,jb.first,jb.second,ib.first,ib.second, - KOKKOS_LAMBDA (int k, int j, int i) { - arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ) = in(var, k,j,i); - }); - - // Update pointer - this->pointer += ninjnk; - } - - void Pack(IdefixArray4D& in, - IdefixArray1D& map, - std::pair ib, - std::pair jb, - std::pair kb) { - const int ni = ib.second-ib.first; - const int ninj = (jb.second-jb.first)*ni; - const int ninjnk = (kb.second-kb.first)*ninj; - const int ibeg = ib.first; - const int jbeg = jb.first; - const int 
kbeg = kb.first; - const int offset = this->pointer; - auto arr = this->array; - - idefix_for("LoadBuffer4D",0,map.size(), - kb.first,kb.second, - jb.first,jb.second, - ib.first,ib.second, - KOKKOS_LAMBDA (int n, int k, int j, int i) { - arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + n*ninjnk + offset ) = in(map(n), k,j,i); - }); - - // Update pointer - this->pointer += ninjnk*map.size(); - } - - void Unpack(IdefixArray3D& out, - std::pair ib, - std::pair jb, - std::pair kb) { - const int ni = ib.second-ib.first; - const int ninj = (jb.second-jb.first)*ni; - const int ninjnk = (kb.second-kb.first)*ninj; - const int ibeg = ib.first; - const int jbeg = jb.first; - const int kbeg = kb.first; - const int offset = this->pointer; - auto arr = this->array; - - idefix_for("LoadBuffer3D",kb.first,kb.second,jb.first,jb.second,ib.first,ib.second, - KOKKOS_LAMBDA (int k, int j, int i) { - out(k,j,i) = arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ); - }); - - // Update pointer - this->pointer += ninjnk; - } - - void Unpack(IdefixArray4D& out, - const int var, - std::pair ib, - std::pair jb, - std::pair kb) { - const int ni = ib.second-ib.first; - const int ninj = (jb.second-jb.first)*ni; - const int ninjnk = (kb.second-kb.first)*ninj; - const int ibeg = ib.first; - const int jbeg = jb.first; - const int kbeg = kb.first; - const int offset = this->pointer; - - auto arr = this->array; - idefix_for("LoadBuffer3D",kb.first,kb.second,jb.first,jb.second,ib.first,ib.second, - KOKKOS_LAMBDA (int k, int j, int i) { - out(var,k,j,i) = arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ); - }); - - // Update pointer - this->pointer += ninjnk; - } - - void Unpack(IdefixArray4D& out, - IdefixArray1D& map, - std::pair ib, - std::pair jb, - std::pair kb) { - const int ni = ib.second-ib.first; - const int ninj = (jb.second-jb.first)*ni; - const int ninjnk = (kb.second-kb.first)*ninj; - const int ibeg = ib.first; - const int jbeg = jb.first; - const int kbeg = kb.first; - const int offset = this->pointer; - - auto arr = this->array; - idefix_for("LoadBuffer4D",0,map.size(), - kb.first,kb.second, - jb.first,jb.second, - ib.first,ib.second, - KOKKOS_LAMBDA (int n, int k, int j, int i) { - out(map(n),k,j,i) = arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + n*ninjnk + offset ); - }); - - // Update pointer - this->pointer += ninjnk*map.size(); - } - - - private: - size_t pointer; - IdefixArray1D array; -}; - -class Mpi { - public: - Mpi() = default; - // MPI Exchange functions - void ExchangeAll(); ///< Exchange boundary elements in all directions (todo) - void ExchangeX1(IdefixArray4D inputVc, - IdefixArray4D inputVs = IdefixArray4D()); - ///< Exchange boundary elements in the X1 direction - void ExchangeX2(IdefixArray4D inputVc, - IdefixArray4D inputVs = IdefixArray4D()); - ///< Exchange boundary elements in the X2 direction - void ExchangeX3(IdefixArray4D inputVc, - IdefixArray4D inputVs = IdefixArray4D()); - ///< Exchange boundary elements in the X3 direction - - // Init from datablock - void Init(Grid *grid, std::vector inputMap, - int nghost[3], int nint[3], bool inputHaveVs = false ); - - // Check that MPI will work with the designated target (in particular GPU Direct) - static void CheckConfig(); - - // Check that MPI processes are synced - static bool CheckSync(real); - - - // Destructor - ~Mpi(); - - private: - // Because the MPI class initialise internal pointers, we do not allow copies of this class - // These lines should not be removed as they constitute a safeguard - Mpi(const Mpi&); - Mpi operator=(const Mpi&); - 
- static int nInstances; // total number of mpi instances in the code - int thisInstance; // unique number of the current instance - int nReferences; // # of references to this instance - bool isInitialized{false}; - - DataBlock *data; // pointer to datablock object - - enum {faceRight, faceLeft}; - - // Buffers for MPI calls - Buffer BufferSendX1[2]; - Buffer BufferSendX2[2]; - Buffer BufferSendX3[2]; - Buffer BufferRecvX1[2]; - Buffer BufferRecvX2[2]; - Buffer BufferRecvX3[2]; - - IdefixArray1D mapVars; - int mapNVars{0}; - - int nint[3]; //< number of internal elements of the arrays we treat - int nghost[3]; //< number of ghost zone of the arrays we treat - int ntot[3]; //< total number of cells of the arrays we treat - int beg[3]; //< begining index of the active zone - int end[3]; //< end index of the active zone - - int bufferSizeX1; - int bufferSizeX2; - int bufferSizeX3; - - bool haveVs{false}; - - // Requests for MPI persistent communications - MPI_Request sendRequestX1[2]; - MPI_Request sendRequestX2[2]; - MPI_Request sendRequestX3[2]; - MPI_Request recvRequestX1[2]; - MPI_Request recvRequestX2[2]; - MPI_Request recvRequestX3[2]; - - Grid *mygrid; - - // MPI throughput timer specific to this object - double myTimer{0}; - int64_t bytesSentOrReceived{0}; - - // Error handler used by CheckConfig - static void SigErrorHandler(int, siginfo_t* , void* ); -}; - -#endif // MPI_HPP_ diff --git a/src/mpi/CMakeLists.txt b/src/mpi/CMakeLists.txt new file mode 100644 index 000000000..6042da654 --- /dev/null +++ b/src/mpi/CMakeLists.txt @@ -0,0 +1,7 @@ +target_sources(idefix + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/buffer.hpp + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/exchanger.hpp + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/exchanger.cpp + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/mpi.hpp + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/mpi.cpp +) diff --git a/src/mpi/buffer.hpp b/src/mpi/buffer.hpp new file mode 100644 index 000000000..ff4dff146 --- /dev/null +++ b/src/mpi/buffer.hpp @@ -0,0 +1,195 @@ +// *********************************************************************************** +// Idefix MHD astrophysical code +// Copyright(C) Geoffroy R. J. 
Lesur +// and other code contributors +// Licensed under CeCILL 2.1 License, see COPYING for more information +// *********************************************************************************** + +#ifndef MPI_BUFFER_HPP_ +#define MPI_BUFFER_HPP_ + +#include "idefix.hpp" +#include "arrays.hpp" + +using BoundingBox = std::array,3>; + + +class Buffer { + public: + Buffer() = default; + explicit Buffer(size_t size): pointer{0}, array{IdefixArray1D("BufferArray",size)} {}; + + // Compute the size of a bounding box + static size_t ComputeBoxSize(BoundingBox box) { + const int ni = box[IDIR][1]-box[IDIR][0]; + const int ninj = (box[JDIR][1]-box[JDIR][0])*ni; + const int ninjnk = (box[KDIR][1]-box[KDIR][0])*ninj; + return(ninjnk); + } + + void* data() { + return(array.data()); + } + + int Size() { + return(array.size()); + } + + void ResetPointer() { + this->pointer = 0; + } + + void Pack(IdefixArray3D& in, BoundingBox box) { + const int ni = box[IDIR][1]-box[IDIR][0]; + const int ninj = (box[JDIR][1]-box[JDIR][0])*ni; + const int ninjnk = (box[KDIR][1]-box[KDIR][0])*ninj; + const int ibeg = box[IDIR][0]; + const int jbeg = box[JDIR][0]; + const int kbeg = box[KDIR][0]; + const int iend = box[IDIR][1]; + const int jend = box[JDIR][1]; + const int kend = box[KDIR][1]; + const int offset = this->pointer; + + auto arr = this->array; + idefix_for("LoadBuffer3D",kbeg,kend,jbeg,jend,ibeg,iend, + KOKKOS_LAMBDA (int k, int j, int i) { + arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ) = in(k,j,i); + }); + + // Update pointer + this->pointer += ninjnk; + } + + void Pack(IdefixArray4D& in, + const int var, + BoundingBox box) { + const int ni = box[IDIR][1]-box[IDIR][0]; + const int ninj = (box[JDIR][1]-box[JDIR][0])*ni; + const int ninjnk = (box[KDIR][1]-box[KDIR][0])*ninj; + const int ibeg = box[IDIR][0]; + const int jbeg = box[JDIR][0]; + const int kbeg = box[KDIR][0]; + const int iend = box[IDIR][1]; + const int jend = box[JDIR][1]; + const int kend = box[KDIR][1]; + const int offset = this->pointer; + + auto arr = this->array; + idefix_for("LoadBuffer4D_var",kbeg,kend,jbeg,jend,ibeg,iend, + KOKKOS_LAMBDA (int k, int j, int i) { + arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ) = in(var, k,j,i); + }); + + // Update pointer + this->pointer += ninjnk; + } + + void Pack(IdefixArray4D& in, + IdefixArray1D& map, + BoundingBox box) { + const int ni = box[IDIR][1]-box[IDIR][0]; + const int ninj = (box[JDIR][1]-box[JDIR][0])*ni; + const int ninjnk = (box[KDIR][1]-box[KDIR][0])*ninj; + const int ibeg = box[IDIR][0]; + const int jbeg = box[JDIR][0]; + const int kbeg = box[KDIR][0]; + const int iend = box[IDIR][1]; + const int jend = box[JDIR][1]; + const int kend = box[KDIR][1]; + const int offset = this->pointer; + auto arr = this->array; + + idefix_for("LoadBuffer4D_map",0,map.size(), + kbeg,kend, + jbeg,jend, + ibeg,iend, + KOKKOS_LAMBDA (int n, int k, int j, int i) { + arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + n*ninjnk + offset ) = in(map(n), k,j,i); + }); + + // Update pointer + this->pointer += ninjnk*map.size(); + } + + void Unpack(IdefixArray3D& out, + BoundingBox box) { + const int ni = box[IDIR][1]-box[IDIR][0]; + const int ninj = (box[JDIR][1]-box[JDIR][0])*ni; + const int ninjnk = (box[KDIR][1]-box[KDIR][0])*ninj; + const int ibeg = box[IDIR][0]; + const int jbeg = box[JDIR][0]; + const int kbeg = box[KDIR][0]; + const int iend = box[IDIR][1]; + const int jend = box[JDIR][1]; + const int kend = box[KDIR][1]; + const int offset = this->pointer; + auto arr = this->array; + + 
idefix_for("UnLoadBuffer3D",kbeg,kend,jbeg,jend,ibeg,iend, + KOKKOS_LAMBDA (int k, int j, int i) { + out(k,j,i) = arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ); + }); + + // Update pointer + this->pointer += ninjnk; + } + + void Unpack(IdefixArray4D& out, + const int var, + BoundingBox box) { + const int ni = box[IDIR][1]-box[IDIR][0]; + const int ninj = (box[JDIR][1]-box[JDIR][0])*ni; + const int ninjnk = (box[KDIR][1]-box[KDIR][0])*ninj; + const int ibeg = box[IDIR][0]; + const int jbeg = box[JDIR][0]; + const int kbeg = box[KDIR][0]; + const int iend = box[IDIR][1]; + const int jend = box[JDIR][1]; + const int kend = box[KDIR][1]; + const int offset = this->pointer; + + auto arr = this->array; + idefix_for("UnLoadBuffer4D_var",kbeg,kend,jbeg,jend,ibeg,iend, + KOKKOS_LAMBDA (int k, int j, int i) { + out(var,k,j,i) = arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + offset ); + }); + + // Update pointer + this->pointer += ninjnk; + } + + void Unpack(IdefixArray4D& out, + IdefixArray1D& map, + BoundingBox box) { + const int ni = box[IDIR][1]-box[IDIR][0]; + const int ninj = (box[JDIR][1]-box[JDIR][0])*ni; + const int ninjnk = (box[KDIR][1]-box[KDIR][0])*ninj; + const int ibeg = box[IDIR][0]; + const int jbeg = box[JDIR][0]; + const int kbeg = box[KDIR][0]; + const int iend = box[IDIR][1]; + const int jend = box[JDIR][1]; + const int kend = box[KDIR][1]; + const int offset = this->pointer; + + auto arr = this->array; + idefix_for("UnLoadBuffer4D_map",0,map.size(), + kbeg,kend, + jbeg,jend, + ibeg,iend, + KOKKOS_LAMBDA (int n, int k, int j, int i) { + out(map(n),k,j,i) = arr(i-ibeg + (j-jbeg)*ni + (k-kbeg)*ninj + n*ninjnk + offset ); + }); + + // Update pointer + this->pointer += ninjnk*map.size(); + } + + + private: + size_t pointer; + IdefixArray1D array; +}; + +#endif // MPI_BUFFER_HPP_ diff --git a/src/mpi/exchanger.cpp b/src/mpi/exchanger.cpp new file mode 100644 index 000000000..033ce4041 --- /dev/null +++ b/src/mpi/exchanger.cpp @@ -0,0 +1,308 @@ +// *********************************************************************************** +// Idefix MHD astrophysical code +// Copyright(C) Geoffroy R. J. Lesur +// and other code contributors +// Licensed under CeCILL 2.1 License, see COPYING for more information +// *********************************************************************************** + +#include +#include "exchanger.hpp" +#include "idefix.hpp" +#include "buffer.hpp" +#include "grid.hpp" +#include "arrays.hpp" + +//#define MPI_NON_BLOCKING +#define MPI_PERSISTENT + +int Exchanger::nInstances = 0; + +void Exchanger::Init( + Grid *grid, + int direction, + std::vector inputMap, + std::array nghost, + std::array nint, + bool inputHaveVs, + std::array overwriteBXn) { + idfx::pushRegion("Exchanger::Init"); + this->grid = grid; + this->direction = direction; + // Allocate mapVars on target and copy it from the input argument list + this->mapVars = idfx::ConvertVectorToIdefixArray(inputMap); + this->mapNVars = inputMap.size(); + this->haveVs = inputHaveVs; + + // increase the number of instances + this->thisInstance = nInstances; + + // Compute indices of arrays we will be working with + for(int dir = 0 ; dir < 3 ; dir++) { + this->nghost[dir] = nghost[dir]; + this->nint[dir] = nint[dir]; + this->ntot[dir] = nint[dir]+2*nghost[dir]; + this->beg[dir] = nghost[dir]; + this->end[dir] = nghost[dir]+nint[dir]; + } + + ///////////////////////////////////////////////////////////////////////////// + // Init exchange datasets + // Buffer size direction are for sends (i.e. 
buffer left is for a send from the left side) + + // Make left buffer + // Init zone to the full domain + for(int dir = 0 ; dir < 3 ; dir++) { + if(dir < direction) { + boxRecv[faceLeft][dir][0] = 0; + boxRecv[faceLeft][dir][1] = ntot[dir]; + } else if(dir > direction) { + boxRecv[faceLeft][dir][0] = beg[dir]; + boxRecv[faceLeft][dir][1] = end[dir]; + } else { + // dir == direction + boxRecv[faceLeft][dir][0] = 0; + boxRecv[faceLeft][dir][1] = nghost[dir]; + } + } + // Copy all the boxes from boxRecvLeft + boxRecv[faceRight] = boxRecv[faceLeft]; + boxSend = boxRecv; + + // Adjust the indices for send and receive in the direction of interest + boxRecv[faceRight][direction][0] = end[direction]; + boxRecv[faceRight][direction][1] = ntot[direction]; + + boxSend[faceLeft][direction][0] = beg[direction]; + boxSend[faceLeft][direction][1] = beg[direction]+nghost[direction]; + + boxSend[faceRight][direction][0] = end[direction] - nghost[direction]; + boxSend[faceRight][direction][1] = end[direction]; + + // Face-centered boxes + + // Add one element in the field direction + for(int component = 0 ; component < 3 ; component++) { + // Init as centered boxes + boxSendVs[component] = boxSend; + boxRecvVs[component] = boxRecv; + const int normalDir = component; + if(component != direction) { + for(int face = 0 ; face <2 ; face++) { + boxSendVs[component][face][normalDir][1] += 1; + boxRecvVs[component][face][normalDir][1] += 1; + } + } else { + // component == direction + if(!overwriteBXn[faceLeft]) boxSendVs[component][faceLeft][normalDir][0] += 1; + boxSendVs[component][faceLeft][normalDir][1] += 1; + + if(!overwriteBXn[faceRight]) boxRecvVs[component][faceRight][normalDir][0] += 1; + boxRecvVs[component][faceRight][normalDir][1] += 1; + } + } + + // Compute buffer sizes + for(int face=0 ; face < 2 ; face++) { + bufferSizeSend[face] = mapNVars * Buffer::ComputeBoxSize(boxSend[face]); + bufferSizeRecv[face] = mapNVars * Buffer::ComputeBoxSize(boxRecv[face]); + if(haveVs) { + for(int component = 0 ; component CartComm,direction,1,&procRecv[faceLeft],&procSend[faceRight]); + MPI_Cart_shift(grid->CartComm,direction,-1,&procRecv[faceRight],&procSend[faceLeft]); + + #ifdef MPI_PERSISTENT + + // X1-dir exchanges + // We receive from procRecv, and we send to procSend + + MPI_Send_init(BufferSend[faceRight].data(), bufferSizeSend[faceRight], realMPI, + procSend[faceRight], thisInstance*2, + grid->CartComm, &sendRequest[faceRight]); + + MPI_Recv_init(BufferRecv[faceLeft].data(), bufferSizeRecv[faceLeft], realMPI, + procRecv[faceLeft],thisInstance*2, + grid->CartComm, &recvRequest[faceLeft]); + + // Send to the left + // We receive from procRecv, and we send to procSend + + MPI_Send_init(BufferSend[faceLeft].data(), bufferSizeSend[faceLeft], realMPI, + procSend[faceLeft],thisInstance*2+1, + grid->CartComm, &sendRequest[faceLeft]); + + MPI_Recv_init(BufferRecv[faceRight].data(), bufferSizeRecv[faceRight], realMPI, + procRecv[faceRight], thisInstance*2+1, + grid->CartComm, &recvRequest[faceRight]); + + #endif // MPI_PERSISTENT + + // say this instance is initialized. 
+ isInitialized = true;
+ nInstances++;
+
+ idfx::popRegion();
+}
+
+Exchanger::~Exchanger() {
+ idfx::pushRegion("Exchanger::~Exchanger");
+ if(isInitialized) {
+ // Properly clean up the mess
+ #ifdef MPI_PERSISTENT
+ for(int i=0 ; i< 2; i++) {
+ MPI_Request_free( &sendRequest[i]);
+ MPI_Request_free( &recvRequest[i]);
+ }
+ #endif
+ isInitialized = false;
+ }
+ idfx::popRegion();
+}
+
+void Exchanger::Exchange(IdefixArray4D<real> Vc, IdefixArray4D<real> Vs) {
+ idfx::pushRegion("Exchanger::Exchange");
+ // Load the buffers with data
+ Buffer BufferLeft = BufferSend[faceLeft];
+ Buffer BufferRight = BufferSend[faceRight];
+ IdefixArray1D<int> map = this->mapVars;
+
+ bool recvRight = (procRecv[faceRight] != MPI_PROC_NULL);
+ bool recvLeft = (procRecv[faceLeft] != MPI_PROC_NULL);
+
+ // If MPI Persistent, start receiving even before the buffers are filled
+ myTimer -= MPI_Wtime();
+ double tStart = MPI_Wtime();
+#ifdef MPI_PERSISTENT
+ MPI_Status sendStatus[2];
+ MPI_Status recvStatus[2];
+
+ MPI_Startall(2, recvRequest);
+ idfx::mpiCallsTimer += MPI_Wtime() - tStart;
+#endif
+ myTimer += MPI_Wtime();
+
+ BufferLeft.ResetPointer();
+ BufferRight.ResetPointer();
+
+ BufferLeft.Pack(Vc, map, boxSend[faceLeft]);
+ BufferRight.Pack(Vc, map, boxSend[faceRight]);
+ // Load face-centered field in the buffer
+ if(haveVs) {
+ for(int component = 0 ; component < DIMENSIONS ; component++) {
+ BufferLeft.Pack(Vs, component, boxSendVs[component][faceLeft]);
+ BufferRight.Pack(Vs, component, boxSendVs[component][faceRight]);
+ }
+ }
+
+ // Wait for completion before sending out everything
+ Kokkos::fence();
+ myTimer -= MPI_Wtime();
+ tStart = MPI_Wtime();
+#ifdef MPI_PERSISTENT
+ MPI_Startall(2, sendRequest);
+ // Wait for buffers to be received
+ MPI_Waitall(2,recvRequest,recvStatus);
+
+#else
+
+ #ifdef MPI_NON_BLOCKING
+ MPI_Status sendStatus[2];
+ MPI_Status recvStatus[2];
+ MPI_Request sendRequest[2];
+ MPI_Request recvRequest[2];
+
+ // We receive from procRecv, and we send to procSend
+
+ MPI_Isend(BufferSend[faceRight].data(), bufferSizeSend[faceRight], realMPI,
+ procSend[faceRight], 100, grid->CartComm, &sendRequest[0]);
+
+ MPI_Irecv(BufferRecv[faceLeft].data(), bufferSizeRecv[faceLeft], realMPI,
+ procRecv[faceLeft], 100, grid->CartComm, &recvRequest[0]);
+ // Send to the left
+ // We receive from procRecv, and we send to procSend
+
+ MPI_Isend(BufferSend[faceLeft].data(), bufferSizeSend[faceLeft], realMPI,
+ procSend[faceLeft], 101, grid->CartComm, &sendRequest[1]);
+
+ MPI_Irecv(BufferRecv[faceRight].data(), bufferSizeRecv[faceRight], realMPI,
+ procRecv[faceRight], 101, grid->CartComm, &recvRequest[1]);
+
+ // Wait for recv to complete (we don't care about the sends)
+ MPI_Waitall(2, recvRequest, recvStatus);
+
+ #else
+ MPI_Status status;
+ // Send to the right
+ // We receive from procRecv, and we send to procSend
+
+ MPI_Sendrecv(BufferSend[faceRight].data(), bufferSizeSend[faceRight], realMPI,
+ procSend[faceRight], 100,
+ BufferRecv[faceLeft].data(), bufferSizeRecv[faceLeft], realMPI,
+ procRecv[faceLeft], 100,
+ grid->CartComm, &status);
+
+ // Send to the left
+ // We receive from procRecv, and we send to procSend
+
+ MPI_Sendrecv(BufferSend[faceLeft].data(), bufferSizeSend[faceLeft], realMPI,
+ procSend[faceLeft], 101,
+ BufferRecv[faceRight].data(), bufferSizeRecv[faceRight], realMPI,
+ procRecv[faceRight], 101,
+ grid->CartComm, &status);
+ #endif
+#endif
+myTimer += MPI_Wtime();
+idfx::mpiCallsTimer += MPI_Wtime() - tStart;
+// Unpack
+BufferLeft=BufferRecv[faceLeft];
+BufferRight=BufferRecv[faceRight]; + +BufferLeft.ResetPointer(); +BufferRight.ResetPointer(); + +if(recvLeft) { + BufferLeft.Unpack(Vc, map, boxRecv[faceLeft]); + if(haveVs) { + for(int component = 0 ; component < DIMENSIONS ; component++) { + BufferLeft.Unpack(Vs, component, boxRecvVs[component][faceLeft]); + } + } +} +if(recvRight) { + BufferRight.Unpack(Vc, map, boxRecv[faceRight]); + if(haveVs) { + for(int component = 0 ; component < DIMENSIONS ; component++) { + BufferRight.Unpack(Vs, component, boxRecvVs[component][faceRight]); + } + } +} +myTimer -= MPI_Wtime(); +#ifdef MPI_NON_BLOCKING + // Wait for the sends if they have not yet completed + MPI_Waitall(2, sendRequest, sendStatus); +#endif + +#ifdef MPI_PERSISTENT + MPI_Waitall(2, sendRequest, sendStatus); +#endif + myTimer += MPI_Wtime(); + bytesSentOrReceived += (bufferSizeRecv[faceLeft] + +bufferSizeSend[faceLeft] + +bufferSizeRecv[faceRight] + +bufferSizeSend[faceRight])*sizeof(real); + + idfx::popRegion(); +} diff --git a/src/mpi/exchanger.hpp b/src/mpi/exchanger.hpp new file mode 100644 index 000000000..2e1affcb0 --- /dev/null +++ b/src/mpi/exchanger.hpp @@ -0,0 +1,82 @@ +// *********************************************************************************** +// Idefix MHD astrophysical code +// Copyright(C) Geoffroy R. J. Lesur +// and other code contributors +// Licensed under CeCILL 2.1 License, see COPYING for more information +// *********************************************************************************** + +#ifndef MPI_EXCHANGER_HPP_ +#define MPI_EXCHANGER_HPP_ + +#include + +#include +#include +#include +#include "idefix.hpp" +#include "grid.hpp" +#include "buffer.hpp" +#include "arrays.hpp" + +class Grid; + +class Exchanger { + public: + Exchanger() = default; + void Init( Grid* grid, + int direction, + std::vector inputMap, + std::array nghost, + std::array nint, + bool inputHaveVs = false, + std::array overwriteBXn = {true, true}); + + void Exchange(IdefixArray4D Vc, IdefixArray4D Vs); + ~Exchanger(); + + static int nInstances; // total number of mpi instances in the code + int thisInstance; // unique number of the current instance + bool isInitialized{false}; + + // MPI throughput timer specific to this object + double myTimer{0}; + int64_t bytesSentOrReceived{0}; + + // Buffer sizes for throughput calculations + std::array bufferSizeSend; + std::array bufferSizeRecv; + + private: + enum {faceRight, faceLeft}; + + std::array boxSend, boxRecv; // bounding boxes for each face + std::array,3> boxSendVs, boxRecvVs; // 3= 3 field components + + // Buffers for MPI calls + Buffer BufferSend[2]; + Buffer BufferRecv[2]; + + int procSend[2]; // MPI process to send to in X1 direction + int procRecv[2]; // MPI process to receive from in X1 direction + + int direction; + IdefixArray1D mapVars; + int mapNVars{0}; + + int nint[3]; //< number of internal elements of the arrays we treat + int nghost[3]; //< number of ghost zone of the arrays we treat + int ntot[3]; //< total number of cells of the arrays we treat + int beg[3]; //< begining index of the active zone + int end[3]; //< end index of the active zone + + bool haveVs{false}; + + // Requests for MPI persistent communications + MPI_Request sendRequest[2]; + MPI_Request recvRequest[2]; + + Grid *grid; +}; + + +#endif // MPI_EXCHANGER_HPP_ diff --git a/src/mpi/mpi.cpp b/src/mpi/mpi.cpp new file mode 100644 index 000000000..36b4eb9a3 --- /dev/null +++ b/src/mpi/mpi.cpp @@ -0,0 +1,265 @@ +// 
*********************************************************************************** +// Idefix MHD astrophysical code +// Copyright(C) Geoffroy R. J. Lesur +// and other code contributors +// Licensed under CeCILL 2.1 License, see COPYING for more information +// *********************************************************************************** + + +#include "mpi.hpp" +#include +#include +#include // NOLINT [build/c++11] +#include // NOLINT [build/c++11] +#include +#include +#include "idefix.hpp" +#include "dataBlock.hpp" + + +#if defined(OPEN_MPI) && OPEN_MPI +#include "mpi-ext.h" // Needed for CUDA-aware check */ +#endif + + + + +// init the number of instances +int Mpi::nInstances = 0; + +// MPI Routines exchange +void Mpi::ExchangeAll() { + IDEFIX_ERROR("Not Implemented"); +} + +/// +/// Initialise an instance of the MPI class. +/// @param grid: pointer to the grid object (needed to get the MPI neighbours) +/// @param inputMap: 1st indices of inputVc which are to be exchanged (i.e, the list of variables) +/// @param nghost: size of the ghost region in each direction +/// @param nint: size of the internal region in each direction +/// @param inputHaveVs: whether the instance should also treat face-centered variable +/// (optional, default false) +/// + +void Mpi::Init(Grid *grid, std::vector inputMap, + std::array nghost, std::array nint, + std::array lbound, + std::array rbound, + bool inputHaveVs) { + idfx::pushRegion("Mpi::Init"); + + // increase the number of instances + nInstances++; + thisInstance=nInstances; + + for(int dir=0; dir<3; dir++) { + std::array overWriteBXn = {true, true}; + if(lbound[dir] == BoundaryType::shearingbox) { + overWriteBXn[faceLeft] = false; + } + if(rbound[dir] == BoundaryType::shearingbox) { + overWriteBXn[faceRight] = false; + } + + exchanger[dir].Init(grid, dir, inputMap, + nghost, nint, + inputHaveVs, overWriteBXn); + } + + isInitialized = true; + idfx::popRegion(); +} + +// Destructor (clean up persistent communication channels) +Mpi::~Mpi() { + idfx::pushRegion("Mpi::~Mpi"); + if(isInitialized) { + if(thisInstance==1) { + int bytesSentOrReceived = 0; + double myTimer = 0; + for(int dir=0; dir<3; dir++) { + bytesSentOrReceived += exchanger[dir].bytesSentOrReceived; + myTimer += exchanger[dir].myTimer; + } + idfx::cout << "Mpi(" << thisInstance << "): measured throughput is " + << bytesSentOrReceived/myTimer/1024.0/1024.0 << " MB/s" << std::endl; + idfx::cout << "Mpi(" << thisInstance << "): message sizes were " << std::endl; + idfx::cout << " X1: " + << exchanger[IDIR].bufferSizeSend[0]*sizeof(real)/1024.0/1024.0 + << " MB" << std::endl; + idfx::cout << " X2: " + << exchanger[JDIR].bufferSizeSend[0]*sizeof(real)/1024.0/1024.0 + << " MB" << std::endl; + idfx::cout << " X3: " + << exchanger[KDIR].bufferSizeSend[0]*sizeof(real)/1024.0/1024.0 + << " MB" << std::endl; + } + isInitialized = false; + } + idfx::popRegion(); +} + +void Mpi::ExchangeX1(IdefixArray4D Vc, IdefixArray4D Vs) { + idfx::pushRegion("Mpi::ExchangeX1"); + + exchanger[IDIR].Exchange(Vc, Vs); + idfx::popRegion(); +} + +void Mpi::ExchangeX2(IdefixArray4D Vc, IdefixArray4D Vs) { + idfx::pushRegion("Mpi::ExchangeX2"); + exchanger[JDIR].Exchange(Vc, Vs); + idfx::popRegion(); +} + +void Mpi::ExchangeX3(IdefixArray4D Vc, IdefixArray4D Vs) { + idfx::pushRegion("Mpi::ExchangeX3"); + exchanger[KDIR].Exchange(Vc, Vs); + idfx::popRegion(); +} + + +void Mpi::CheckConfig() { + idfx::pushRegion("Mpi::CheckConfig"); + // compile time check + #ifdef KOKKOS_ENABLE_CUDA + #if 
defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT + #error Your MPI library is not CUDA Aware (check Idefix requirements). + #endif + #endif /* MPIX_CUDA_AWARE_SUPPORT */ + + // Run-time check that we can do a reduce on device arrays + IdefixArray1D src("MPIChecksrc",1); + IdefixArray1D::HostMirror srcHost = Kokkos::create_mirror_view(src); + + if(idfx::prank == 0) { + srcHost(0) = 0; + Kokkos::deep_copy(src, srcHost); + } + + if(idfx::psize > 1) { + MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); + + // Capture segfaults + struct sigaction newHandler; + struct sigaction oldHandler; + memset(&newHandler, 0, sizeof(newHandler)); + newHandler.sa_flags = SA_SIGINFO; + newHandler.sa_sigaction = Mpi::SigErrorHandler; + sigaction(SIGSEGV, &newHandler, &oldHandler); + try { + // We next circulate the info round-robin accross all the nodes to check that + // MPI can exchange buffers in idefix arrays + + MPI_Status status; + int ierrSend, ierrRecv; + if(idfx::prank == 0) { + ierrSend = MPI_Send(src.data(), 1, MPI_INT64_T, idfx::prank+1, 1, MPI_COMM_WORLD); + ierrRecv = MPI_Recv(src.data(), 1, MPI_INT64_T, idfx::psize-1, 1, MPI_COMM_WORLD, &status); + } else { + ierrRecv = MPI_Recv(src.data(), 1, MPI_INT64_T, idfx::prank-1, 1, MPI_COMM_WORLD, &status); + // Add our own rank to the data + Kokkos::deep_copy(srcHost, src); + srcHost(0) += idfx::prank; + Kokkos::deep_copy(src, srcHost); + ierrSend = MPI_Send(src.data(), 1, MPI_INT64_T, (idfx::prank+1)%idfx::psize, 1, + MPI_COMM_WORLD); + } + + if(ierrSend != 0) { + char MPImsg[MPI_MAX_ERROR_STRING]; + int MPImsgLen; + MPI_Error_string(ierrSend, MPImsg, &MPImsgLen); + throw std::runtime_error(std::string(MPImsg, MPImsgLen)); + } + if(ierrRecv != 0) { + char MPImsg[MPI_MAX_ERROR_STRING]; + int MPImsgLen; + MPI_Error_string(ierrSend, MPImsg, &MPImsgLen); + throw std::runtime_error(std::string(MPImsg, MPImsgLen)); + } + } catch(std::exception &e) { + std::stringstream errmsg; + errmsg << "Your MPI library is unable to perform Send/Recv on Idefix arrays."; + errmsg << std::endl; + #ifdef KOKKOS_ENABLE_CUDA + errmsg << "Check that your MPI library is CUDA aware." << std::endl; + #elif defined(KOKKOS_ENABLE_HIP) + errmsg << "Check that your MPI library is RocM aware." << std::endl; + #else + errmsg << "Check your MPI library configuration." << std::endl; + #endif + errmsg << "Error: " << e.what() << std::endl; + IDEFIX_ERROR(errmsg); + } + // Restore old handlers + sigaction(SIGSEGV, &oldHandler, NULL ); + MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL); + } + + // Check that we have the proper end result + Kokkos::deep_copy(srcHost, src); + int64_t size = static_cast(idfx::psize); + int64_t rank = static_cast(idfx::prank); + int64_t result = rank == 0 ? size*(size-1)/2 : rank*(rank+1)/2; + + if(srcHost(0) != result) { + idfx::cout << "got " << srcHost(0) << " expected " << result << std::endl; + std::stringstream errmsg; + errmsg << "Your MPI library managed to perform MPI exchanges on Idefix Arrays, but the result "; + errmsg << "is incorrect. " << std::endl; + errmsg << "Check your MPI library configuration." 
<< std::endl; + IDEFIX_ERROR(errmsg); + } + idfx::popRegion(); +} + +void Mpi::SigErrorHandler(int nSignum, siginfo_t* si, void* vcontext) { + std::stringstream errmsg; + errmsg << "A segmentation fault was triggered while attempting to test your MPI library."; + errmsg << std::endl; + errmsg << "Your MPI library is unable to perform reductions on Idefix arrays."; + errmsg << std::endl; + #ifdef KOKKOS_ENABLE_CUDA + errmsg << "Check that your MPI library is CUDA aware." << std::endl; + #elif defined(KOKKOS_ENABLE_HIP) + errmsg << "Check that your MPI library is RocM aware." << std::endl; + #else + errmsg << "Check your MPI library configuration." << std::endl; + #endif + IDEFIX_ERROR(errmsg); +} + +// This routine check that all of the processes are synced. +// Returns true if this is the case, false otherwise + +bool Mpi::CheckSync(real timeout) { + // If no parallelism, then we're in sync! + if(idfx::psize == 1) return(true); + + int send = idfx::prank; + int recv = 0; + MPI_Request request; + + MPI_Iallreduce(&send, &recv, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &request); + + double start = MPI_Wtime(); + int flag = 0; + MPI_Status status; + + while((MPI_Wtime()-start < timeout) && !flag) { + MPI_Test(&request, &flag, &status); + // sleep for 10 ms + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + if(!flag) { + // We did not managed to do an allreduce, so this is a failure. + return(false); + } + if(recv != idfx::psize*(idfx::psize-1)/2) { + IDEFIX_ERROR("wrong result for synchronisation"); + } + + return(true); +} diff --git a/src/mpi/mpi.hpp b/src/mpi/mpi.hpp new file mode 100644 index 000000000..b8f929326 --- /dev/null +++ b/src/mpi/mpi.hpp @@ -0,0 +1,74 @@ +// *********************************************************************************** +// Idefix MHD astrophysical code +// Copyright(C) Geoffroy R. J. 
Lesur +// and other code contributors +// Licensed under CeCILL 2.1 License, see COPYING for more information +// *********************************************************************************** + +#ifndef MPI_MPI_HPP_ +#define MPI_MPI_HPP_ + +#include + +#include +#include +#include +#include "idefix.hpp" +#include "grid.hpp" +#include "buffer.hpp" +#include "exchanger.hpp" + + +class DataBlock; + + +class Mpi { + public: + Mpi() = default; + // MPI Exchange functions + void ExchangeAll(); ///< Exchange boundary elements in all directions (todo) + void ExchangeX1(IdefixArray4D inputVc, + IdefixArray4D inputVs = IdefixArray4D()); + ///< Exchange boundary elements in the X1 direction + void ExchangeX2(IdefixArray4D inputVc, + IdefixArray4D inputVs = IdefixArray4D()); + ///< Exchange boundary elements in the X2 direction + void ExchangeX3(IdefixArray4D inputVc, + IdefixArray4D inputVs = IdefixArray4D()); + ///< Exchange boundary elements in the X3 direction + + // Init from datablock + void Init(Grid *grid, std::vector inputMap, + std::array nghost, std::array nint, + std::array lbound, + std::array rbound, + bool inputHaveVs = false ); + + // Check that MPI will work with the designated target (in particular GPU Direct) + static void CheckConfig(); + + // Check that MPI processes are synced + static bool CheckSync(real); + + + // Destructor + ~Mpi(); + + private: + enum {faceRight, faceLeft}; + // Because the MPI class initialise internal pointers, we do not allow copies of this class + // These lines should not be removed as they constitute a safeguard + Mpi(const Mpi&); + Mpi operator=(const Mpi&); + + static int nInstances; // total number of mpi instances in the code + int thisInstance; // unique number of the current instance + int nReferences; // # of references to this instance + bool isInitialized{false}; + + std::array exchanger; ///< exchangers in each direction + // Error handler used by CheckConfig + static void SigErrorHandler(int, siginfo_t* , void* ); +}; + +#endif // MPI_MPI_HPP_ diff --git a/src/rkl/rkl.hpp b/src/rkl/rkl.hpp index c8dd8cb96..6f22de6f5 100644 --- a/src/rkl/rkl.hpp +++ b/src/rkl/rkl.hpp @@ -198,7 +198,8 @@ RKLegendre::RKLegendre(Input &input, Fluid* hydroin) { nvarRKL = varListHost.size(); #ifdef WITH_MPI - mpi.Init(data->mygrid, varListHost, data->nghost.data(), data->np_int.data(), haveVs); + mpi.Init(data->mygrid, varListHost, data->nghost, data->np_int, + data->lbound, data->rbound, haveVs); #endif diff --git a/src/utils/column.cpp b/src/utils/column.cpp index 34d55f56b..4657abc55 100644 --- a/src/utils/column.cpp +++ b/src/utils/column.cpp @@ -56,7 +56,7 @@ Column::Column(int dir, int sign, DataBlock *data) std::vector mapVars; mapVars.push_back(ntarget); - this->mpi.Init(data->mygrid, mapVars, data->nghost.data(), data->np_int.data()); + this->mpi.Init(data->mygrid, mapVars, data->nghost, data->np_int, data->lbound, data->rbound); this->nproc = data->mygrid->nproc; #endif idfx::popRegion(); diff --git a/test/MHD/AmbipolarCshock3D/idefix-rkl.ini b/test/MHD/AmbipolarCshock3D/idefix-rkl.ini index 055429557..621552b13 100644 --- a/test/MHD/AmbipolarCshock3D/idefix-rkl.ini +++ b/test/MHD/AmbipolarCshock3D/idefix-rkl.ini @@ -22,8 +22,8 @@ X1-beg userdef X1-end userdef X2-beg periodic X2-end periodic -X3-beg periodic -X3-end periodic +X3-beg userdef +X3-end userdef [Output] vtk 100.0 diff --git a/test/MHD/AmbipolarCshock3D/idefix.ini b/test/MHD/AmbipolarCshock3D/idefix.ini index 984e985ef..78c7ddd23 100644 --- a/test/MHD/AmbipolarCshock3D/idefix.ini +++ 
b/test/MHD/AmbipolarCshock3D/idefix.ini @@ -19,8 +19,8 @@ X1-beg userdef X1-end userdef X2-beg periodic X2-end periodic -X3-beg periodic -X3-end periodic +X3-beg userdef +X3-end userdef [Output] vtk 100.0 diff --git a/test/MHD/AmbipolarCshock3D/setup.cpp b/test/MHD/AmbipolarCshock3D/setup.cpp index 5f8ec9313..abf8a0f24 100644 --- a/test/MHD/AmbipolarCshock3D/setup.cpp +++ b/test/MHD/AmbipolarCshock3D/setup.cpp @@ -79,7 +79,48 @@ void UserdefBoundary(Hydro *hydro, int dir, BoundarySide side, real t) { }); } - + if(dir ==KDIR) { + auto Vc = hydro->Vc; + auto Vs = hydro->Vs; + int jbeg = data->beg[JDIR]; + int jend = data->end[JDIR]; + int kbeg = data->beg[KDIR]; + int kend = data->end[KDIR]; + hydro->boundary->BoundaryForAll("UserdefBoundary", dir, side, + KOKKOS_LAMBDA (int n, int k, int j, int i) { + int kref=k; + + if(side == left) { + kref = kbeg; + } else { + kref = kend -1; + } + Vc(n,k,j,i) = Vc(n,kref,j,i); + }); + + if(dir == KDIR) { + hydro->boundary->BoundaryForX1s("UserdefBoundaryX1s", dir, side, + KOKKOS_LAMBDA (int k, int j, int i) { + int kref=k; + if(side == left) { + kref = kbeg; + } else { + kref = kend -1; + } + Vs(BX1s,k,j,i) = Vs(BX1s,kref,j,i); + }); + hydro->boundary->BoundaryForX2s("UserdefBoundaryX2s", dir, side, + KOKKOS_LAMBDA (int k, int j, int i) { + int kref=k; + if(side == left) { + kref = kbeg; + } else { + kref = kend -1; + } + Vs(BX2s,k,j,i) = Vs(BX2s,kref,j,i); + }); + } + } } @@ -136,8 +177,8 @@ void Setup::InitFlow(DataBlock &data) { real z = d.x[KDIR](k); real y = d.x[JDIR](j); d.Ve(AX1e,k,j,i) = B0*sin(theta)*z; - d.Ve(AX2e,k,j,i) = ZERO_F; - d.Ve(AX3e,k,j,i) = B0*cos(theta)*y; + d.Ve(AX2e,k,j,i) = -B0*cos(theta)*z; + d.Ve(AX3e,k,j,i) = 0; #else IDEFIX_ERROR("Vector potential only valid in 3 dimensions for this setup"); #endif diff --git a/test/MHD/AmbipolarCshock3D/testme.py b/test/MHD/AmbipolarCshock3D/testme.py index f9fae6765..964224854 100755 --- a/test/MHD/AmbipolarCshock3D/testme.py +++ b/test/MHD/AmbipolarCshock3D/testme.py @@ -9,7 +9,7 @@ sys.path.append(os.getenv("IDEFIX_DIR")) import pytools.idfx_test as tst -tolerance=2e-14 +tolerance=3e-14 def testMe(test): test.configure() test.compile() diff --git a/test/MHD/OrszagTang3D/setup.cpp b/test/MHD/OrszagTang3D/setup.cpp index 8e7205c42..86766c8ee 100644 --- a/test/MHD/OrszagTang3D/setup.cpp +++ b/test/MHD/OrszagTang3D/setup.cpp @@ -19,6 +19,7 @@ void Analysis(DataBlock& data) { idfx::cout << "Analysis: Checking restart routines" << std::endl; // Trigger dump creation + // data.SetBoundaries(); myOutput->ForceWriteDump(data); // Mirror data on Host @@ -96,7 +97,8 @@ void Analysis(DataBlock& data) { errornum++; idfx::cout << "-----------------------------------------" << std::endl << " Error in Vc at (i,j,k,n) = ( " << i << ", " << j << ", " << k << ", " << n << ")" << std::endl - << " Coordinates (x1,x2,x3) = ( " << d.x[IDIR](i) << ", " << d.x[JDIR](j) << ", " << d.x[KDIR](k) << ")" << std::endl; + << " Coordinates (x1,x2,x3) = ( " << d.x[IDIR](i) << ", " << d.x[JDIR](j) << ", " << d.x[KDIR](k) << ")" << std::endl + << "Original= " << myVc(n,k,j,i) << " New=" << d.Vc(n,k,j,i) << " diff=" << myVc(n,k,j,i)-d.Vc(n,k,j,i) << std::endl; } } diff --git a/test/MHD/ShearingBox/testme.py b/test/MHD/ShearingBox/testme.py index c89d24b52..364adde9b 100755 --- a/test/MHD/ShearingBox/testme.py +++ b/test/MHD/ShearingBox/testme.py @@ -41,3 +41,6 @@ def testMe(test): test.reconstruction=2 test.mpi=False testMe(test) + + test.mpi=True + testMe(test)
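
The flattened buffer layout introduced in src/mpi/buffer.hpp linearizes a bounding box with the index (i-ibeg) + (j-jbeg)*ni + (k-kbeg)*ni*nj + n*ni*nj*nk + offset, and advances the internal pointer by nvar*ni*nj*nk after each Pack or Unpack. The sketch below is a plain host-side C++ analogue of that indexing, not Idefix code: it uses std::vector instead of IdefixArray, the array sizes and the box are made up, and it only demonstrates that the formula packs the box contiguously and round-trips it.

// Host-side analogue of Buffer::Pack/Unpack (illustration only, not Idefix code).
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const int NV = 2, NK = 6, NJ = 6, NI = 6;          // made-up array sizes
  std::vector<double> field(NV*NK*NJ*NI), copy(field.size(), 0.0);
  for (std::size_t q = 0; q < field.size(); ++q) field[q] = double(q);
  auto idx4 = [&](int n, int k, int j, int i) { return ((n*NK + k)*NJ + j)*NI + i; };

  // Bounding box to exchange: i in [1,5), j in [2,4), k in [0,3)
  const int ib = 1, ie = 5, jb = 2, je = 4, kb = 0, ke = 3;
  const int ni = ie-ib, ninj = (je-jb)*ni, ninjnk = (ke-kb)*ninj;

  std::vector<double> buffer(NV*ninjnk);
  int pointer = 0;                                   // plays the role of Buffer::pointer
  // Pack: same linear index as in buffer.hpp
  for (int n = 0; n < NV; ++n)
    for (int k = kb; k < ke; ++k)
      for (int j = jb; j < je; ++j)
        for (int i = ib; i < ie; ++i)
          buffer[(i-ib) + (j-jb)*ni + (k-kb)*ninj + n*ninjnk + pointer] = field[idx4(n,k,j,i)];
  pointer += NV*ninjnk;
  assert(pointer == int(buffer.size()));             // the box exactly fills the buffer

  // Unpack into a second field and check the box round-trips unchanged
  pointer = 0;
  for (int n = 0; n < NV; ++n)
    for (int k = kb; k < ke; ++k)
      for (int j = jb; j < je; ++j)
        for (int i = ib; i < ie; ++i)
          copy[idx4(n,k,j,i)] = buffer[(i-ib) + (j-jb)*ni + (k-kb)*ninj + n*ninjnk + pointer];
  for (int n = 0; n < NV; ++n)
    for (int k = kb; k < ke; ++k)
      for (int j = jb; j < je; ++j)
        for (int i = ib; i < ie; ++i)
          assert(copy[idx4(n,k,j,i)] == field[idx4(n,k,j,i)]);
  return 0;
}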
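
The cell-centered boxes built in Exchanger::Init along the exchange direction reduce to simple index arithmetic: with beg = nghost and end = nghost + nint, the left and right receive boxes cover the ghost strips [0, nghost) and [end, ntot), while the send boxes cover the first and last nghost active cells. The snippet below only prints those intervals for made-up sizes (nghost and nint are assumptions); it is a standalone illustration of the arithmetic, not Idefix code, and it ignores the face-centered adjustments of boxSendVs/boxRecvVs.

// Standalone illustration of the interval arithmetic in Exchanger::Init
// along the exchange direction (cell-centered boxes only).
#include <cstdio>

int main() {
  const int nghost = 2, nint = 16;        // made-up sizes
  const int beg  = nghost;                // first active index
  const int end  = nghost + nint;         // one past the last active index
  const int ntot = nint + 2*nghost;       // total size including ghosts

  std::printf("recv left  ghosts : [%d, %d)\n", 0, nghost);
  std::printf("recv right ghosts : [%d, %d)\n", end, ntot);
  std::printf("send left  cells  : [%d, %d)\n", beg, beg + nghost);
  std::printf("send right cells  : [%d, %d)\n", end - nghost, end);
  return 0;
}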
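
The run-time test in Mpi::CheckConfig circulates a value around the ranks, with each rank r > 0 adding r before forwarding, so rank 0 should get back size*(size-1)/2 and rank r should hold r*(r+1)/2 right after its own addition. A tiny serial check of those closed forms (no MPI involved, the rank count is arbitrary):

// Serial sanity check of the closed-form sums expected by Mpi::CheckConfig.
#include <cassert>
#include <cstdint>

int main() {
  const std::int64_t size = 8;             // arbitrary number of ranks
  std::int64_t value = 0;                  // rank 0 injects 0
  for (std::int64_t r = 1; r < size; ++r) {
    value += r;                            // rank r adds its own rank number
    assert(value == r*(r+1)/2);            // what rank r holds after adding
  }
  assert(value == size*(size-1)/2);        // what rank 0 receives at the end
  return 0;
}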