From a6018e3ca0cd96c004cb17e3659cc39b664ff0a1 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 24 Oct 2022 10:29:23 +0200 Subject: [PATCH 001/117] alpine files moved to sub-directory --- alpine/{ => ElectrostaticPIC}/BumponTailInstability.cpp | 0 alpine/{ => ElectrostaticPIC}/CMakeLists.txt | 0 alpine/{ => ElectrostaticPIC}/ChargedParticles.hpp | 0 alpine/{ => ElectrostaticPIC}/LandauDamping.cpp | 0 alpine/{ => ElectrostaticPIC}/PenningTrap.cpp | 0 alpine/{ => ElectrostaticPIC}/UniformPlasmaTest.cpp | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename alpine/{ => ElectrostaticPIC}/BumponTailInstability.cpp (100%) rename alpine/{ => ElectrostaticPIC}/CMakeLists.txt (100%) rename alpine/{ => ElectrostaticPIC}/ChargedParticles.hpp (100%) rename alpine/{ => ElectrostaticPIC}/LandauDamping.cpp (100%) rename alpine/{ => ElectrostaticPIC}/PenningTrap.cpp (100%) rename alpine/{ => ElectrostaticPIC}/UniformPlasmaTest.cpp (100%) diff --git a/alpine/BumponTailInstability.cpp b/alpine/ElectrostaticPIC/BumponTailInstability.cpp similarity index 100% rename from alpine/BumponTailInstability.cpp rename to alpine/ElectrostaticPIC/BumponTailInstability.cpp diff --git a/alpine/CMakeLists.txt b/alpine/ElectrostaticPIC/CMakeLists.txt similarity index 100% rename from alpine/CMakeLists.txt rename to alpine/ElectrostaticPIC/CMakeLists.txt diff --git a/alpine/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp similarity index 100% rename from alpine/ChargedParticles.hpp rename to alpine/ElectrostaticPIC/ChargedParticles.hpp diff --git a/alpine/LandauDamping.cpp b/alpine/ElectrostaticPIC/LandauDamping.cpp similarity index 100% rename from alpine/LandauDamping.cpp rename to alpine/ElectrostaticPIC/LandauDamping.cpp diff --git a/alpine/PenningTrap.cpp b/alpine/ElectrostaticPIC/PenningTrap.cpp similarity index 100% rename from alpine/PenningTrap.cpp rename to alpine/ElectrostaticPIC/PenningTrap.cpp diff --git a/alpine/UniformPlasmaTest.cpp b/alpine/ElectrostaticPIC/UniformPlasmaTest.cpp similarity index 100% rename from alpine/UniformPlasmaTest.cpp rename to alpine/ElectrostaticPIC/UniformPlasmaTest.cpp From 6e2186b75ca1a49e9a7648a6e0ea860a11e8e44a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 24 Oct 2022 10:35:34 +0200 Subject: [PATCH 002/117] Files copied from PIC directory to PIF for modifying --- alpine/ElectrostaticPIF/CMakeLists.txt | 26 + .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 624 ++++++++++++++++++ alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 410 ++++++++++++ 3 files changed, 1060 insertions(+) create mode 100644 alpine/ElectrostaticPIF/CMakeLists.txt create mode 100644 alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp create mode 100644 alpine/ElectrostaticPIF/LandauDampingPIF.cpp diff --git a/alpine/ElectrostaticPIF/CMakeLists.txt b/alpine/ElectrostaticPIF/CMakeLists.txt new file mode 100644 index 000000000..60fa9678b --- /dev/null +++ b/alpine/ElectrostaticPIF/CMakeLists.txt @@ -0,0 +1,26 @@ +file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") +message (STATUS "Adding index test found in ${_relPath}") + +include_directories ( + ${CMAKE_SOURCE_DIR}/src +) + +link_directories ( + ${CMAKE_CURRENT_SOURCE_DIR} + ${Kokkos_DIR}/.. 
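# An illustrative build and run of the target added below (assuming an existing
# IPPL/Kokkos/MPI build tree and a Makefile generator; the command-line arguments
# are the ones documented at the top of LandauDampingPIF.cpp):
#
#   make LandauDampingPIF
#   srun ./LandauDampingPIF 128 128 128 10000 10 FFT 0.01 2.0 --info 10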
+) + +set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) +set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) + +add_executable (LandauDampingPIF LandauDampingPIF.cpp) +target_link_libraries (LandauDampingPIF ${IPPL_LIBS}) + +# vi: set et ts=4 sw=4 sts=4: + +# Local Variables: +# mode: cmake +# cmake-tab-width: 4 +# indent-tabs-mode: nil +# require-final-newline: nil +# End: diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp new file mode 100644 index 000000000..e64417e19 --- /dev/null +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -0,0 +1,624 @@ +// ChargedParticles header file +// Defines a particle attribute for charged particles to be used in +// test programs +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "Ippl.h" +#include "Solver/FFTPeriodicPoissonSolver.h" + +// dimension of our positions +constexpr unsigned Dim = 3; + +// some typedefs +typedef ippl::ParticleSpatialLayout PLayout_t; +typedef ippl::UniformCartesian Mesh_t; +typedef ippl::FieldLayout FieldLayout_t; +typedef ippl::OrthogonalRecursiveBisection ORB; + +using size_type = ippl::detail::size_type; + +template +using Vector = ippl::Vector; + +template +using Field = ippl::Field; + +template +using ParticleAttrib = ippl::ParticleAttrib; + +typedef Vector Vector_t; +typedef Field Field_t; +typedef Field VField_t; +typedef ippl::FFTPeriodicPoissonSolver Solver_t; + +const double pi = std::acos(-1.0); + +// Test programs have to define this variable for VTK dump purposes +extern const char* TestName; + +void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, + double dx, double dy, double dz) { + + + typename VField_t::view_type::host_mirror_type host_view = E.getHostMirror(); + + std::stringstream fname; + fname << "data/ef_"; + fname << std::setw(4) << std::setfill('0') << iteration; + fname << ".vtk"; + + Kokkos::deep_copy(host_view, E.getView()); + + Inform vtkout(NULL, fname.str().c_str(), Inform::OVERWRITE); + vtkout.precision(10); + vtkout.setf(std::ios::scientific, std::ios::floatfield); + + // start with header + vtkout << "# vtk DataFile Version 2.0" << endl; + vtkout << TestName << endl; + vtkout << "ASCII" << endl; + vtkout << "DATASET STRUCTURED_POINTS" << endl; + vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << endl; + vtkout << "ORIGIN " << -dx << " " << -dy << " " << -dz << endl; + vtkout << "SPACING " << dx << " " << dy << " " << dz << endl; + vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << endl; + + vtkout << "VECTORS E-Field float" << endl; + for (int z=0; z +class ChargedParticles : public ippl::ParticleBase { +public: + VField_t E_m; + Field_t rho_m; + + // ORB + ORB orb; + + Vector nr_m; + + ippl::e_dim_tag decomp_m[Dim]; + + Vector_t hr_m; + Vector_t rmin_m; + Vector_t rmax_m; + + double Q_m; + + std::string stype_m; + + std::shared_ptr solver_mp; + + double time_m; + + double rhoNorm_m; + + unsigned int loadbalancefreq_m; + + double loadbalancethreshold_m; + + +public: + ParticleAttrib q; // charge + typename ippl::ParticleBase::particle_position_type P; // particle 
velocity + typename ippl::ParticleBase::particle_position_type E; // electric field at particle position + + + /* + This constructor is mandatory for all derived classes from + ParticleBase as the bunch buffer uses this + */ + ChargedParticles(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + } + + ChargedParticles(PLayout& pl, + Vector_t hr, + Vector_t rmin, + Vector_t rmax, + ippl::e_dim_tag decomp[Dim], + double Q) + : ippl::ParticleBase(pl) + , hr_m(hr) + , rmin_m(rmin) + , rmax_m(rmax) + , Q_m(Q) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + setupBCs(); + for (unsigned int i = 0; i < Dim; i++) + decomp_m[i]=decomp[i]; + } + + ~ChargedParticles(){ } + + void setupBCs() { + setBCAllPeriodic(); + } + + void updateLayout(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticles& buffer, + bool& isFirstRepartition) { + // Update local fields + static IpplTimings::TimerRef tupdateLayout = IpplTimings::getTimer("updateLayout"); + IpplTimings::startTimer(tupdateLayout); + this->E_m.updateLayout(fl); + this->rho_m.updateLayout(fl); + + // Update layout with new FieldLayout + PLayout& layout = this->getLayout(); + layout.updateLayout(fl, mesh); + IpplTimings::stopTimer(tupdateLayout); + static IpplTimings::TimerRef tupdatePLayout = IpplTimings::getTimer("updatePB"); + IpplTimings::startTimer(tupdatePLayout); + if(!isFirstRepartition) { + layout.update(*this, buffer); + } + IpplTimings::stopTimer(tupdatePLayout); + } + + void initializeORB(FieldLayout_t& fl, Mesh_t& mesh) { + orb.initialize(fl, mesh, rho_m); + } + + void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticles& buffer, + bool& isFirstRepartition) { + // Repartition the domains + bool res = orb.binaryRepartition(this->R, fl, isFirstRepartition); + + if (res != true) { + std::cout << "Could not repartition!" 
<< std::endl; + return; + } + // Update + this->updateLayout(fl, mesh, buffer, isFirstRepartition); + this->solver_mp->setRhs(rho_m); + } + + bool balance(size_type totalP, const unsigned int nstep){ + if(std::strcmp(TestName,"UniformPlasmaTest") == 0) { + return (nstep % loadbalancefreq_m == 0); + } + else { + int local = 0; + std::vector res(Ippl::Comm->size()); + double equalPart = (double) totalP / Ippl::Comm->size(); + double dev = std::abs((double)this->getLocalNum() - equalPart) / totalP; + if (dev > loadbalancethreshold_m) + local = 1; + MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, Ippl::getComm()); + + for (unsigned int i = 0; i < res.size(); i++) { + if (res[i] == 1) + return true; + } + return false; + } + } + + void gatherStatistics(size_type totalP) { + std::vector imb(Ippl::Comm->size()); + double equalPart = (double) totalP / Ippl::Comm->size(); + double dev = (std::abs((double)this->getLocalNum() - equalPart) + / totalP) * 100.0; + MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, + Ippl::getComm()); + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/LoadBalance_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(5); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, rank, imbalance percentage" << endl; + } + + for(int r=0; r < Ippl::Comm->size(); ++r) { + csvout << time_m << " " + << r << " " + << imb[r] << endl; + } + } + + Ippl::Comm->barrier(); + + } + + void gatherCIC() { + + gather(this->E, E_m, this->R); + + } + + void scatterCIC(size_type totalP, unsigned int iteration, Vector_t& hrField) { + + + Inform m("scatter "); + + rho_m = 0.0; + scatter(q, rho_m, this->R); + + static IpplTimings::TimerRef sumTimer = IpplTimings::getTimer("Check"); + IpplTimings::startTimer(sumTimer); + double Q_grid = rho_m.sum(); + + size_type Total_particles = 0; + size_type local_particles = this->getLocalNum(); + + MPI_Reduce(&local_particles, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, 0, Ippl::getComm()); + + double rel_error = std::fabs((Q_m-Q_grid)/Q_m); + m << "Rel. error in charge conservation = " << rel_error << endl; + + if(Ippl::Comm->rank() == 0) { + if(Total_particles != totalP || rel_error > 1e-10) { + m << "Time step: " << iteration << endl; + m << "Total particles in the sim. " << totalP + << " " << "after update: " + << Total_particles << endl; + m << "Rel. 
error in charge conservation: " + << rel_error << endl; + std::abort(); + } + } + + rho_m = rho_m / (hrField[0] * hrField[1] * hrField[2]); + + rhoNorm_m = norm(rho_m); + IpplTimings::stopTimer(sumTimer); + + //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + + //rho = rho_e - rho_i + rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + } + + void initSolver() { + + Inform m("solver "); + if(stype_m == "FFT") + initFFTSolver(); + else + m << "No solver matches the argument" << endl; + + } + + void initFFTSolver() { + ippl::ParameterList sp; + sp.add("output_type", Solver_t::GRAD); + sp.add("use_heffte_defaults", false); + sp.add("use_pencils", true); + sp.add("use_reorder", false); + sp.add("use_gpu_aware", true); + sp.add("comm", ippl::p2p_pl); + sp.add("r2c_direction", 0); + + solver_mp = std::make_shared(); + + solver_mp->mergeParameters(sp); + + solver_mp->setRhs(rho_m); + + solver_mp->setLhs(E_m); + } + + + + void dumpData() { + + auto Pview = P.getView(); + + double Energy = 0.0; + + Kokkos::parallel_reduce("Particle Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + valL += myVal; + }, Kokkos::Sum(Energy)); + + Energy *= 0.5; + double gEnergy = 0.0; + + MPI_Reduce(&Energy, &gEnergy, 1, + MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + + const int nghostE = E_m.getNghost(); + auto Eview = E_m.getView(); + Vector_t normE; + using mdrange_type = Kokkos::MDRangePolicy>; + + for (unsigned d=0; d(temp)); + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + normE[d] = std::sqrt(globaltemp); + } + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/ParticleField_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Kinetic energy, Rho_norm2, Ex_norm2, Ey_norm2, Ez_norm2" << endl; + } + + csvout << time_m << " " + << gEnergy << " " + << rhoNorm_m << " " + << normE[0] << " " + << normE[1] << " " + << normE[2] << endl; + } + + Ippl::Comm->barrier(); + } + + void dumpLandau() { + + const int nghostE = E_m.getNghost(); + auto Eview = E_m.getView(); + double fieldEnergy, ExAmp; + using mdrange_type = Kokkos::MDRangePolicy>; + + double temp = 0.0; + Kokkos::parallel_reduce("Ex inner product", + mdrange_type({nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, + Eview.extent(1) - nghostE, + Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, + const size_t k, double& valL) + { + double myVal = std::pow(Eview(i, j, k)[0], 2); + valL += myVal; + }, Kokkos::Sum(temp)); + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; + + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", + mdrange_type({nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, + Eview.extent(1) - nghostE, + Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, + const size_t k, double& valL) + { + double myVal = std::fabs(Eview(i, j, k)[0]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + ExAmp = 0.0; + MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + if (Ippl::Comm->rank() == 0) { + 
std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + void dumpBumponTail() { + + const int nghostE = E_m.getNghost(); + auto Eview = E_m.getView(); + double fieldEnergy, EzAmp; + using mdrange_type = Kokkos::MDRangePolicy>; + + double temp = 0.0; + Kokkos::parallel_reduce("Ex inner product", + mdrange_type({nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, + Eview.extent(1) - nghostE, + Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, + const size_t k, double& valL) + { + double myVal = std::pow(Eview(i, j, k)[2], 2); + valL += myVal; + }, Kokkos::Sum(temp)); + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; + + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", + mdrange_type({nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, + Eview.extent(1) - nghostE, + Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, + const size_t k, double& valL) + { + double myVal = std::fabs(Eview(i, j, k)[2]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + EzAmp = 0.0; + MPI_Reduce(&tempMax, &EzAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldBumponTail_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ez_field_energy, Ez_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << EzAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + void dumpParticleData() { + + typename ParticleAttrib::HostMirror R_host = this->R.getHostMirror(); + typename ParticleAttrib::HostMirror P_host = this->P.getHostMirror(); + Kokkos::deep_copy(R_host, this->R.getView()); + Kokkos::deep_copy(P_host, P.getView()); + std::stringstream pname; + pname << "data/ParticleIC_"; + pname << Ippl::Comm->rank(); + pname << ".csv"; + Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout.precision(10); + pcsvout.setf(std::ios::scientific, std::ios::floatfield); + pcsvout << "R_x, R_y, R_z, V_x, V_y, V_z" << endl; + for (size_type i = 0; i< this->getLocalNum(); i++) { + pcsvout << R_host(i)[0] << " " + << R_host(i)[1] << " " + << R_host(i)[2] << " " + << P_host(i)[0] << " " + << P_host(i)[1] << " " + << P_host(i)[2] << endl; + } + Ippl::Comm->barrier(); + } + + void dumpLocalDomains(const FieldLayout_t& fl, const unsigned int step) { + + if (Ippl::Comm->rank() == 0) { + const typename FieldLayout_t::host_mirror_type domains = fl.getHostLocalDomains(); + std::ofstream myfile; + myfile.open("data/domains" + std::to_string(step) + ".txt"); + for (unsigned int i = 0; i < domains.size(); ++i) { + myfile << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " + << domains[i][0].first() << " " << domains[i][1].last() << " " << domains[i][2].first() << " " + << domains[i][0].last() << 
" " << domains[i][1].first() << " " << domains[i][2].first() << " " + << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].last() + << "\n"; + } + myfile.close(); + } + Ippl::Comm->barrier(); + } + +private: + void setBCAllPeriodic() { + + this->setParticleBC(ippl::BC::PERIODIC); + } + +}; diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp new file mode 100644 index 000000000..e78dd91bf --- /dev/null +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -0,0 +1,410 @@ +// Landau Damping Test +// Usage: +// srun ./LandauDamping --info 10 +// nx = No. cell-centered points in the x-direction +// ny = No. cell-centered points in the y-direction +// nz = No. cell-centered points in the z-direction +// Np = Total no. of macro-particles in the simulation +// Nt = Number of time steps +// stype = Field solver type e.g., FFT +// lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance +// percentage which can be tolerated and beyond which +// particle load balancing occurs. A value of 0.01 is good for many typical +// simulations. +// ovfactor = Over-allocation factor for the buffers used in the communication. Typical +// values are 1.0, 2.0. Value 1.0 means no over-allocation. +// Example: +// srun ./LandauDamping 128 128 128 10000 10 FFT 0.01 2.0 --info 10 +// +// Copyright (c) 2021, Sriramkrishnan Muralikrishnan, +// Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . 
+// + +#include "ChargedParticles.hpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T k, alpha, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& k_, const T& alpha_, + const T& u_) + : k(k_), alpha(alpha_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = x + (alpha * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = 1 + (alpha * std::cos(k * x)); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type alpha; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + value_type& alpha_, T& k_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + alpha(alpha_), k(k_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = u / (1 + alpha); + Newton1D solver(k[d], alpha, u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& alpha, const double& k) { + double cdf = x + (alpha / k) * std::sin(k * x); + return cdf; +} + +KOKKOS_FUNCTION +double PDF(const Vector_t& xvec, const double& alpha, + const Vector_t& kw, const unsigned Dim) { + double pdf = 1.0; + + for (unsigned d = 0; d < Dim; ++d) { + pdf *= (1.0 + alpha * std::cos(kw[d] * xvec[d])); + } + return pdf; +} + +const char* TestName = "LandauDamping"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + Inform msg("LandauDamping"); + Inform msg2all("LandauDamping",INFORM_ALL_NODES); + + Ippl::Comm->setDefaultOverallocation(std::atof(argv[8])); + + auto start = std::chrono::high_resolution_clock::now(); + ippl::Vector nr = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("kick"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("drift"); + static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); + static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); + static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("domainDecomp"); + + 
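Newton1D and generate_random above implement inverse-CDF sampling of the Landau-damping
position distribution f(x) ~ 1 + alpha*cos(kw*x): per dimension, a uniform sample u drawn
between CDF(rmin) and CDF(rmax) is mapped to a position by solving
CDF(x) = x + alpha*sin(kw*x)/kw = u with Newton's method, while velocities are drawn from a
unit normal. A minimal self-contained sketch of that root solve (plain C++ without Kokkos;
the helper name invertCDF is only for the sketch, and the parameter values are just the ones
this test uses later):

#include <cmath>
#include <cstdio>

// Solve CDF(x) = x + alpha*sin(k*x)/k = u, the same update as Newton1D above.
double invertCDF(double u, double alpha, double k) {
    double x = u / (1.0 + alpha);                      // initial guess used by generate_random
    for (int it = 0; it < 20; ++it) {                  // max_iter = 20, tol = 1e-12 as above
        double f = x + alpha * std::sin(k * x) / k - u;
        if (std::fabs(f) < 1e-12) break;
        double fprime = 1.0 + alpha * std::cos(k * x); // CDF'(x) is the position PDF
        x -= f / fprime;
    }
    return x;
}

int main() {
    double alpha = 0.05, kw = 0.5;                     // values used in this test
    double u = 3.1;                                    // a uniform sample in [CDF(0), CDF(4*pi))
    std::printf("sampled position x = %.12f\n", invertCDF(u, alpha, kw));
    return 0;
}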
IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[4]); + const unsigned int nt = std::atoi(argv[5]); + + msg << "Landau damping" + << endl + << "nt " << nt << " Np= " + << totalP << " grid = " << nr + << endl; + + using bunch_type = ChargedParticles; + + std::unique_ptr P; + + ippl::NDIndex domain; + for (unsigned i = 0; i< Dim; i++) { + domain[i] = ippl::Index(nr[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::PARALLEL; + } + + // create mesh and layout objects for this problem domain + Vector_t kw = {0.5, 0.5, 0.5}; + double alpha = 0.05; + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw ; + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; + + Vector_t hr = {dx, dy, dz}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + const double dt = 0.5*dx; + + const bool isAllPeriodic=true; + Mesh_t mesh(domain, hr, origin); + FieldLayout_t FL(domain, decomp, isAllPeriodic); + PLayout_t PL(FL, mesh); + + //Q = -\int\int f dx dv + double Q = -rmax[0] * rmax[1] * rmax[2]; + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + + P->nr_m = nr; + + P->E_m.initialize(mesh, FL); + P->rho_m.initialize(mesh, FL); + + bunch_type bunchBuffer(PL); + + P->stype_m = argv[6]; + P->initSolver(); + P->time_m = 0.0; + P->loadbalancethreshold_m = std::atof(argv[7]); + + bool isFirstRepartition; + + if ((P->loadbalancethreshold_m != 1.0) && (Ippl::Comm->size() > 1)) { + msg << "Starting first repartition" << endl; + IpplTimings::startTimer(domainDecomposition); + isFirstRepartition = true; + const ippl::NDIndex& lDom = FL.getLocalNDIndex(); + const int nghost = P->rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + auto rhoview = P->rho_m.getView(); + + Kokkos::parallel_for("Assign initial rho based on PDF", + mdrange_type({nghost, nghost, nghost}, + {rhoview.extent(0) - nghost, + rhoview.extent(1) - nghost, + rhoview.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + //local to global index conversion + const size_t ig = i + lDom[0].first() - nghost; + const size_t jg = j + lDom[1].first() - nghost; + const size_t kg = k + lDom[2].first() - nghost; + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + Vector_t xvec = {x, y, z}; + + rhoview(i, j, k) = PDF(xvec, alpha, kw, Dim); + + }); + + Kokkos::fence(); + + P->initializeORB(FL, mesh); + P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); + IpplTimings::stopTimer(domainDecomposition); + } + + msg << "First domain decomposition done" << endl; + IpplTimings::startTimer(particleCreation); + + typedef ippl::detail::RegionLayout RegionLayout_t; + const RegionLayout_t& RLayout = PL.getRegionLayout(); + const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); + Vector_t Nr, Dr, minU, maxU; + int myRank = Ippl::Comm->rank(); + for (unsigned d = 0; d rank() < rest ) + ++nloc; + + P->create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m/totalP; + msg << "particles created and initial conditions assigned " << endl; + isFirstRepartition = false; + //The update after the particle creation is not needed as 
the + //particles are generated locally + + IpplTimings::startTimer(DummySolveTimer); + P->rho_m = 0.0; + P->solver_mp->solve(); + IpplTimings::stopTimer(DummySolveTimer); + + P->scatterCIC(totalP, 0, hr); + + IpplTimings::startTimer(SolveTimer); + P->solver_mp->solve(); + IpplTimings::stopTimer(SolveTimer); + + P->gatherCIC(); + + IpplTimings::startTimer(dumpDataTimer); + P->dumpLandau(); + P->gatherStatistics(totalP); + //P->dumpLocalDomains(FL, 0); + IpplTimings::stopTimer(dumpDataTimer); + + // begin main timestep loop + msg << "Starting iterations ..." << endl; + for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); + + //drift + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + IpplTimings::stopTimer(RTimer); + + //Since the particles have moved spatially update them to correct processors + IpplTimings::startTimer(updateTimer); + PL.update(*P, bunchBuffer); + IpplTimings::stopTimer(updateTimer); + + // Domain Decomposition + if (P->balance(totalP, it+1)) { + msg << "Starting repartition" << endl; + IpplTimings::startTimer(domainDecomposition); + P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); + IpplTimings::stopTimer(domainDecomposition); + //IpplTimings::startTimer(dumpDataTimer); + //P->dumpLocalDomains(FL, it+1); + //IpplTimings::stopTimer(dumpDataTimer); + } + + + //scatter the charge onto the underlying grid + P->scatterCIC(totalP, it+1, hr); + + //Field solve + IpplTimings::startTimer(SolveTimer); + P->solver_mp->solve(); + IpplTimings::stopTimer(SolveTimer); + + // gather E field + P->gatherCIC(); + + //kick + IpplTimings::startTimer(PTimer); + P->P = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); + + P->time_m += dt; + IpplTimings::startTimer(dumpDataTimer); + P->dumpLandau(); + P->gatherStatistics(totalP); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + } + + msg << "LandauDamping: End." 
<< endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + auto end = std::chrono::high_resolution_clock::now(); + + std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); + std::cout << "Elapsed time: " << time_chrono.count() << std::endl; + + + return 0; +} From 8c282c9470bcbda6f94ed360ea42a9d17dca2818 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 24 Oct 2022 10:37:34 +0200 Subject: [PATCH 003/117] class name changed for ChargedParticles --- alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index e64417e19..c2f12fe21 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -1,4 +1,4 @@ -// ChargedParticles header file +// ChargedParticlesPIF header file // Defines a particle attribute for charged particles to be used in // test programs // @@ -128,7 +128,7 @@ void dumpVTK(Field_t& rho, int nx, int ny, int nz, int iteration, } template -class ChargedParticles : public ippl::ParticleBase { +class ChargedParticlesPIF : public ippl::ParticleBase { public: VField_t E_m; Field_t rho_m; @@ -169,7 +169,7 @@ class ChargedParticles : public ippl::ParticleBase { This constructor is mandatory for all derived classes from ParticleBase as the bunch buffer uses this */ - ChargedParticles(PLayout& pl) + ChargedParticlesPIF(PLayout& pl) : ippl::ParticleBase(pl) { // register the particle attributes @@ -178,7 +178,7 @@ class ChargedParticles : public ippl::ParticleBase { this->addAttribute(E); } - ChargedParticles(PLayout& pl, + ChargedParticlesPIF(PLayout& pl, Vector_t hr, Vector_t rmin, Vector_t rmax, @@ -199,13 +199,13 @@ class ChargedParticles : public ippl::ParticleBase { decomp_m[i]=decomp[i]; } - ~ChargedParticles(){ } + ~ChargedParticlesPIF(){ } void setupBCs() { setBCAllPeriodic(); } - void updateLayout(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticles& buffer, + void updateLayout(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, bool& isFirstRepartition) { // Update local fields static IpplTimings::TimerRef tupdateLayout = IpplTimings::getTimer("updateLayout"); @@ -229,7 +229,7 @@ class ChargedParticles : public ippl::ParticleBase { orb.initialize(fl, mesh, rho_m); } - void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticles& buffer, + void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, bool& isFirstRepartition) { // Repartition the domains bool res = orb.binaryRepartition(this->R, fl, isFirstRepartition); From 3cb368d0f6a437d611e32c9b4e31e268e1ed944b Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 24 Oct 2022 10:41:44 +0200 Subject: [PATCH 004/117] CMakeLists added for alpine --- alpine/CMakeLists.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 alpine/CMakeLists.txt diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt new file mode 100644 index 000000000..ffba1ba3c --- /dev/null +++ b/alpine/CMakeLists.txt @@ -0,0 +1,26 @@ +macro(list_subdirectories retval curdir) + file(GLOB sub-dir RELATIVE ${curdir} *) + set(list_of_dirs "") + foreach(dir ${sub-dir}) + if(IS_DIRECTORY ${curdir}/${dir}) + set(list_of_dirs ${list_of_dirs} ${dir}) + endif() + endforeach() + set(${retval} ${list_of_dirs}) +endmacro() + +#list_subdirectories("TESTS" 
${CMAKE_CURRENT_SOURCE_DIR}) +#foreach (test ${TESTS}) +# add_subdirectory (${test}) +#endforeach() + +add_subdirectory (ElectrostaticPIC) + +# vi: set et ts=4 sw=4 sts=4: + +# Local Variables: +# mode: cmake +# cmake-tab-width: 4 +# indent-tabs-mode: nil +# require-final-newline: nil +# End: From 440d593995e44e1db73d90d915a1a4e1aaddd05f Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 24 Oct 2022 16:20:01 +0200 Subject: [PATCH 005/117] Landau damping modified for PIF. Need to do scatter, solver and gather --- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 117 +++---------------- src/FieldLayout/FieldLayout.hpp | 8 +- 2 files changed, 23 insertions(+), 102 deletions(-) diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index e78dd91bf..ec14d1b15 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -1,20 +1,13 @@ -// Landau Damping Test +// Electrostatic Landau damping test with Particle-in-Fourier schemes // Usage: -// srun ./LandauDamping --info 10 -// nx = No. cell-centered points in the x-direction -// ny = No. cell-centered points in the y-direction -// nz = No. cell-centered points in the z-direction +// srun ./LandauDampingPIF --info 10 +// nx = No. of Fourier modes in the x-direction +// ny = No. of Fourier modes in the y-direction +// nz = No. of Fourier modes in the z-direction // Np = Total no. of macro-particles in the simulation // Nt = Number of time steps -// stype = Field solver type e.g., FFT -// lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance -// percentage which can be tolerated and beyond which -// particle load balancing occurs. A value of 0.01 is good for many typical -// simulations. -// ovfactor = Over-allocation factor for the buffers used in the communication. Typical -// values are 1.0, 2.0. Value 1.0 means no over-allocation. // Example: -// srun ./LandauDamping 128 128 128 10000 10 FFT 0.01 2.0 --info 10 +// srun ./LandauDampingPIF 128 128 128 10000 10 --info 10 // // Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland @@ -31,7 +24,7 @@ // along with IPPL. If not, see . 
// -#include "ChargedParticles.hpp" +#include "ChargedParticlesPIF.hpp" #include #include #include @@ -146,17 +139,14 @@ double PDF(const Vector_t& xvec, const double& alpha, return pdf; } -const char* TestName = "LandauDamping"; +const char* TestName = "LandauDampingPIF"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); - Inform msg("LandauDamping"); - Inform msg2all("LandauDamping",INFORM_ALL_NODES); + Inform msg("LandauDampingPIF"); + Inform msg2all("LandauDampingPIF",INFORM_ALL_NODES); - Ippl::Comm->setDefaultOverallocation(std::atof(argv[8])); - - auto start = std::chrono::high_resolution_clock::now(); ippl::Vector nr = { std::atoi(argv[1]), std::atoi(argv[2]), @@ -169,9 +159,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("kick"); static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("drift"); static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); - static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); - static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("domainDecomp"); IpplTimings::startTimer(mainTimer); @@ -181,10 +169,10 @@ int main(int argc, char *argv[]){ msg << "Landau damping" << endl << "nt " << nt << " Np= " - << totalP << " grid = " << nr + << totalP << " Fourier modes = " << nr << endl; - using bunch_type = ChargedParticles; + using bunch_type = ChargedParticlesPIF; std::unique_ptr P; @@ -195,7 +183,7 @@ int main(int argc, char *argv[]){ ippl::e_dim_tag decomp[Dim]; for (unsigned d = 0; d < Dim; ++d) { - decomp[d] = ippl::PARALLEL; + decomp[d] = ippl::SERIAL; } // create mesh and layout objects for this problem domain @@ -227,69 +215,22 @@ int main(int argc, char *argv[]){ bunch_type bunchBuffer(PL); - P->stype_m = argv[6]; P->initSolver(); P->time_m = 0.0; - P->loadbalancethreshold_m = std::atof(argv[7]); - - bool isFirstRepartition; - - if ((P->loadbalancethreshold_m != 1.0) && (Ippl::Comm->size() > 1)) { - msg << "Starting first repartition" << endl; - IpplTimings::startTimer(domainDecomposition); - isFirstRepartition = true; - const ippl::NDIndex& lDom = FL.getLocalNDIndex(); - const int nghost = P->rho_m.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - auto rhoview = P->rho_m.getView(); - - Kokkos::parallel_for("Assign initial rho based on PDF", - mdrange_type({nghost, nghost, nghost}, - {rhoview.extent(0) - nghost, - rhoview.extent(1) - nghost, - rhoview.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k) - { - //local to global index conversion - const size_t ig = i + lDom[0].first() - nghost; - const size_t jg = j + lDom[1].first() - nghost; - const size_t kg = k + lDom[2].first() - nghost; - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - Vector_t xvec = {x, y, z}; - - rhoview(i, j, k) = PDF(xvec, alpha, kw, Dim); - - }); - - Kokkos::fence(); - - P->initializeORB(FL, mesh); - P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); - IpplTimings::stopTimer(domainDecomposition); - } - - msg << "First domain decomposition done" << endl; + IpplTimings::startTimer(particleCreation); typedef ippl::detail::RegionLayout RegionLayout_t; const RegionLayout_t& RLayout = PL.getRegionLayout(); const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); - Vector_t Nr, Dr, minU, maxU; + Vector_t minU, maxU; int 
myRank = Ippl::Comm->rank(); for (unsigned d = 0; d size(); size_type nloc = (size_type)(factor * totalP); size_type Total_particles = 0; @@ -313,14 +254,6 @@ int main(int argc, char *argv[]){ P->q = P->Q_m/totalP; msg << "particles created and initial conditions assigned " << endl; - isFirstRepartition = false; - //The update after the particle creation is not needed as the - //particles are generated locally - - IpplTimings::startTimer(DummySolveTimer); - P->rho_m = 0.0; - P->solver_mp->solve(); - IpplTimings::stopTimer(DummySolveTimer); P->scatterCIC(totalP, 0, hr); @@ -332,7 +265,6 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpLandau(); - P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -360,17 +292,6 @@ int main(int argc, char *argv[]){ PL.update(*P, bunchBuffer); IpplTimings::stopTimer(updateTimer); - // Domain Decomposition - if (P->balance(totalP, it+1)) { - msg << "Starting repartition" << endl; - IpplTimings::startTimer(domainDecomposition); - P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); - IpplTimings::stopTimer(domainDecomposition); - //IpplTimings::startTimer(dumpDataTimer); - //P->dumpLocalDomains(FL, it+1); - //IpplTimings::stopTimer(dumpDataTimer); - } - //scatter the charge onto the underlying grid P->scatterCIC(totalP, it+1, hr); @@ -391,7 +312,6 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); P->dumpLandau(); - P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } @@ -400,11 +320,6 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(mainTimer); IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); - auto end = std::chrono::high_resolution_clock::now(); - - std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); - std::cout << "Elapsed time: " << time_chrono.count() << std::endl; - return 0; } diff --git a/src/FieldLayout/FieldLayout.hpp b/src/FieldLayout/FieldLayout.hpp index 34cf92e0e..47d12a9e6 100644 --- a/src/FieldLayout/FieldLayout.hpp +++ b/src/FieldLayout/FieldLayout.hpp @@ -132,7 +132,13 @@ namespace ippl { isAllPeriodic_m = isAllPeriodic; - if (nRanks < 2) { + bool isAllSerial = true; + + for (unsigned d = 0; d < Dim; ++d) { + isAllSerial = isAllSerial && (requestedLayout_m[d] == SERIAL); + } + + if ((nRanks < 2) || isAllSerial) { Kokkos::resize(dLocalDomains_m, nRanks); Kokkos::resize(hLocalDomains_m, nRanks); hLocalDomains_m(0) = domain; From 27856b254fcf056c042c00b00a462050b08052cb Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 2 Nov 2022 12:04:32 +0100 Subject: [PATCH 006/117] scatterPIF implemented and seems to be working --- alpine/CMakeLists.txt | 1 + .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 892 ++++++++---------- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 102 +- src/FieldLayout/FieldLayout.hpp | 31 +- src/Particle/ParticleAttrib.h | 88 +- src/Particle/ParticleAttrib.hpp | 95 ++ 6 files changed, 665 insertions(+), 544 deletions(-) diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt index ffba1ba3c..3a6d622c5 100644 --- a/alpine/CMakeLists.txt +++ b/alpine/CMakeLists.txt @@ -15,6 +15,7 @@ endmacro() #endforeach() add_subdirectory (ElectrostaticPIC) +add_subdirectory (ElectrostaticPIF) # vi: set et ts=4 sw=4 sts=4: diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 
c2f12fe21..b13b9f156 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -40,8 +40,11 @@ template using ParticleAttrib = ippl::ParticleAttrib; typedef Vector Vector_t; +typedef Vector, Dim> CxVector_t; typedef Field Field_t; +typedef Field, Dim> CxField_t; typedef Field VField_t; +typedef Field CxVField_t; typedef ippl::FFTPeriodicPoissonSolver Solver_t; const double pi = std::acos(-1.0); @@ -49,89 +52,89 @@ const double pi = std::acos(-1.0); // Test programs have to define this variable for VTK dump purposes extern const char* TestName; -void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, - double dx, double dy, double dz) { - - - typename VField_t::view_type::host_mirror_type host_view = E.getHostMirror(); - - std::stringstream fname; - fname << "data/ef_"; - fname << std::setw(4) << std::setfill('0') << iteration; - fname << ".vtk"; - - Kokkos::deep_copy(host_view, E.getView()); - - Inform vtkout(NULL, fname.str().c_str(), Inform::OVERWRITE); - vtkout.precision(10); - vtkout.setf(std::ios::scientific, std::ios::floatfield); - - // start with header - vtkout << "# vtk DataFile Version 2.0" << endl; - vtkout << TestName << endl; - vtkout << "ASCII" << endl; - vtkout << "DATASET STRUCTURED_POINTS" << endl; - vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << endl; - vtkout << "ORIGIN " << -dx << " " << -dy << " " << -dz << endl; - vtkout << "SPACING " << dx << " " << dy << " " << dz << endl; - vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << endl; - - vtkout << "VECTORS E-Field float" << endl; - for (int z=0; z class ChargedParticlesPIF : public ippl::ParticleBase { public: - VField_t E_m; - Field_t rho_m; + CxVField_t E_m; + CxField_t rho_m; // ORB ORB orb; @@ -205,415 +208,358 @@ class ChargedParticlesPIF : public ippl::ParticleBase { setBCAllPeriodic(); } - void updateLayout(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, - bool& isFirstRepartition) { - // Update local fields - static IpplTimings::TimerRef tupdateLayout = IpplTimings::getTimer("updateLayout"); - IpplTimings::startTimer(tupdateLayout); - this->E_m.updateLayout(fl); - this->rho_m.updateLayout(fl); - - // Update layout with new FieldLayout - PLayout& layout = this->getLayout(); - layout.updateLayout(fl, mesh); - IpplTimings::stopTimer(tupdateLayout); - static IpplTimings::TimerRef tupdatePLayout = IpplTimings::getTimer("updatePB"); - IpplTimings::startTimer(tupdatePLayout); - if(!isFirstRepartition) { - layout.update(*this, buffer); - } - IpplTimings::stopTimer(tupdatePLayout); - } - - void initializeORB(FieldLayout_t& fl, Mesh_t& mesh) { - orb.initialize(fl, mesh, rho_m); - } - - void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, - bool& isFirstRepartition) { - // Repartition the domains - bool res = orb.binaryRepartition(this->R, fl, isFirstRepartition); - - if (res != true) { - std::cout << "Could not repartition!" 
<< std::endl; - return; - } - // Update - this->updateLayout(fl, mesh, buffer, isFirstRepartition); - this->solver_mp->setRhs(rho_m); - } - - bool balance(size_type totalP, const unsigned int nstep){ - if(std::strcmp(TestName,"UniformPlasmaTest") == 0) { - return (nstep % loadbalancefreq_m == 0); - } - else { - int local = 0; - std::vector res(Ippl::Comm->size()); - double equalPart = (double) totalP / Ippl::Comm->size(); - double dev = std::abs((double)this->getLocalNum() - equalPart) / totalP; - if (dev > loadbalancethreshold_m) - local = 1; - MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, Ippl::getComm()); - - for (unsigned int i = 0; i < res.size(); i++) { - if (res[i] == 1) - return true; - } - return false; - } - } + //void updateLayout(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, + // bool& isFirstRepartition) { + // // Update local fields + // static IpplTimings::TimerRef tupdateLayout = IpplTimings::getTimer("updateLayout"); + // IpplTimings::startTimer(tupdateLayout); + // this->E_m.updateLayout(fl); + // this->rho_m.updateLayout(fl); + + // // Update layout with new FieldLayout + // PLayout& layout = this->getLayout(); + // layout.updateLayout(fl, mesh); + // IpplTimings::stopTimer(tupdateLayout); + // static IpplTimings::TimerRef tupdatePLayout = IpplTimings::getTimer("updatePB"); + // IpplTimings::startTimer(tupdatePLayout); + // if(!isFirstRepartition) { + // layout.update(*this, buffer); + // } + // IpplTimings::stopTimer(tupdatePLayout); + //} + + //void initializeORB(FieldLayout_t& fl, Mesh_t& mesh) { + // orb.initialize(fl, mesh, rho_m); + //} + + //void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, + // bool& isFirstRepartition) { + // // Repartition the domains + // bool res = orb.binaryRepartition(this->R, fl, isFirstRepartition); + + // if (res != true) { + // std::cout << "Could not repartition!" 
<< std::endl; + // return; + // } + // // Update + // this->updateLayout(fl, mesh, buffer, isFirstRepartition); + // this->solver_mp->setRhs(rho_m); + //} + + //bool balance(size_type totalP, const unsigned int nstep){ + // if(std::strcmp(TestName,"UniformPlasmaTest") == 0) { + // return (nstep % loadbalancefreq_m == 0); + // } + // else { + // int local = 0; + // std::vector res(Ippl::Comm->size()); + // double equalPart = (double) totalP / Ippl::Comm->size(); + // double dev = std::abs((double)this->getLocalNum() - equalPart) / totalP; + // if (dev > loadbalancethreshold_m) + // local = 1; + // MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, Ippl::getComm()); + + // for (unsigned int i = 0; i < res.size(); i++) { + // if (res[i] == 1) + // return true; + // } + // return false; + // } + //} + + //void gatherStatistics(size_type totalP) { + // std::vector imb(Ippl::Comm->size()); + // double equalPart = (double) totalP / Ippl::Comm->size(); + // double dev = (std::abs((double)this->getLocalNum() - equalPart) + // / totalP) * 100.0; + // MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, + // Ippl::getComm()); + // + // if (Ippl::Comm->rank() == 0) { + // std::stringstream fname; + // fname << "data/LoadBalance_"; + // fname << Ippl::Comm->size(); + // fname << ".csv"; + + // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + // csvout.precision(5); + // csvout.setf(std::ios::scientific, std::ios::floatfield); + + // if(time_m == 0.0) { + // csvout << "time, rank, imbalance percentage" << endl; + // } + + // for(int r=0; r < Ippl::Comm->size(); ++r) { + // csvout << time_m << " " + // << r << " " + // << imb[r] << endl; + // } + // } + + // Ippl::Comm->barrier(); + // + //} + + void gather() { + + gatherPIF(this->E, E_m, this->R); - void gatherStatistics(size_type totalP) { - std::vector imb(Ippl::Comm->size()); - double equalPart = (double) totalP / Ippl::Comm->size(); - double dev = (std::abs((double)this->getLocalNum() - equalPart) - / totalP) * 100.0; - MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, - Ippl::getComm()); - - if (Ippl::Comm->rank() == 0) { - std::stringstream fname; - fname << "data/LoadBalance_"; - fname << Ippl::Comm->size(); - fname << ".csv"; - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(5); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - if(time_m == 0.0) { - csvout << "time, rank, imbalance percentage" << endl; - } - - for(int r=0; r < Ippl::Comm->size(); ++r) { - csvout << time_m << " " - << r << " " - << imb[r] << endl; - } - } - - Ippl::Comm->barrier(); - } - void gatherCIC() { - - gather(this->E, E_m, this->R); - - } - - void scatterCIC(size_type totalP, unsigned int iteration, Vector_t& hrField) { - - - Inform m("scatter "); - - rho_m = 0.0; - scatter(q, rho_m, this->R); - - static IpplTimings::TimerRef sumTimer = IpplTimings::getTimer("Check"); - IpplTimings::startTimer(sumTimer); - double Q_grid = rho_m.sum(); - - size_type Total_particles = 0; - size_type local_particles = this->getLocalNum(); - - MPI_Reduce(&local_particles, &Total_particles, 1, - MPI_UNSIGNED_LONG, MPI_SUM, 0, Ippl::getComm()); - - double rel_error = std::fabs((Q_m-Q_grid)/Q_m); - m << "Rel. error in charge conservation = " << rel_error << endl; - - if(Ippl::Comm->rank() == 0) { - if(Total_particles != totalP || rel_error > 1e-10) { - m << "Time step: " << iteration << endl; - m << "Total particles in the sim. " << totalP - << " " << "after update: " - << Total_particles << endl; - m << "Rel. 
error in charge conservation: " - << rel_error << endl; - std::abort(); - } - } - - rho_m = rho_m / (hrField[0] * hrField[1] * hrField[2]); - - rhoNorm_m = norm(rho_m); - IpplTimings::stopTimer(sumTimer); + void scatter() { + + Inform m("scatter "); + rho_m = {0.0, 0.0}; + scatterPIF(q, rho_m, this->R); - //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + rho_m = rho_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); - //rho = rho_e - rho_i - rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); } void initSolver() { Inform m("solver "); - if(stype_m == "FFT") - initFFTSolver(); - else - m << "No solver matches the argument" << endl; - - } - - void initFFTSolver() { - ippl::ParameterList sp; - sp.add("output_type", Solver_t::GRAD); - sp.add("use_heffte_defaults", false); - sp.add("use_pencils", true); - sp.add("use_reorder", false); - sp.add("use_gpu_aware", true); - sp.add("comm", ippl::p2p_pl); - sp.add("r2c_direction", 0); - - solver_mp = std::make_shared(); - - solver_mp->mergeParameters(sp); - solver_mp->setRhs(rho_m); - - solver_mp->setLhs(E_m); } - void dumpData() { - - auto Pview = P.getView(); - - double Energy = 0.0; - - Kokkos::parallel_reduce("Particle Energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = dot(Pview(i), Pview(i)).apply(); - valL += myVal; - }, Kokkos::Sum(Energy)); - - Energy *= 0.5; - double gEnergy = 0.0; - - MPI_Reduce(&Energy, &gEnergy, 1, - MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - - - const int nghostE = E_m.getNghost(); - auto Eview = E_m.getView(); - Vector_t normE; - using mdrange_type = Kokkos::MDRangePolicy>; - - for (unsigned d=0; d(temp)); - double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - normE[d] = std::sqrt(globaltemp); - } - - if (Ippl::Comm->rank() == 0) { - std::stringstream fname; - fname << "data/ParticleField_"; - fname << Ippl::Comm->size(); - fname << ".csv"; - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - if(time_m == 0.0) { - csvout << "time, Kinetic energy, Rho_norm2, Ex_norm2, Ey_norm2, Ez_norm2" << endl; - } - - csvout << time_m << " " - << gEnergy << " " - << rhoNorm_m << " " - << normE[0] << " " - << normE[1] << " " - << normE[2] << endl; - } - - Ippl::Comm->barrier(); - } - - void dumpLandau() { - - const int nghostE = E_m.getNghost(); - auto Eview = E_m.getView(); - double fieldEnergy, ExAmp; - using mdrange_type = Kokkos::MDRangePolicy>; - - double temp = 0.0; - Kokkos::parallel_reduce("Ex inner product", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::pow(Eview(i, j, k)[0], 2); - valL += myVal; - }, Kokkos::Sum(temp)); - double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; - - double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::fabs(Eview(i, j, k)[0]); - if(myVal > valL) valL = 
myVal; - }, Kokkos::Max(tempMax)); - ExAmp = 0.0; - MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - - - if (Ippl::Comm->rank() == 0) { - std::stringstream fname; - fname << "data/FieldLandau_"; - fname << Ippl::Comm->size(); - fname << ".csv"; - - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - if(time_m == 0.0) { - csvout << "time, Ex_field_energy, Ex_max_norm" << endl; - } - - csvout << time_m << " " - << fieldEnergy << " " - << ExAmp << endl; - - } - - Ippl::Comm->barrier(); - } - - void dumpBumponTail() { - - const int nghostE = E_m.getNghost(); - auto Eview = E_m.getView(); - double fieldEnergy, EzAmp; - using mdrange_type = Kokkos::MDRangePolicy>; - - double temp = 0.0; - Kokkos::parallel_reduce("Ex inner product", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::pow(Eview(i, j, k)[2], 2); - valL += myVal; - }, Kokkos::Sum(temp)); - double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; - - double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::fabs(Eview(i, j, k)[2]); - if(myVal > valL) valL = myVal; - }, Kokkos::Max(tempMax)); - EzAmp = 0.0; - MPI_Reduce(&tempMax, &EzAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - - - if (Ippl::Comm->rank() == 0) { - std::stringstream fname; - fname << "data/FieldBumponTail_"; - fname << Ippl::Comm->size(); - fname << ".csv"; - - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - if(time_m == 0.0) { - csvout << "time, Ez_field_energy, Ez_max_norm" << endl; - } - - csvout << time_m << " " - << fieldEnergy << " " - << EzAmp << endl; - - } - - Ippl::Comm->barrier(); - } - - void dumpParticleData() { - - typename ParticleAttrib::HostMirror R_host = this->R.getHostMirror(); - typename ParticleAttrib::HostMirror P_host = this->P.getHostMirror(); - Kokkos::deep_copy(R_host, this->R.getView()); - Kokkos::deep_copy(P_host, P.getView()); - std::stringstream pname; - pname << "data/ParticleIC_"; - pname << Ippl::Comm->rank(); - pname << ".csv"; - Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); - pcsvout.precision(10); - pcsvout.setf(std::ios::scientific, std::ios::floatfield); - pcsvout << "R_x, R_y, R_z, V_x, V_y, V_z" << endl; - for (size_type i = 0; i< this->getLocalNum(); i++) { - pcsvout << R_host(i)[0] << " " - << R_host(i)[1] << " " - << R_host(i)[2] << " " - << P_host(i)[0] << " " - << P_host(i)[1] << " " - << P_host(i)[2] << endl; - } - Ippl::Comm->barrier(); - } - - void dumpLocalDomains(const FieldLayout_t& fl, const unsigned int step) { - - if (Ippl::Comm->rank() == 0) { - const typename FieldLayout_t::host_mirror_type domains = fl.getHostLocalDomains(); - std::ofstream myfile; - myfile.open("data/domains" + std::to_string(step) + ".txt"); - for (unsigned int i = 0; i < domains.size(); ++i) { - myfile << domains[i][0].first() << " " << domains[i][1].first() 
<< " " << domains[i][2].first() << " " - << domains[i][0].first() << " " << domains[i][1].last() << " " << domains[i][2].first() << " " - << domains[i][0].last() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " - << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].last() - << "\n"; - } - myfile.close(); - } - Ippl::Comm->barrier(); - } + //void dumpData() { + + // auto Pview = P.getView(); + + // double Energy = 0.0; + + // Kokkos::parallel_reduce("Particle Energy", this->getLocalNum(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = dot(Pview(i), Pview(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(Energy)); + + // Energy *= 0.5; + // double gEnergy = 0.0; + + // MPI_Reduce(&Energy, &gEnergy, 1, + // MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + + // const int nghostE = E_m.getNghost(); + // auto Eview = E_m.getView(); + // Vector_t normE; + // using mdrange_type = Kokkos::MDRangePolicy>; + + // for (unsigned d=0; d(temp)); + // double globaltemp = 0.0; + // MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + // normE[d] = std::sqrt(globaltemp); + // } + + // if (Ippl::Comm->rank() == 0) { + // std::stringstream fname; + // fname << "data/ParticleField_"; + // fname << Ippl::Comm->size(); + // fname << ".csv"; + + // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + // csvout.precision(10); + // csvout.setf(std::ios::scientific, std::ios::floatfield); + + // if(time_m == 0.0) { + // csvout << "time, Kinetic energy, Rho_norm2, Ex_norm2, Ey_norm2, Ez_norm2" << endl; + // } + + // csvout << time_m << " " + // << gEnergy << " " + // << rhoNorm_m << " " + // << normE[0] << " " + // << normE[1] << " " + // << normE[2] << endl; + // } + + // Ippl::Comm->barrier(); + //} + + //void dumpLandau() { + + // const int nghostE = E_m.getNghost(); + // auto Eview = E_m.getView(); + // double fieldEnergy, ExAmp; + // using mdrange_type = Kokkos::MDRangePolicy>; + + // double temp = 0.0; + // Kokkos::parallel_reduce("Ex inner product", + // mdrange_type({nghostE, nghostE, nghostE}, + // {Eview.extent(0) - nghostE, + // Eview.extent(1) - nghostE, + // Eview.extent(2) - nghostE}), + // KOKKOS_LAMBDA(const size_t i, const size_t j, + // const size_t k, double& valL) + // { + // double myVal = std::pow(Eview(i, j, k)[0], 2); + // valL += myVal; + // }, Kokkos::Sum(temp)); + // double globaltemp = 0.0; + // MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + // fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; + + // double tempMax = 0.0; + // Kokkos::parallel_reduce("Ex max norm", + // mdrange_type({nghostE, nghostE, nghostE}, + // {Eview.extent(0) - nghostE, + // Eview.extent(1) - nghostE, + // Eview.extent(2) - nghostE}), + // KOKKOS_LAMBDA(const size_t i, const size_t j, + // const size_t k, double& valL) + // { + // double myVal = std::fabs(Eview(i, j, k)[0]); + // if(myVal > valL) valL = myVal; + // }, Kokkos::Max(tempMax)); + // ExAmp = 0.0; + // MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + // if (Ippl::Comm->rank() == 0) { + // std::stringstream fname; + // fname << "data/FieldLandau_"; + // fname << Ippl::Comm->size(); + // fname << ".csv"; + + + // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + // csvout.precision(10); + // csvout.setf(std::ios::scientific, std::ios::floatfield); + + // if(time_m == 0.0) { + // csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + // } + + // csvout << time_m << " " + // << 
fieldEnergy << " " + // << ExAmp << endl; + + // } + // + // Ippl::Comm->barrier(); + //} + // + //void dumpBumponTail() { + + // const int nghostE = E_m.getNghost(); + // auto Eview = E_m.getView(); + // double fieldEnergy, EzAmp; + // using mdrange_type = Kokkos::MDRangePolicy>; + + // double temp = 0.0; + // Kokkos::parallel_reduce("Ex inner product", + // mdrange_type({nghostE, nghostE, nghostE}, + // {Eview.extent(0) - nghostE, + // Eview.extent(1) - nghostE, + // Eview.extent(2) - nghostE}), + // KOKKOS_LAMBDA(const size_t i, const size_t j, + // const size_t k, double& valL) + // { + // double myVal = std::pow(Eview(i, j, k)[2], 2); + // valL += myVal; + // }, Kokkos::Sum(temp)); + // double globaltemp = 0.0; + // MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + // fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; + + // double tempMax = 0.0; + // Kokkos::parallel_reduce("Ex max norm", + // mdrange_type({nghostE, nghostE, nghostE}, + // {Eview.extent(0) - nghostE, + // Eview.extent(1) - nghostE, + // Eview.extent(2) - nghostE}), + // KOKKOS_LAMBDA(const size_t i, const size_t j, + // const size_t k, double& valL) + // { + // double myVal = std::fabs(Eview(i, j, k)[2]); + // if(myVal > valL) valL = myVal; + // }, Kokkos::Max(tempMax)); + // EzAmp = 0.0; + // MPI_Reduce(&tempMax, &EzAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + // if (Ippl::Comm->rank() == 0) { + // std::stringstream fname; + // fname << "data/FieldBumponTail_"; + // fname << Ippl::Comm->size(); + // fname << ".csv"; + + + // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + // csvout.precision(10); + // csvout.setf(std::ios::scientific, std::ios::floatfield); + + // if(time_m == 0.0) { + // csvout << "time, Ez_field_energy, Ez_max_norm" << endl; + // } + + // csvout << time_m << " " + // << fieldEnergy << " " + // << EzAmp << endl; + + // } + // + // Ippl::Comm->barrier(); + //} + + //void dumpParticleData() { + + // typename ParticleAttrib::HostMirror R_host = this->R.getHostMirror(); + // typename ParticleAttrib::HostMirror P_host = this->P.getHostMirror(); + // Kokkos::deep_copy(R_host, this->R.getView()); + // Kokkos::deep_copy(P_host, P.getView()); + // std::stringstream pname; + // pname << "data/ParticleIC_"; + // pname << Ippl::Comm->rank(); + // pname << ".csv"; + // Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + // pcsvout.precision(10); + // pcsvout.setf(std::ios::scientific, std::ios::floatfield); + // pcsvout << "R_x, R_y, R_z, V_x, V_y, V_z" << endl; + // for (size_type i = 0; i< this->getLocalNum(); i++) { + // pcsvout << R_host(i)[0] << " " + // << R_host(i)[1] << " " + // << R_host(i)[2] << " " + // << P_host(i)[0] << " " + // << P_host(i)[1] << " " + // << P_host(i)[2] << endl; + // } + // Ippl::Comm->barrier(); + //} + // + //void dumpLocalDomains(const FieldLayout_t& fl, const unsigned int step) { + + // if (Ippl::Comm->rank() == 0) { + // const typename FieldLayout_t::host_mirror_type domains = fl.getHostLocalDomains(); + // std::ofstream myfile; + // myfile.open("data/domains" + std::to_string(step) + ".txt"); + // for (unsigned int i = 0; i < domains.size(); ++i) { + // myfile << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " + // << domains[i][0].first() << " " << domains[i][1].last() << " " << domains[i][2].first() << " " + // << domains[i][0].last() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " + // << domains[i][0].first() << " " 
<< domains[i][1].first() << " " << domains[i][2].last() + // << "\n"; + // } + // myfile.close(); + // } + // Ippl::Comm->barrier(); + //} private: void setBCAllPeriodic() { diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index ec14d1b15..0102e55dc 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -220,14 +220,14 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(particleCreation); - typedef ippl::detail::RegionLayout RegionLayout_t; - const RegionLayout_t& RLayout = PL.getRegionLayout(); - const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); + //typedef ippl::detail::RegionLayout RegionLayout_t; + //const RegionLayout_t& RLayout = PL.getRegionLayout(); + //const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t minU, maxU; int myRank = Ippl::Comm->rank(); for (unsigned d = 0; d size(); @@ -255,66 +255,66 @@ int main(int argc, char *argv[]){ P->q = P->Q_m/totalP; msg << "particles created and initial conditions assigned " << endl; - P->scatterCIC(totalP, 0, hr); + P->scatter(); - IpplTimings::startTimer(SolveTimer); - P->solver_mp->solve(); - IpplTimings::stopTimer(SolveTimer); + //IpplTimings::startTimer(SolveTimer); + //P->solver_mp->solve(); + //IpplTimings::stopTimer(SolveTimer); - P->gatherCIC(); + //P->gather(); - IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); - //P->dumpLocalDomains(FL, 0); - IpplTimings::stopTimer(dumpDataTimer); + //IpplTimings::startTimer(dumpDataTimer); + //P->dumpLandau(); + ////P->dumpLocalDomains(FL, 0); + //IpplTimings::stopTimer(dumpDataTimer); - // begin main timestep loop - msg << "Starting iterations ..." 
<< endl; - for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E; - IpplTimings::stopTimer(PTimer); + // IpplTimings::startTimer(PTimer); + // P->P = P->P - 0.5 * dt * P->E; + // IpplTimings::stopTimer(PTimer); - //drift - IpplTimings::startTimer(RTimer); - P->R = P->R + dt * P->P; - IpplTimings::stopTimer(RTimer); + // //drift + // IpplTimings::startTimer(RTimer); + // P->R = P->R + dt * P->P; + // IpplTimings::stopTimer(RTimer); - //Since the particles have moved spatially update them to correct processors - IpplTimings::startTimer(updateTimer); - PL.update(*P, bunchBuffer); - IpplTimings::stopTimer(updateTimer); + // //Since the particles have moved spatially update them to correct processors + // IpplTimings::startTimer(updateTimer); + // PL.update(*P, bunchBuffer); + // IpplTimings::stopTimer(updateTimer); - //scatter the charge onto the underlying grid - P->scatterCIC(totalP, it+1, hr); + // //scatter the charge onto the underlying grid + // P->scatter(totalP, it+1, hr); - //Field solve - IpplTimings::startTimer(SolveTimer); - P->solver_mp->solve(); - IpplTimings::stopTimer(SolveTimer); + // //Field solve + // IpplTimings::startTimer(SolveTimer); + // P->solver_mp->solve(); + // IpplTimings::stopTimer(SolveTimer); - // gather E field - P->gatherCIC(); + // // gather E field + // P->gather(); - //kick - IpplTimings::startTimer(PTimer); - P->P = P->P - 0.5 * dt * P->E; - IpplTimings::stopTimer(PTimer); + // //kick + // IpplTimings::startTimer(PTimer); + // P->P = P->P - 0.5 * dt * P->E; + // IpplTimings::stopTimer(PTimer); - P->time_m += dt; - IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); - IpplTimings::stopTimer(dumpDataTimer); - msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; - } + // P->time_m += dt; + // IpplTimings::startTimer(dumpDataTimer); + // P->dumpLandau(); + // IpplTimings::stopTimer(dumpDataTimer); + // msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + //} msg << "LandauDamping: End." << endl; IpplTimings::stopTimer(mainTimer); diff --git a/src/FieldLayout/FieldLayout.hpp b/src/FieldLayout/FieldLayout.hpp index 47d12a9e6..9e129e497 100644 --- a/src/FieldLayout/FieldLayout.hpp +++ b/src/FieldLayout/FieldLayout.hpp @@ -132,21 +132,6 @@ namespace ippl { isAllPeriodic_m = isAllPeriodic; - bool isAllSerial = true; - - for (unsigned d = 0; d < Dim; ++d) { - isAllSerial = isAllSerial && (requestedLayout_m[d] == SERIAL); - } - - if ((nRanks < 2) || isAllSerial) { - Kokkos::resize(dLocalDomains_m, nRanks); - Kokkos::resize(hLocalDomains_m, nRanks); - hLocalDomains_m(0) = domain; - Kokkos::deep_copy(dLocalDomains_m, hLocalDomains_m); - return; - } - - // If the user did not specify parallel/serial flags then make all parallel. long totparelems = 1; for (unsigned d = 0; d < Dim; ++d) { @@ -160,6 +145,22 @@ namespace ippl { } } + bool isAllSerial = true; + + for (unsigned d = 0; d < Dim; ++d) { + isAllSerial = isAllSerial && (requestedLayout_m[d] == SERIAL); + } + + if ((nRanks < 2) || isAllSerial) { + Kokkos::resize(dLocalDomains_m,nRanks); + Kokkos::resize(hLocalDomains_m,nRanks); + for (int r = 0; r < nRanks; ++r) { + hLocalDomains_m(r) = domain; + } + Kokkos::deep_copy(dLocalDomains_m, hLocalDomains_m); + return; + } + /* Check to see if we have too few elements to partition. If so, reduce * the number of ranks (if necessary) to just the number of elements along * parallel dims. 
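For orientation (not part of the patch series): the scatterPIF/gatherPIF kernels introduced in the ParticleAttrib changes below amount to direct nonuniform Fourier sums, evaluated mode by mode over all particles. The following is a minimal serial sketch under stated assumptions only: hypothetical names (Particle, mode_k, pif_step), plain std::complex instead of Kokkos::complex, no MPI or Kokkos hierarchical parallelism, and normalization left out. The wavenumber map mirrors the [-K K] convention adopted in patch 009 further below.

    #include <array>
    #include <cmath>
    #include <complex>
    #include <vector>

    struct Particle { std::array<double, 3> x; std::array<double, 3> E; double q; };
    using cplx = std::complex<double>;

    // Map a mode index i in [0, N) to a signed wavenumber, as in patch 009:
    // k = 2*pi/L * (i - N) for i > N/2, else 2*pi/L * i. (The grid-based
    // FFTPeriodicPoissonSolver additionally zeroes the Nyquist mode i == N/2
    // before differentiating; see the fft-deriv.pdf reference in the patches.)
    double mode_k(int i, int N, double L) {
        const double pi = std::acos(-1.0);
        bool shift = (i > N / 2);
        return 2.0 * pi / L * (i - shift * N);
    }

    // One scatter/solve/gather pass:
    //   scatter: rho_hat(k) = sum_p q_p * exp(-i k.x_p)               (scatterPIF)
    //   solve  : E_hat_d(k) = -i k_d rho_hat(k) / |k|^2
    //   gather : E_d(x_p)   = Re[ sum_k E_hat_d(k) * exp(+i k.x_p) ]  (gatherPIF)
    void pif_step(std::vector<Particle>& parts, const std::array<int, 3>& N,
                  const std::array<double, 3>& L) {
        const cplx I(0.0, 1.0);
        for (auto& p : parts) p.E = {0.0, 0.0, 0.0};    // reset before accumulating
        for (int i = 0; i < N[0]; ++i)
        for (int j = 0; j < N[1]; ++j)
        for (int k = 0; k < N[2]; ++k) {
            const std::array<double, 3> kv = { mode_k(i, N[0], L[0]),
                                               mode_k(j, N[1], L[1]),
                                               mode_k(k, N[2], L[2]) };
            const double k2 = kv[0]*kv[0] + kv[1]*kv[1] + kv[2]*kv[2];
            if (k2 == 0.0) continue;                    // zero mode carries no field
            cplx rho_hat(0.0, 0.0);
            for (const auto& p : parts) {               // inner reduction of scatterPIF
                const double arg = kv[0]*p.x[0] + kv[1]*p.x[1] + kv[2]*p.x[2];
                rho_hat += p.q * std::exp(-I * arg);
            }
            for (auto& p : parts) {                     // accumulation of gatherPIF
                const double arg = kv[0]*p.x[0] + kv[1]*p.x[1] + kv[2]*p.x[2];
                for (int d = 0; d < 3; ++d) {
                    const cplx Ek = -I * kv[d] * rho_hat / k2;
                    p.E[d] += (Ek * std::exp(I * arg)).real();
                }
            }
        }
    }

Both sums cost O(N_modes x N_particles), which is why the kernels below use Kokkos team policies with an inner TeamThreadRange reduction and a single MPI_Allreduce of the mode coefficients in scatterPIF.
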
diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 99276a82c..5e60fd06a 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -32,6 +32,80 @@ #include "Expression/IpplExpressions.h" #include "Particle/ParticleAttribBase.h" + +//namespace sample { // namespace helps with name resolution in reduction identity +// template< typename T, int N0, int N1, int N2 > +// struct array_type { +// +// using view_type = typename ippl::detail::ViewType::view_type; +// view_type viewTemp{"viewLocal",N0,N1,N2}; +// using mdrange_type3 = Kokkos::MDRangePolicy>; +// +// //KOKKOS_INLINE_FUNCTION // Default constructor - Initialize to 0's +// array_type() { +// Kokkos::deep_copy(viewTemp, 0.0); +// //Kokkos::parallel_for( +// // "array_type default constructor", +// // mdrange_type3({0, 0, 0}, +// // {viewTemp.extent(0), +// // viewTemp.extent(1), +// // viewTemp.extent(2)}), +// // KOKKOS_CLASS_LAMBDA(const size_t i, +// // const size_t j, +// // const size_t k) +// // { +// // viewTemp(i,j,k) = 0.0; +// // }); +// } +// //KOKKOS_INLINE_FUNCTION // Copy Constructor +// array_type(const array_type & rhs) { +// auto rhsView = rhs.viewTemp; +// Kokkos::deep_copy(viewTemp, rhsView); +// //Kokkos::parallel_for( +// // "array_type copy constructor", +// // mdrange_type3({0, 0, 0}, +// // {viewTemp.extent(0), +// // viewTemp.extent(1), +// // viewTemp.extent(2)}), +// // KOKKOS_CLASS_LAMBDA(const size_t i, +// // const size_t j, +// // const size_t k) +// // { +// // viewTemp(i,j,k) = rhsView(i,j,k); +// // }); +// +// } +// KOKKOS_FUNCTION // add operator +// array_type& operator+=(const array_type& src) { +// auto srcView = src.viewTemp; +// Kokkos::parallel_for( +// "array_type operator +=", +// mdrange_type3({0, 0, 0}, +// {viewTemp.extent(0), +// viewTemp.extent(1), +// viewTemp.extent(2)}), +// KOKKOS_CLASS_LAMBDA(const size_t i, +// const size_t j, +// const size_t k) +// { +// viewTemp(i,j,k) += srcView(i,j,k); +// }); +// +// return *this; +// } +// }; +// typedef array_type,34,34,34> ValueType; +//} +// +//namespace Kokkos { //reduction identity must be defined in Kokkos namespace +// template<> +// struct reduction_identity< sample::ValueType > { +// KOKKOS_FORCEINLINE_FUNCTION static sample::ValueType sum() { +// return sample::ValueType(); +// } +// }; +//} + namespace ippl { // ParticleAttrib class definition @@ -127,7 +201,6 @@ namespace ippl { /*! * Assign the same value to the whole attribute. */ - //KOKKOS_INLINE_FUNCTION ParticleAttrib& operator=(T x); /*! @@ -138,17 +211,22 @@ namespace ippl { * @param expr is the expression */ template - //KOKKOS_INLINE_FUNCTION ParticleAttrib& operator=(detail::Expression const& expr); - // // scatter the data from this attribute onto the given Field, using -// // the given Position attribute + // scatter the data from this attribute onto the given Field, using + // the given Position attribute template void scatter(Field& f, const ParticleAttrib, Properties... >& pp) const; - + + template + void + scatterPIF(Field& f, + const ParticleAttrib, Properties... 
>& pp) const; + + template void gather(Field& f, diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index dad8ededf..0b6d8aee8 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -30,6 +30,7 @@ #include "Communicate/DataTypes.h" #include "Utility/IpplTimings.h" + namespace ippl { template @@ -195,6 +196,91 @@ namespace ippl { } + template + template + void ParticleAttrib::scatterPIF(Field& f, + const ParticleAttrib< Vector, Properties... >& pp) + const + { + static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("Scatter"); + IpplTimings::startTimer(scatterTimer); + + using view_type = typename Field::view_type; + using vector_type = typename M::vector_type; + using value_type = typename ParticleAttrib::value_type; + view_type fview = f.getView(); + const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const M& mesh = f.get_mesh(); + const vector_type& dx = mesh.getMeshSpacing(); + const vector_type& origin = mesh.getOrigin(); + const auto& domain = layout.getDomain(); + vector_type length; + + for (unsigned d=0; d < Dim; ++d) { + length[d] = origin[d] + dx[d] * domain[d].length(); + } + + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + using view_type_temp = typename detail::ViewType::view_type; + + view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); + + double pi = std::acos(-1.0); + Kokkos::complex imag = {0.0, 1.0}; + + size_t Np = *(this->localNum_mp); + + size_t N = domain[0].length()*domain[1].length()*domain[2].length(); + + Kokkos::parallel_for("ParticleAttrib::scatterPIF compute", + team_policy(N, Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { + const size_t flatIndex = teamMember.league_rank(); + const int i = flatIndex % domain[0].length(); + const int j = (int)(flatIndex / domain[0].length()); + const int k = (int)(flatIndex / (domain[0].length() * domain[1].length())); + + FT reducedValue = 0.0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), + [=](const size_t idx, FT& innerReduce) + { + //This can be done with Ippl vectors but problem maybe the + //complex numbers + Kokkos::complex fx = Kokkos::Experimental::cos((2*pi*i*pp(idx)[0])/length[0]) + -imag*Kokkos::Experimental::sin((2*pi*i*pp(idx)[0])/length[0]); + Kokkos::complex fy = Kokkos::Experimental::cos((2*pi*j*pp(idx)[1])/length[1]) + -imag*Kokkos::Experimental::sin((2*pi*j*pp(idx)[1])/length[1]); + Kokkos::complex fz = Kokkos::Experimental::cos((2*pi*k*pp(idx)[2])/length[2]) + -imag*Kokkos::Experimental::sin((2*pi*k*pp(idx)[2])/length[2]); + + const value_type& val = dview_m(idx); + + innerReduce += fx*fy*fz*val; + }, Kokkos::Sum(reducedValue)); + + if(teamMember.team_rank() == 0) { + viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; + } + + } + ); + + IpplTimings::stopTimer(scatterTimer); + + + static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); + IpplTimings::startTimer(scatterAllReduceTimer); + int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); + IpplTimings::stopTimer(scatterAllReduceTimer); + + } + + template template void ParticleAttrib::gather(Field& f, @@ -269,6 +355,15 @@ namespace ippl { attrib.scatter(f, pp); } + template + inline + void scatterPIF(const ParticleAttrib& attrib, Field& f, + const ParticleAttrib, 
Properties...>& pp) + { + attrib.scatterPIF(f, pp); + } + + template inline From 0d4f2b23a8264f125261c1b3f1bfe39984bea717 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 4 Nov 2022 16:55:40 +0100 Subject: [PATCH 007/117] PIF implemented but gives nan results. Need to check --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 376 ++---------------- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 96 ++--- src/Particle/ParticleAttrib.h | 15 +- src/Particle/ParticleAttrib.hpp | 134 ++++++- src/Solver/FFTPeriodicPoissonSolver.hpp | 2 + 5 files changed, 214 insertions(+), 409 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index b13b9f156..ecb7ffd0a 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -17,7 +17,6 @@ // #include "Ippl.h" -#include "Solver/FFTPeriodicPoissonSolver.h" // dimension of our positions constexpr unsigned Dim = 3; @@ -26,7 +25,6 @@ constexpr unsigned Dim = 3; typedef ippl::ParticleSpatialLayout PLayout_t; typedef ippl::UniformCartesian Mesh_t; typedef ippl::FieldLayout FieldLayout_t; -typedef ippl::OrthogonalRecursiveBisection ORB; using size_type = ippl::detail::size_type; @@ -40,105 +38,20 @@ template using ParticleAttrib = ippl::ParticleAttrib; typedef Vector Vector_t; -typedef Vector, Dim> CxVector_t; typedef Field Field_t; typedef Field, Dim> CxField_t; typedef Field VField_t; -typedef Field CxVField_t; -typedef ippl::FFTPeriodicPoissonSolver Solver_t; const double pi = std::acos(-1.0); // Test programs have to define this variable for VTK dump purposes extern const char* TestName; -//void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, -// double dx, double dy, double dz) { -// -// -// typename VField_t::view_type::host_mirror_type host_view = E.getHostMirror(); -// -// std::stringstream fname; -// fname << "data/ef_"; -// fname << std::setw(4) << std::setfill('0') << iteration; -// fname << ".vtk"; -// -// Kokkos::deep_copy(host_view, E.getView()); -// -// Inform vtkout(NULL, fname.str().c_str(), Inform::OVERWRITE); -// vtkout.precision(10); -// vtkout.setf(std::ios::scientific, std::ios::floatfield); -// -// // start with header -// vtkout << "# vtk DataFile Version 2.0" << endl; -// vtkout << TestName << endl; -// vtkout << "ASCII" << endl; -// vtkout << "DATASET STRUCTURED_POINTS" << endl; -// vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << endl; -// vtkout << "ORIGIN " << -dx << " " << -dy << " " << -dz << endl; -// vtkout << "SPACING " << dx << " " << dy << " " << dz << endl; -// vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << endl; -// -// vtkout << "VECTORS E-Field float" << endl; -// for (int z=0; z class ChargedParticlesPIF : public ippl::ParticleBase { public: - CxVField_t E_m; CxField_t rho_m; - // ORB - ORB orb; - Vector nr_m; ippl::e_dim_tag decomp_m[Dim]; @@ -149,18 +62,10 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double Q_m; - std::string stype_m; - - std::shared_ptr solver_mp; - double time_m; double rhoNorm_m; - unsigned int loadbalancefreq_m; - - double loadbalancethreshold_m; - public: ParticleAttrib q; // charge @@ -208,101 +113,9 @@ class ChargedParticlesPIF : public ippl::ParticleBase { setBCAllPeriodic(); } - //void updateLayout(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, - // bool& isFirstRepartition) { - // // Update local fields - // static IpplTimings::TimerRef tupdateLayout = 
IpplTimings::getTimer("updateLayout"); - // IpplTimings::startTimer(tupdateLayout); - // this->E_m.updateLayout(fl); - // this->rho_m.updateLayout(fl); - - // // Update layout with new FieldLayout - // PLayout& layout = this->getLayout(); - // layout.updateLayout(fl, mesh); - // IpplTimings::stopTimer(tupdateLayout); - // static IpplTimings::TimerRef tupdatePLayout = IpplTimings::getTimer("updatePB"); - // IpplTimings::startTimer(tupdatePLayout); - // if(!isFirstRepartition) { - // layout.update(*this, buffer); - // } - // IpplTimings::stopTimer(tupdatePLayout); - //} - - //void initializeORB(FieldLayout_t& fl, Mesh_t& mesh) { - // orb.initialize(fl, mesh, rho_m); - //} - - //void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticlesPIF& buffer, - // bool& isFirstRepartition) { - // // Repartition the domains - // bool res = orb.binaryRepartition(this->R, fl, isFirstRepartition); - - // if (res != true) { - // std::cout << "Could not repartition!" << std::endl; - // return; - // } - // // Update - // this->updateLayout(fl, mesh, buffer, isFirstRepartition); - // this->solver_mp->setRhs(rho_m); - //} - - //bool balance(size_type totalP, const unsigned int nstep){ - // if(std::strcmp(TestName,"UniformPlasmaTest") == 0) { - // return (nstep % loadbalancefreq_m == 0); - // } - // else { - // int local = 0; - // std::vector res(Ippl::Comm->size()); - // double equalPart = (double) totalP / Ippl::Comm->size(); - // double dev = std::abs((double)this->getLocalNum() - equalPart) / totalP; - // if (dev > loadbalancethreshold_m) - // local = 1; - // MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, Ippl::getComm()); - - // for (unsigned int i = 0; i < res.size(); i++) { - // if (res[i] == 1) - // return true; - // } - // return false; - // } - //} - - //void gatherStatistics(size_type totalP) { - // std::vector imb(Ippl::Comm->size()); - // double equalPart = (double) totalP / Ippl::Comm->size(); - // double dev = (std::abs((double)this->getLocalNum() - equalPart) - // / totalP) * 100.0; - // MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, - // Ippl::getComm()); - // - // if (Ippl::Comm->rank() == 0) { - // std::stringstream fname; - // fname << "data/LoadBalance_"; - // fname << Ippl::Comm->size(); - // fname << ".csv"; - - // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - // csvout.precision(5); - // csvout.setf(std::ios::scientific, std::ios::floatfield); - - // if(time_m == 0.0) { - // csvout << "time, rank, imbalance percentage" << endl; - // } - - // for(int r=0; r < Ippl::Comm->size(); ++r) { - // csvout << time_m << " " - // << r << " " - // << imb[r] << endl; - // } - // } - - // Ippl::Comm->barrier(); - // - //} - void gather() { - gatherPIF(this->E, E_m, this->R); + gatherPIF(this->E, rho_m, this->R); } @@ -316,145 +129,60 @@ class ChargedParticlesPIF : public ippl::ParticleBase { } - void initSolver() { - - Inform m("solver "); - - } - - - - //void dumpData() { - - // auto Pview = P.getView(); - - // double Energy = 0.0; - - // Kokkos::parallel_reduce("Particle Energy", this->getLocalNum(), - // KOKKOS_LAMBDA(const int i, double& valL){ - // double myVal = dot(Pview(i), Pview(i)).apply(); - // valL += myVal; - // }, Kokkos::Sum(Energy)); - - // Energy *= 0.5; - // double gEnergy = 0.0; - - // MPI_Reduce(&Energy, &gEnergy, 1, - // MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - - - // const int nghostE = E_m.getNghost(); - // auto Eview = E_m.getView(); - // Vector_t normE; - // using mdrange_type = Kokkos::MDRangePolicy>; - - // for (unsigned d=0; 
d(temp)); - // double globaltemp = 0.0; - // MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - // normE[d] = std::sqrt(globaltemp); - // } - - // if (Ippl::Comm->rank() == 0) { - // std::stringstream fname; - // fname << "data/ParticleField_"; - // fname << Ippl::Comm->size(); - // fname << ".csv"; - - // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - // csvout.precision(10); - // csvout.setf(std::ios::scientific, std::ios::floatfield); - - // if(time_m == 0.0) { - // csvout << "time, Kinetic energy, Rho_norm2, Ex_norm2, Ey_norm2, Ez_norm2" << endl; - // } - - // csvout << time_m << " " - // << gEnergy << " " - // << rhoNorm_m << " " - // << normE[0] << " " - // << normE[1] << " " - // << normE[2] << endl; - // } - // Ippl::Comm->barrier(); - //} + void dumpLandau(size_type totalP) { + + auto Eview = E.getView(); - //void dumpLandau() { + double fieldEnergy, ExAmp; + double temp = 0.0; - // const int nghostE = E_m.getNghost(); - // auto Eview = E_m.getView(); - // double fieldEnergy, ExAmp; - // using mdrange_type = Kokkos::MDRangePolicy>; + Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = Eview(i)[0] * Eview(i)[0]; + valL += myVal; + }, Kokkos::Sum(temp)); - // double temp = 0.0; - // Kokkos::parallel_reduce("Ex inner product", - // mdrange_type({nghostE, nghostE, nghostE}, - // {Eview.extent(0) - nghostE, - // Eview.extent(1) - nghostE, - // Eview.extent(2) - nghostE}), - // KOKKOS_LAMBDA(const size_t i, const size_t j, - // const size_t k, double& valL) - // { - // double myVal = std::pow(Eview(i, j, k)[0], 2); - // valL += myVal; - // }, Kokkos::Sum(temp)); - // double globaltemp = 0.0; - // MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - // fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy = globaltemp * volume / totalP ; - // double tempMax = 0.0; - // Kokkos::parallel_reduce("Ex max norm", - // mdrange_type({nghostE, nghostE, nghostE}, - // {Eview.extent(0) - nghostE, - // Eview.extent(1) - nghostE, - // Eview.extent(2) - nghostE}), - // KOKKOS_LAMBDA(const size_t i, const size_t j, - // const size_t k, double& valL) - // { - // double myVal = std::fabs(Eview(i, j, k)[0]); - // if(myVal > valL) valL = myVal; - // }, Kokkos::Max(tempMax)); - // ExAmp = 0.0; - // MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t i, double& valL) + { + double myVal = std::fabs(Eview(i)[0]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + ExAmp = 0.0; + MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - // if (Ippl::Comm->rank() == 0) { - // std::stringstream fname; - // fname << "data/FieldLandau_"; - // fname << Ippl::Comm->size(); - // fname << ".csv"; + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; - // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - // csvout.precision(10); - // csvout.setf(std::ios::scientific, std::ios::floatfield); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, 
std::ios::floatfield); - // if(time_m == 0.0) { - // csvout << "time, Ex_field_energy, Ex_max_norm" << endl; - // } + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } - // csvout << time_m << " " - // << fieldEnergy << " " - // << ExAmp << endl; + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; - // } - // - // Ippl::Comm->barrier(); - //} - // + } + + Ippl::Comm->barrier(); + } + //void dumpBumponTail() { // const int nghostE = E_m.getNghost(); @@ -542,24 +270,6 @@ class ChargedParticlesPIF : public ippl::ParticleBase { // } // Ippl::Comm->barrier(); //} - // - //void dumpLocalDomains(const FieldLayout_t& fl, const unsigned int step) { - - // if (Ippl::Comm->rank() == 0) { - // const typename FieldLayout_t::host_mirror_type domains = fl.getHostLocalDomains(); - // std::ofstream myfile; - // myfile.open("data/domains" + std::to_string(step) + ".txt"); - // for (unsigned int i = 0; i < domains.size(); ++i) { - // myfile << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " - // << domains[i][0].first() << " " << domains[i][1].last() << " " << domains[i][2].first() << " " - // << domains[i][0].last() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " - // << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].last() - // << "\n"; - // } - // myfile.close(); - // } - // Ippl::Comm->barrier(); - //} private: void setBCAllPeriodic() { diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 0102e55dc..054ff5f18 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -9,7 +9,7 @@ // Example: // srun ./LandauDampingPIF 128 128 128 10000 10 --info 10 // -// Copyright (c) 2021, Sriramkrishnan Muralikrishnan, +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland // All rights reserved // @@ -158,8 +158,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("kick"); static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("drift"); - static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); - static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); + static IpplTimings::TimerRef BCTimer = IpplTimings::getTimer("particleBC"); IpplTimings::startTimer(mainTimer); @@ -197,7 +196,7 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.5*dx; + const double dt = 0.05;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -210,12 +209,8 @@ int main(int argc, char *argv[]){ P->nr_m = nr; - P->E_m.initialize(mesh, FL); P->rho_m.initialize(mesh, FL); - bunch_type bunchBuffer(PL); - - P->initSolver(); P->time_m = 0.0; IpplTimings::startTimer(particleCreation); @@ -257,64 +252,53 @@ int main(int argc, char *argv[]){ P->scatter(); - //IpplTimings::startTimer(SolveTimer); - //P->solver_mp->solve(); - //IpplTimings::stopTimer(SolveTimer); - - //P->gather(); - - //IpplTimings::startTimer(dumpDataTimer); - //P->dumpLandau(); - ////P->dumpLocalDomains(FL, 0); - //IpplTimings::stopTimer(dumpDataTimer); + P->gather(); - //// begin main timestep loop - //msg << "Starting iterations ..." 
<< endl; - //for (unsigned int it=0; itdumpLandau(totalP); + IpplTimings::stopTimer(dumpDataTimer); - // // LeapFrog time stepping https://en.wikipedia.org/wiki/Leapfrog_integration - // // Here, we assume a constant charge-to-mass ratio of -1 for - // // all the particles hence eliminating the need to store mass as - // // an attribute - // // kick + // begin main timestep loop + msg << "Starting iterations ..." << endl; + for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E; - // IpplTimings::stopTimer(PTimer); + // LeapFrog time stepping https://en.wikipedia.org/wiki/Leapfrog_integration + // Here, we assume a constant charge-to-mass ratio of -1 for + // all the particles hence eliminating the need to store mass as + // an attribute + // kick - // //drift - // IpplTimings::startTimer(RTimer); - // P->R = P->R + dt * P->P; - // IpplTimings::stopTimer(RTimer); + IpplTimings::startTimer(PTimer); + P->P = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); - // //Since the particles have moved spatially update them to correct processors - // IpplTimings::startTimer(updateTimer); - // PL.update(*P, bunchBuffer); - // IpplTimings::stopTimer(updateTimer); + //drift + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + IpplTimings::stopTimer(RTimer); + //Apply particle BC + IpplTimings::startTimer(BCTimer); + PL.applyBC(P->R, PL.getRegionLayout().getDomain()); + IpplTimings::stopTimer(BCTimer); - // //scatter the charge onto the underlying grid - // P->scatter(totalP, it+1, hr); + //scatter the charge onto the underlying grid + P->scatter(); - // //Field solve - // IpplTimings::startTimer(SolveTimer); - // P->solver_mp->solve(); - // IpplTimings::stopTimer(SolveTimer); + // Solve for and gather E field + P->gather(); - // // gather E field - // P->gather(); + //kick + IpplTimings::startTimer(PTimer); + P->P = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); - // //kick - // IpplTimings::startTimer(PTimer); - // P->P = P->P - 0.5 * dt * P->E; - // IpplTimings::stopTimer(PTimer); - - // P->time_m += dt; - // IpplTimings::startTimer(dumpDataTimer); - // P->dumpLandau(); - // IpplTimings::stopTimer(dumpDataTimer); - // msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; - //} + P->time_m += dt; + IpplTimings::startTimer(dumpDataTimer); + P->dumpLandau(totalP); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + } msg << "LandauDamping: End." << endl; IpplTimings::stopTimer(mainTimer); diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 5e60fd06a..480e8e5ed 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -106,6 +106,15 @@ // }; //} +namespace Kokkos { //reduction identity must be defined in Kokkos namespace + template<> + struct reduction_identity< ippl::Vector > { + KOKKOS_FORCEINLINE_FUNCTION static ippl::Vector sum() { + return ippl::Vector(); + } + }; +} + namespace ippl { // ParticleAttrib class definition @@ -226,12 +235,16 @@ namespace ippl { scatterPIF(Field& f, const ParticleAttrib, Properties... >& pp) const; - template void gather(Field& f, const ParticleAttrib, Properties...>& pp); + template + void + gatherPIF(Field& f, + const ParticleAttrib, Properties... 
>& pp) const; + T sum(); T max(); T min(); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 0b6d8aee8..f0f340ab0 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -213,17 +213,19 @@ namespace ippl { const FieldLayout& layout = f.getLayout(); const M& mesh = f.get_mesh(); const vector_type& dx = mesh.getMeshSpacing(); - const vector_type& origin = mesh.getOrigin(); const auto& domain = layout.getDomain(); - vector_type length; + vector_type Len; + Vector N; for (unsigned d=0; d < Dim; ++d) { - length[d] = origin[d] + dx[d] * domain[d].length(); + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; } - + typedef Kokkos::TeamPolicy<> team_policy; typedef Kokkos::TeamPolicy<>::member_type member_type; + using view_type_temp = typename detail::ViewType::view_type; view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); @@ -233,32 +235,31 @@ namespace ippl { size_t Np = *(this->localNum_mp); - size_t N = domain[0].length()*domain[1].length()*domain[2].length(); + size_t flatN = N[0]*N[1]*N[2]; Kokkos::parallel_for("ParticleAttrib::scatterPIF compute", - team_policy(N, Kokkos::AUTO), + team_policy(flatN, Kokkos::AUTO), KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { const size_t flatIndex = teamMember.league_rank(); - const int i = flatIndex % domain[0].length(); - const int j = (int)(flatIndex / domain[0].length()); - const int k = (int)(flatIndex / (domain[0].length() * domain[1].length())); + const int i = flatIndex % N[0]; + const int j = (int)(flatIndex / N[0]); + const int k = (int)(flatIndex / (N[0] * N[1])); FT reducedValue = 0.0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), [=](const size_t idx, FT& innerReduce) { - //This can be done with Ippl vectors but problem maybe the - //complex numbers - Kokkos::complex fx = Kokkos::Experimental::cos((2*pi*i*pp(idx)[0])/length[0]) - -imag*Kokkos::Experimental::sin((2*pi*i*pp(idx)[0])/length[0]); - Kokkos::complex fy = Kokkos::Experimental::cos((2*pi*j*pp(idx)[1])/length[1]) - -imag*Kokkos::Experimental::sin((2*pi*j*pp(idx)[1])/length[1]); - Kokkos::complex fz = Kokkos::Experimental::cos((2*pi*k*pp(idx)[2])/length[2]) - -imag*Kokkos::Experimental::sin((2*pi*k*pp(idx)[2])/length[2]); - + Vector iVec = {i, j, k}; + vector_type kVec; + double arg=0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + arg += kVec[d]*pp(idx)[d]; + } const value_type& val = dview_m(idx); - innerReduce += fx*fy*fz*val; + innerReduce += (Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { @@ -339,6 +340,92 @@ namespace ippl { IpplTimings::stopTimer(gatherTimer); } + template + template + void ParticleAttrib::gatherPIF(Field& f, + const ParticleAttrib< Vector, Properties... 
>& pp) + const + { + static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("Gather"); + IpplTimings::startTimer(gatherTimer); + + using view_type = typename Field::view_type; + using vector_type = typename M::vector_type; + using value_type = typename ParticleAttrib::value_type; + view_type fview = f.getView(); + const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const M& mesh = f.get_mesh(); + const vector_type& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + vector_type Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + double pi = std::acos(-1.0); + Kokkos::complex imag = {0.0, 1.0}; + + size_t Np = *(this->localNum_mp); + + size_t flatN = N[0]*N[1]*N[2]; + + Kokkos::parallel_for("ParticleAttrib::gatherPIF", + team_policy(Np, Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { + const size_t idx = teamMember.league_rank(); + + value_type reducedValue = 0.0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), + [=](const size_t flatIndex, value_type& innerReduce) + { + const int i = flatIndex % N[0]; + const int j = (int)(flatIndex / N[0]); + const int k = (int)(flatIndex / (N[0] * N[1])); + + Vector iVec = {i, j, k}; + vector_type kVec; + double Dr = 0.0, arg=0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + bool notMid = (iVec[d] != (N[d]/2)); + //For the noMid part see + //https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 + kVec[d] = notMid * 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + Dr += kVec[d] * kVec[d]; + arg += kVec[d]*pp(idx)[d]; + } + + FT Ek; + value_type Ex; + for(size_t d = 0; d < Dim; ++d) { + Ek = -(imag * kVec[d] * fview(i+nghost,j+nghost,k+nghost) / Dr); + //Inverse Fourier transform when the lhs is real + Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) + - Ek.imag() * Kokkos::Experimental::sin(arg)); + } + + innerReduce += Ex; + }, Kokkos::Sum(reducedValue)); + + teamMember.team_barrier(); + + if(teamMember.team_rank() == 0) { + dview_m(idx) = reducedValue; + } + + } + ); + + IpplTimings::stopTimer(gatherTimer); + + } /* @@ -373,6 +460,15 @@ namespace ippl { attrib.gather(f, pp); } + template + inline + void gatherPIF(const ParticleAttrib& attrib, Field& f, + const ParticleAttrib, Properties...>& pp) + { + attrib.gatherPIF(f, pp); + } + + #define DefineParticleReduction(fun, name, op, MPI_Op) \ template \ T ParticleAttrib::name() { \ diff --git a/src/Solver/FFTPeriodicPoissonSolver.hpp b/src/Solver/FFTPeriodicPoissonSolver.hpp index 015400e9a..e6f690942 100644 --- a/src/Solver/FFTPeriodicPoissonSolver.hpp +++ b/src/Solver/FFTPeriodicPoissonSolver.hpp @@ -158,6 +158,8 @@ namespace ippl { const double Len = rmax[d] - origin[d]; bool shift = (iVec[d] > (N[d]/2)); bool notMid = (iVec[d] != (N[d]/2)); + //For the noMid part see + //https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 kVec[d] = notMid * 2 * pi / Len * (iVec[d] - shift * N[d]); } From 676ba0738fd94c117dbf77b08755f27b74a05d31 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 14 Nov 2022 15:36:34 +0100 Subject: [PATCH 008/117] Bugs corrected in PIF and it seems to be working. 
Need to check more --- alpine/ElectrostaticPIC/ChargedParticles.hpp | 56 ++++++++- alpine/ElectrostaticPIC/LandauDamping.cpp | 6 +- src/Particle/ParticleAttrib.hpp | 124 +++++++++++++++++-- 3 files changed, 171 insertions(+), 15 deletions(-) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index e64417e19..53653f3dc 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -508,7 +508,61 @@ class ChargedParticles : public ippl::ParticleBase { Ippl::Comm->barrier(); } - + + void dumpLandauParticle(size_type totalP) { + + auto Eview = E.getView(); + + double fieldEnergy, ExAmp; + double temp = 0.0; + + Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = Eview(i)[0] * Eview(i)[0]; + valL += myVal; + }, Kokkos::Sum(temp)); + + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy = globaltemp * volume / totalP ; + + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t i, double& valL) + { + double myVal = std::fabs(Eview(i)[0]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + ExAmp = 0.0; + MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + void dumpBumponTail() { const int nghostE = E_m.getNghost(); diff --git a/alpine/ElectrostaticPIC/LandauDamping.cpp b/alpine/ElectrostaticPIC/LandauDamping.cpp index e78dd91bf..2cd0acbcd 100644 --- a/alpine/ElectrostaticPIC/LandauDamping.cpp +++ b/alpine/ElectrostaticPIC/LandauDamping.cpp @@ -209,7 +209,7 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.5*dx; + const double dt = 0.05;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -331,7 +331,7 @@ int main(int argc, char *argv[]){ P->gatherCIC(); IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); + P->dumpLandauParticle(totalP); P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -390,7 +390,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); + P->dumpLandauParticle(totalP); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index f0f340ab0..9aa7f8ca5 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -202,6 +202,8 @@ namespace ippl { const ParticleAttrib< Vector, Properties... 
>& pp) const { + //Inform msg("scatterPIF"); + static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("Scatter"); IpplTimings::startTimer(scatterTimer); @@ -241,9 +243,11 @@ namespace ippl { team_policy(flatN, Kokkos::AUTO), KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { const size_t flatIndex = teamMember.league_rank(); - const int i = flatIndex % N[0]; - const int j = (int)(flatIndex / N[0]); + const int k = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (k * N[0] * N[1]); + const int i = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); FT reducedValue = 0.0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), @@ -253,8 +257,9 @@ namespace ippl { vector_type kVec; double arg=0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * iVec[d]; arg += kVec[d]*pp(idx)[d]; } const value_type& val = dview_m(idx); @@ -271,6 +276,17 @@ namespace ippl { IpplTimings::stopTimer(scatterTimer); + //double sum = 0.0; + //Kokkos::parallel_reduce("inner product complex", f.getRangePolicy(), + // KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& val) { + // val += std::pow(viewLocal(i, j, k).real(), 2) + std::pow(viewLocal(i, j, k).imag(), 2); + // }, + // Kokkos::Sum(sum) + //); + //double globalSum = 0; + //MPI_Allreduce(&sum, &globalSum, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + + //msg << "rho inner product before all reduce: " << globalSum << endl; static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); IpplTimings::startTimer(scatterAllReduceTimer); @@ -279,6 +295,17 @@ namespace ippl { MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); IpplTimings::stopTimer(scatterAllReduceTimer); + //sum = 0.0; + //Kokkos::parallel_reduce("inner product complex2", f.getRangePolicy(), + // KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& val) { + // val += std::pow(fview(i, j, k).real(), 2) + std::pow(fview(i, j, k).imag(), 2); + // }, + // Kokkos::Sum(sum) + //); + //MPI_Allreduce(&sum, &globalSum, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + // + //msg << "rho inner product after all reduce: " << globalSum << endl; + } @@ -346,6 +373,7 @@ namespace ippl { const ParticleAttrib< Vector, Properties... 
>& pp) const { + //Inform msg("gatherPIF"); static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("Gather"); IpplTimings::startTimer(gatherTimer); @@ -385,27 +413,31 @@ namespace ippl { Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), [=](const size_t flatIndex, value_type& innerReduce) { - const int i = flatIndex % N[0]; - const int j = (int)(flatIndex / N[0]); const int k = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (k * N[0] * N[1]); + const int i = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); Vector iVec = {i, j, k}; vector_type kVec; double Dr = 0.0, arg=0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - bool notMid = (iVec[d] != (N[d]/2)); + //bool shift = (iVec[d] > (N[d]/2)); + //bool notMid = (iVec[d] != (N[d]/2)); //For the noMid part see //https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 - kVec[d] = notMid * 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = notMid * 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * iVec[d]; Dr += kVec[d] * kVec[d]; arg += kVec[d]*pp(idx)[d]; } - FT Ek; + FT Ek = 0.0; value_type Ex; for(size_t d = 0; d < Dim; ++d) { - Ek = -(imag * kVec[d] * fview(i+nghost,j+nghost,k+nghost) / Dr); + if(Dr != 0.0) + Ek = -(imag * kVec[d] * fview(i+nghost,j+nghost,k+nghost) / Dr); + //Inverse Fourier transform when the lhs is real Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) - Ek.imag() * Kokkos::Experimental::sin(arg)); @@ -423,8 +455,78 @@ namespace ippl { } ); + + //Kokkos::parallel_for("ParticleAttrib::gatherPIF", + // team_policy(Np, Kokkos::AUTO), + // KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { + // const size_t idx = teamMember.league_rank(); + + // for(size_t gd = 0; gd < Dim; ++gd) { + // double reducedValue = 0.0; + // Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), + // [=](const size_t flatIndex, double& innerReduce) + // { + // const int i = flatIndex % N[0]; + // const int j = (int)(flatIndex / N[0]); + // const int k = (int)(flatIndex / (N[0] * N[1])); + + // Vector iVec = {i, j, k}; + // vector_type kVec; + // double Dr = 0.0, arg=0.0; + // for(size_t d = 0; d < Dim; ++d) { + // bool shift = (iVec[d] > (N[d]/2)); + // bool notMid = (iVec[d] != (N[d]/2)); + // //For the noMid part see + // //https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 + // kVec[d] = notMid * 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + // Dr += kVec[d] * kVec[d]; + // arg += kVec[d]*pp(idx)[d]; + // } + + // FT Ek; + // double Ex; + // //for(size_t d = 0; d < Dim; ++d) { + // if(Dr != 0.0) + // Ek = -(imag * kVec[gd] * fview(i+nghost,j+nghost,k+nghost) / Dr); + // else + // Ek = 0.0; + // + // //Inverse Fourier transform when the lhs is real + // Ex = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) + // - Ek.imag() * Kokkos::Experimental::sin(arg)); + // //} + // + // innerReduce += Ex; + // }, reducedValue); + + // teamMember.team_barrier(); + + // if(teamMember.team_rank() == 0) { + // dview_m(idx)[gd] = reducedValue; + // } + + // } + // } + //); + + IpplTimings::stopTimer(gatherTimer); + //double Energy = 0.0; + + //Kokkos::parallel_reduce("E Energy", Np, + // KOKKOS_CLASS_LAMBDA(const int i, double& valL){ + // double myVal = dot(dview_m(i), dview_m(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(Energy)); + + //double gEnergy = 0.0; + + //MPI_Reduce(&Energy, &gEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + //msg << "E energy in gatherPIF: " << gEnergy << endl; + + 
} From 0a8f787b2991d43740755156fc51922848832c75 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 17 Nov 2022 05:25:19 +0100 Subject: [PATCH 009/117] [-K K] implementation --- alpine/ElectrostaticPIC/ChargedParticles.hpp | 63 +++++++++ alpine/ElectrostaticPIC/LandauDamping.cpp | 4 +- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 63 +++++++++ alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 4 +- src/Particle/ParticleAttrib.hpp | 122 +++--------------- 5 files changed, 149 insertions(+), 107 deletions(-) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 53653f3dc..8e57432e1 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -563,6 +563,69 @@ class ChargedParticles : public ippl::ParticleBase { } + void dumpEnergy(size_type totalP) { + + auto Eview = E.getView(); + + double potentialEnergy, kineticEnergy; + double temp = 0.0; + + Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Eview(i), Eview(i)).apply(); + valL += myVal; + }, Kokkos::Sum(temp)); + + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + potentialEnergy = 0.5 * globaltemp * volume / totalP ; + + + auto Pview = P.getView(); + auto qView = q.getView(); + + temp = 0.0; + + Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + myVal *= -qView(i); + valL += myVal; + }, Kokkos::Sum(temp)); + + temp *= 0.5; + globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + kineticEnergy = globaltemp; + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/Energy_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + } + + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << endl; + + } + + Ippl::Comm->barrier(); + } + + void dumpBumponTail() { const int nghostE = E_m.getNghost(); diff --git a/alpine/ElectrostaticPIC/LandauDamping.cpp b/alpine/ElectrostaticPIC/LandauDamping.cpp index 2cd0acbcd..85448c342 100644 --- a/alpine/ElectrostaticPIC/LandauDamping.cpp +++ b/alpine/ElectrostaticPIC/LandauDamping.cpp @@ -177,6 +177,7 @@ int main(int argc, char *argv[]){ const size_type totalP = std::atoll(argv[4]); const unsigned int nt = std::atoi(argv[5]); + const double dt = std::atof(argv[9]);;//0.5*dx; msg << "Landau damping" << endl @@ -209,7 +210,6 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.05;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -332,6 +332,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpLandauParticle(totalP); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -391,6 +392,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); 
P->dumpLandauParticle(totalP); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index ecb7ffd0a..5e63bba8f 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -182,6 +182,69 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Ippl::Comm->barrier(); } + + + void dumpEnergy(size_type totalP) { + + auto Eview = E.getView(); + + double potentialEnergy, kineticEnergy; + double temp = 0.0; + + Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Eview(i), Eview(i)).apply(); + valL += myVal; + }, Kokkos::Sum(temp)); + + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + potentialEnergy = 0.5 * globaltemp * volume / totalP ; + + + auto Pview = P.getView(); + auto qView = q.getView(); + + temp = 0.0; + + Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + myVal *= -qView(i); + valL += myVal; + }, Kokkos::Sum(temp)); + + temp *= 0.5; + globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + kineticEnergy = globaltemp; + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/Energy_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + } + + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << endl; + + } + + Ippl::Comm->barrier(); + } //void dumpBumponTail() { diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 054ff5f18..8db6b9dce 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -164,6 +164,7 @@ int main(int argc, char *argv[]){ const size_type totalP = std::atoll(argv[4]); const unsigned int nt = std::atoi(argv[5]); + const double dt = std::atof(argv[6]); msg << "Landau damping" << endl @@ -196,7 +197,6 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.05;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -256,6 +256,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpLandau(totalP); + P->dumpEnergy(totalP); IpplTimings::stopTimer(dumpDataTimer); // begin main timestep loop @@ -296,6 +297,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); P->dumpLandau(totalP); + P->dumpEnergy(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 9aa7f8ca5..ee0c2084e 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -257,9 +257,10 
@@ namespace ippl { vector_type kVec; double arg=0.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - kVec[d] = 2 * pi / Len[d] * iVec[d]; + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * iVec[d]; + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); arg += kVec[d]*pp(idx)[d]; } const value_type& val = dview_m(idx); @@ -276,18 +277,6 @@ namespace ippl { IpplTimings::stopTimer(scatterTimer); - //double sum = 0.0; - //Kokkos::parallel_reduce("inner product complex", f.getRangePolicy(), - // KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& val) { - // val += std::pow(viewLocal(i, j, k).real(), 2) + std::pow(viewLocal(i, j, k).imag(), 2); - // }, - // Kokkos::Sum(sum) - //); - //double globalSum = 0; - //MPI_Allreduce(&sum, &globalSum, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - - //msg << "rho inner product before all reduce: " << globalSum << endl; - static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); IpplTimings::startTimer(scatterAllReduceTimer); int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); @@ -295,17 +284,6 @@ namespace ippl { MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); IpplTimings::stopTimer(scatterAllReduceTimer); - //sum = 0.0; - //Kokkos::parallel_reduce("inner product complex2", f.getRangePolicy(), - // KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& val) { - // val += std::pow(fview(i, j, k).real(), 2) + std::pow(fview(i, j, k).imag(), 2); - // }, - // Kokkos::Sum(sum) - //); - //MPI_Allreduce(&sum, &globalSum, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - // - //msg << "rho inner product after all reduce: " << globalSum << endl; - } @@ -422,12 +400,10 @@ namespace ippl { vector_type kVec; double Dr = 0.0, arg=0.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //bool notMid = (iVec[d] != (N[d]/2)); - //For the noMid part see - //https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 - //kVec[d] = notMid * 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - kVec[d] = 2 * pi / Len[d] * iVec[d]; + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * iVec[d]; + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); Dr += kVec[d] * kVec[d]; arg += kVec[d]*pp(idx)[d]; } @@ -435,12 +411,17 @@ namespace ippl { FT Ek = 0.0; value_type Ex; for(size_t d = 0; d < Dim; ++d) { - if(Dr != 0.0) + if(Dr != 0.0) { Ek = -(imag * kVec[d] * fview(i+nghost,j+nghost,k+nghost) / Dr); + } - //Inverse Fourier transform when the lhs is real - Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) - - Ek.imag() * Kokkos::Experimental::sin(arg)); + //Inverse Fourier transform when the lhs is real. 
Use when + //we choose k \in [0 K) instead of from [-K/2+1 K/2] + //Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) + // - Ek.imag() * Kokkos::Experimental::sin(arg)); + Ek *= (Kokkos::Experimental::cos(arg) + + imag * Kokkos::Experimental::sin(arg)); + Ex[d] = Ek.real(); } innerReduce += Ex; @@ -456,77 +437,8 @@ namespace ippl { ); - //Kokkos::parallel_for("ParticleAttrib::gatherPIF", - // team_policy(Np, Kokkos::AUTO), - // KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { - // const size_t idx = teamMember.league_rank(); - - // for(size_t gd = 0; gd < Dim; ++gd) { - // double reducedValue = 0.0; - // Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), - // [=](const size_t flatIndex, double& innerReduce) - // { - // const int i = flatIndex % N[0]; - // const int j = (int)(flatIndex / N[0]); - // const int k = (int)(flatIndex / (N[0] * N[1])); - - // Vector iVec = {i, j, k}; - // vector_type kVec; - // double Dr = 0.0, arg=0.0; - // for(size_t d = 0; d < Dim; ++d) { - // bool shift = (iVec[d] > (N[d]/2)); - // bool notMid = (iVec[d] != (N[d]/2)); - // //For the noMid part see - // //https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 - // kVec[d] = notMid * 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - // Dr += kVec[d] * kVec[d]; - // arg += kVec[d]*pp(idx)[d]; - // } - - // FT Ek; - // double Ex; - // //for(size_t d = 0; d < Dim; ++d) { - // if(Dr != 0.0) - // Ek = -(imag * kVec[gd] * fview(i+nghost,j+nghost,k+nghost) / Dr); - // else - // Ek = 0.0; - // - // //Inverse Fourier transform when the lhs is real - // Ex = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) - // - Ek.imag() * Kokkos::Experimental::sin(arg)); - // //} - // - // innerReduce += Ex; - // }, reducedValue); - - // teamMember.team_barrier(); - - // if(teamMember.team_rank() == 0) { - // dview_m(idx)[gd] = reducedValue; - // } - - // } - // } - //); - - IpplTimings::stopTimer(gatherTimer); - //double Energy = 0.0; - - //Kokkos::parallel_reduce("E Energy", Np, - // KOKKOS_CLASS_LAMBDA(const int i, double& valL){ - // double myVal = dot(dview_m(i), dview_m(i)).apply(); - // valL += myVal; - // }, Kokkos::Sum(Energy)); - - //double gEnergy = 0.0; - - //MPI_Reduce(&Energy, &gEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - - //msg << "E energy in gatherPIF: " << gEnergy << endl; - - } From 2ca3462069c4e87bba52cd27a46c8a308469f450 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 2 Dec 2022 09:56:51 +0100 Subject: [PATCH 010/117] Version which has correct energy error convergence --- alpine/ElectrostaticPIC/ChargedParticles.hpp | 22 ++--- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 82 +++++++++++++++++-- 2 files changed, 85 insertions(+), 19 deletions(-) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 8e57432e1..61730648d 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -563,24 +563,26 @@ class ChargedParticles : public ippl::ParticleBase { } - void dumpEnergy(size_type totalP) { + void dumpEnergy(size_type /*totalP*/) { - auto Eview = E.getView(); double potentialEnergy, kineticEnergy; + //auto Eview = E.getView(); double temp = 0.0; - Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = dot(Eview(i), Eview(i)).apply(); - valL += myVal; - }, Kokkos::Sum(temp)); + //Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), + // KOKKOS_LAMBDA(const int i, 
double& valL){ + // double myVal = dot(Eview(i), Eview(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(temp)); double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - potentialEnergy = 0.5 * globaltemp * volume / totalP ; + //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //potentialEnergy = 0.5 * globaltemp * volume / totalP ; + rho_m = dot(E_m, E_m); + potentialEnergy = 0.5 * hr_m[0] * hr_m[1] * hr_m[2] * rho_m.sum(); auto Pview = P.getView(); auto qView = q.getView(); diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 5e63bba8f..9ff279c18 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -184,24 +184,88 @@ class ChargedParticlesPIF : public ippl::ParticleBase { } - void dumpEnergy(size_type totalP) { + void dumpEnergy(size_type /*totalP*/) { - auto Eview = E.getView(); double potentialEnergy, kineticEnergy; double temp = 0.0; - Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = dot(Eview(i), Eview(i)).apply(); - valL += myVal; - }, Kokkos::Sum(temp)); + //auto Eview = E.getView(); + //Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = dot(Eview(i), Eview(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(temp)); + + + + auto rhoview = rho_m.getView(); + const int nghost = rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rho_m.getLayout(); + const Mesh_t& mesh = rho_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Potential energy", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * iVec[d]; + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + double myVal = 0.0; + for(size_t d = 0; d < Dim; ++d) { + if(Dr != 0.0) { + Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + } + myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } + + //double myVal = rhoview(i,j,k).real() * rhoview(i,j,k).real() + + // rhoview(i,j,k).imag() * rhoview(i,j,k).imag(); + //if(Dr != 0.0) { + // myVal /= Dr; + //} + //else { + // myVal = 0.0; + //} + valL += myVal; + + }, Kokkos::Sum(temp)); + double globaltemp = 0.0; MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - potentialEnergy = 0.5 * globaltemp * volume / totalP ; - + //potentialEnergy = 0.5 * globaltemp * volume / totalP ; + potentialEnergy = 0.25 * 0.5 * globaltemp * volume; auto Pview = P.getView(); auto qView = 
q.getView(); From f79439d542a8d7bfb5d04c8b1318cbff143b19f8 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 5 Dec 2022 08:52:01 +0100 Subject: [PATCH 011/117] PinT directory created and necessary files copied and renamed --- alpine/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt index 3a6d622c5..a3884ca14 100644 --- a/alpine/CMakeLists.txt +++ b/alpine/CMakeLists.txt @@ -16,6 +16,7 @@ endmacro() add_subdirectory (ElectrostaticPIC) add_subdirectory (ElectrostaticPIF) +add_subdirectory (PinT) # vi: set et ts=4 sw=4 sts=4: From 2890a1d3221522da9b1f26272959cf86cff3149e Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 5 Dec 2022 12:26:11 +0100 Subject: [PATCH 012/117] PIF and PIC integrators made. Need to write parareal now. --- alpine/ElectrostaticPIC/LandauDamping.cpp | 3 ++- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/alpine/ElectrostaticPIC/LandauDamping.cpp b/alpine/ElectrostaticPIC/LandauDamping.cpp index 85448c342..591dff32f 100644 --- a/alpine/ElectrostaticPIC/LandauDamping.cpp +++ b/alpine/ElectrostaticPIC/LandauDamping.cpp @@ -1,6 +1,6 @@ // Landau Damping Test // Usage: -// srun ./LandauDamping --info 10 +// srun ./LandauDamping
--info 10 // nx = No. cell-centered points in the x-direction // ny = No. cell-centered points in the y-direction // nz = No. cell-centered points in the z-direction @@ -13,6 +13,7 @@ // simulations. // ovfactor = Over-allocation factor for the buffers used in the communication. Typical // values are 1.0, 2.0. Value 1.0 means no over-allocation. +// dt = Time stepsize // Example: // srun ./LandauDamping 128 128 128 10000 10 FFT 0.01 2.0 --info 10 // diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 8db6b9dce..93e9e7796 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -1,13 +1,14 @@ // Electrostatic Landau damping test with Particle-in-Fourier schemes // Usage: -// srun ./LandauDampingPIF --info 10 +// srun ./LandauDampingPIF
--info 5 // nx = No. of Fourier modes in the x-direction // ny = No. of Fourier modes in the y-direction // nz = No. of Fourier modes in the z-direction // Np = Total no. of macro-particles in the simulation // Nt = Number of time steps +// dt = Time stepsize // Example: -// srun ./LandauDampingPIF 128 128 128 10000 10 --info 10 +// srun ./LandauDampingPIF 128 128 128 10000 10 --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland From 4a9fa74d8309099fb62d4148aff6ce54dcfea56a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 5 Dec 2022 16:19:03 +0100 Subject: [PATCH 013/117] In the middle of MPI send and receive --- src/Particle/ParticleAttrib.h | 74 ----------------------------------- 1 file changed, 74 deletions(-) diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 480e8e5ed..bcfde8d3c 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -32,80 +32,6 @@ #include "Expression/IpplExpressions.h" #include "Particle/ParticleAttribBase.h" - -//namespace sample { // namespace helps with name resolution in reduction identity -// template< typename T, int N0, int N1, int N2 > -// struct array_type { -// -// using view_type = typename ippl::detail::ViewType::view_type; -// view_type viewTemp{"viewLocal",N0,N1,N2}; -// using mdrange_type3 = Kokkos::MDRangePolicy>; -// -// //KOKKOS_INLINE_FUNCTION // Default constructor - Initialize to 0's -// array_type() { -// Kokkos::deep_copy(viewTemp, 0.0); -// //Kokkos::parallel_for( -// // "array_type default constructor", -// // mdrange_type3({0, 0, 0}, -// // {viewTemp.extent(0), -// // viewTemp.extent(1), -// // viewTemp.extent(2)}), -// // KOKKOS_CLASS_LAMBDA(const size_t i, -// // const size_t j, -// // const size_t k) -// // { -// // viewTemp(i,j,k) = 0.0; -// // }); -// } -// //KOKKOS_INLINE_FUNCTION // Copy Constructor -// array_type(const array_type & rhs) { -// auto rhsView = rhs.viewTemp; -// Kokkos::deep_copy(viewTemp, rhsView); -// //Kokkos::parallel_for( -// // "array_type copy constructor", -// // mdrange_type3({0, 0, 0}, -// // {viewTemp.extent(0), -// // viewTemp.extent(1), -// // viewTemp.extent(2)}), -// // KOKKOS_CLASS_LAMBDA(const size_t i, -// // const size_t j, -// // const size_t k) -// // { -// // viewTemp(i,j,k) = rhsView(i,j,k); -// // }); -// -// } -// KOKKOS_FUNCTION // add operator -// array_type& operator+=(const array_type& src) { -// auto srcView = src.viewTemp; -// Kokkos::parallel_for( -// "array_type operator +=", -// mdrange_type3({0, 0, 0}, -// {viewTemp.extent(0), -// viewTemp.extent(1), -// viewTemp.extent(2)}), -// KOKKOS_CLASS_LAMBDA(const size_t i, -// const size_t j, -// const size_t k) -// { -// viewTemp(i,j,k) += srcView(i,j,k); -// }); -// -// return *this; -// } -// }; -// typedef array_type,34,34,34> ValueType; -//} -// -//namespace Kokkos { //reduction identity must be defined in Kokkos namespace -// template<> -// struct reduction_identity< sample::ValueType > { -// KOKKOS_FORCEINLINE_FUNCTION static sample::ValueType sum() { -// return sample::ValueType(); -// } -// }; -//} - namespace Kokkos { //reduction identity must be defined in Kokkos namespace template<> struct reduction_identity< ippl::Vector > { From 6888fed70bc6cdfdd115cd545b518581a5605ec8 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 6 Dec 2022 14:32:53 +0100 Subject: [PATCH 014/117] PinT directory and files added in alpine --- alpine/PinT/CMakeLists.txt | 26 +++ 
alpine/PinT/ChargedParticlesPinT.hpp | 320 +++++++++++++++++++++++++++ alpine/PinT/LandauDampingPinT.cpp | 315 ++++++++++++++++++++++++++ alpine/PinT/LeapFrogPIC.cpp | 60 +++++ alpine/PinT/LeapFrogPIF.cpp | 56 +++++ 5 files changed, 777 insertions(+) create mode 100644 alpine/PinT/CMakeLists.txt create mode 100644 alpine/PinT/ChargedParticlesPinT.hpp create mode 100644 alpine/PinT/LandauDampingPinT.cpp create mode 100644 alpine/PinT/LeapFrogPIC.cpp create mode 100644 alpine/PinT/LeapFrogPIF.cpp diff --git a/alpine/PinT/CMakeLists.txt b/alpine/PinT/CMakeLists.txt new file mode 100644 index 000000000..f73338484 --- /dev/null +++ b/alpine/PinT/CMakeLists.txt @@ -0,0 +1,26 @@ +file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") +message (STATUS "Adding index test found in ${_relPath}") + +include_directories ( + ${CMAKE_SOURCE_DIR}/src +) + +link_directories ( + ${CMAKE_CURRENT_SOURCE_DIR} + ${Kokkos_DIR}/.. +) + +set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) +set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) + +add_executable (LandauDampingPinT LandauDampingPinT.cpp) +target_link_libraries (LandauDampingPinT ${IPPL_LIBS}) + +# vi: set et ts=4 sw=4 sts=4: + +# Local Variables: +# mode: cmake +# cmake-tab-width: 4 +# indent-tabs-mode: nil +# require-final-newline: nil +# End: diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp new file mode 100644 index 000000000..98a80618d --- /dev/null +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -0,0 +1,320 @@ +// ChargedParticlesPinT header file +// Defines a particle attribute for charged particles to be used in +// test programs +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . 
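The spectral potential-energy diagnostic introduced in PATCH 010 above, and reused by the ChargedParticlesPinT class that follows, reconstructs the electric field mode by mode from the Fourier-space charge density and sums |E(k)|^2. A minimal one-dimensional sketch of that reduction is given below; it assumes the same index-to-wavenumber shift and the 0.25 * 0.5 prefactor that appear in the code, and every name in it is illustrative rather than part of the patch.

#include <cmath>
#include <complex>
#include <cstdio>
#include <vector>

// Sketch only (assumed notation): given Fourier coefficients rhoHat[i] of the
// charge density on a periodic box of length L, build E(k) = -i*k*rhoHat(k)/|k|^2
// and accumulate 0.25 * 0.5 * V * sum_k |E(k)|^2, as the dumpEnergy reductions do.
double potentialEnergySketch1D(const std::vector<std::complex<double>>& rhoHat, double L) {
    const double pi = std::acos(-1.0);
    const std::complex<double> imag{0.0, 1.0};
    const int N = static_cast<int>(rhoHat.size());
    double sum = 0.0;
    for (int i = 0; i < N; ++i) {
        const bool shift = (i > N / 2);              // map the bin index to a signed wavenumber
        const double k = 2.0 * pi / L * (i - shift * N);
        if (k == 0.0) continue;                      // the zero mode carries no field
        const std::complex<double> Ek = -(imag * k * rhoHat[i]) / (k * k);
        sum += std::norm(Ek);                        // |E(k)|^2
    }
    return 0.25 * 0.5 * sum * L;                     // the "volume" is just L in one dimension
}

int main() {
    std::vector<std::complex<double>> rhoHat(8, {0.0, 0.0});
    rhoHat[1] = {0.1, 0.0};                          // one excited mode, purely illustrative
    rhoHat[7] = {0.1, 0.0};                          // and its conjugate partner
    std::printf("W_pot (sketch) = %.6e\n", potentialEnergySketch1D(rhoHat, 4.0 * std::acos(-1.0)));
    return 0;
}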
+// + +#include "Ippl.h" + +// dimension of our positions +constexpr unsigned Dim = 3; + +// some typedefs +typedef ippl::ParticleSpatialLayout PLayout_t; +typedef ippl::UniformCartesian Mesh_t; +typedef ippl::FieldLayout FieldLayout_t; + +using size_type = ippl::detail::size_type; + +template +using Vector = ippl::Vector; + +template +using Field = ippl::Field; + +template +using ParticleAttrib = ippl::ParticleAttrib; + +typedef Vector Vector_t; +typedef Field Field_t; +typedef Field, Dim> CxField_t; +typedef Field VField_t; +typedef ippl::FFTPeriodicPoissonSolver Solver_t; + +const double pi = std::acos(-1.0); + +// Test programs have to define this variable for VTK dump purposes +extern const char* TestName; + +template +class ChargedParticlesPinT : public ippl::ParticleBase { +public: + CxField_t rhoPIF_m; + Field_t rhoPIC_m; + VField_t EfieldPIC_m; + + Vector nr_m; + + ippl::e_dim_tag decomp_m[Dim]; + + Vector_t hr_m; + Vector_t rmin_m; + Vector_t rmax_m; + + double Q_m; + + double time_m; + + double rhoNorm_m; + + +public: + ParticleAttrib q; // charge + typename ippl::ParticleBase::particle_position_type P; // particle velocity + typename ippl::ParticleBase::particle_position_type E; // electric field at particle position + + + typename ippl::ParticleBase::particle_position_type R0; // Initial particle positions at t=0 + typename ippl::ParticleBase::particle_position_type P0; // Initial particle velocities at t=0 + + typename ippl::ParticleBase::particle_position_type Rend; // Particle positions at end of each time slice + typename ippl::ParticleBase::particle_position_type Pend; // Particle velocities at end of each time slice + + typename ippl::ParticleBase::particle_position_type GR; // G(R^(k-1)_n) + typename ippl::ParticleBase::particle_position_type GP; // G(P^(k-1)_n) + + ChargedParticlesPinT(PLayout& pl, + Vector_t hr, + Vector_t rmin, + Vector_t rmax, + ippl::e_dim_tag decomp[Dim], + double Q) + : ippl::ParticleBase(pl) + , hr_m(hr) + , rmin_m(rmin) + , rmax_m(rmax) + , Q_m(Q) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + this->addAttribute(R0); + this->addAttribute(P0); + this->addAttribute(Rend); + this->addAttribute(Pend); + this->addAttribute(GR); + this->addAttribute(GP); + setupBCs(); + for (unsigned int i = 0; i < Dim; i++) + decomp_m[i]=decomp[i]; + } + + ~ChargedParticlesPinT(){ } + + void setupBCs() { + setBCAllPeriodic(); + } + + + void initFFTSolver() { + ippl::ParameterList sp; + sp.add("output_type", Solver_t::GRAD); + sp.add("use_heffte_defaults", false); + sp.add("use_pencils", true); + sp.add("use_reorder", false); + sp.add("use_gpu_aware", true); + sp.add("comm", ippl::p2p_pl); + sp.add("r2c_direction", 0); + + solver_mp = std::make_shared(); + + solver_mp->mergeParameters(sp); + + solver_mp->setRhs(rhoPIC_m); + + solver_mp->setLhs(EfieldPIC_m); + } + + + void dumpLandau(size_type totalP) { + + auto Eview = E.getView(); + + double fieldEnergy, ExAmp; + double temp = 0.0; + + Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = Eview(i)[0] * Eview(i)[0]; + valL += myVal; + }, Kokkos::Sum(temp)); + + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy = globaltemp * volume / totalP ; + + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", 
this->getLocalNum(), + KOKKOS_LAMBDA(const size_t i, double& valL) + { + double myVal = std::fabs(Eview(i)[0]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + ExAmp = 0.0; + MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + + void dumpEnergy(size_type /*totalP*/) { + + + double potentialEnergy, kineticEnergy; + double temp = 0.0; + + + auto rhoview = rho_m.getView(); + const int nghost = rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rho_m.getLayout(); + const Mesh_t& mesh = rho_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Potential energy", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * iVec[d]; + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + double myVal = 0.0; + for(size_t d = 0; d < Dim; ++d) { + if(Dr != 0.0) { + Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + } + myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } + + //double myVal = rhoview(i,j,k).real() * rhoview(i,j,k).real() + + // rhoview(i,j,k).imag() * rhoview(i,j,k).imag(); + //if(Dr != 0.0) { + // myVal /= Dr; + //} + //else { + // myVal = 0.0; + //} + valL += myVal; + + }, Kokkos::Sum(temp)); + + + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //potentialEnergy = 0.5 * globaltemp * volume / totalP ; + potentialEnergy = 0.25 * 0.5 * globaltemp * volume; + + auto Pview = P.getView(); + auto qView = q.getView(); + + temp = 0.0; + + Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + myVal *= -qView(i); + valL += myVal; + }, Kokkos::Sum(temp)); + + temp *= 0.5; + globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + kineticEnergy = globaltemp; + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/Energy_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + } + + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy 
+ kineticEnergy << endl; + + } + + Ippl::Comm->barrier(); + } + +private: + void setBCAllPeriodic() { + + this->setParticleBC(ippl::BC::PERIODIC); + } + +}; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp new file mode 100644 index 000000000..5e2694088 --- /dev/null +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -0,0 +1,315 @@ +// Parallel-in-time (PinT) method Parareal combined with Particle-in-cell +// and Particle-in-Fourier schemes. The example is electrostatic Landau +// damping. The implementation of Parareal follows the open source implementation +// https://github.com/Parallel-in-Time/PararealF90 by Daniel Ruprecht. The corresponding +// publication is Ruprecht, Daniel. "Shared memory pipelined parareal." +// European Conference on Parallel Processing. Springer, Cham, 2017. +// +// Usage: +// srun ./LandauDampingPinT --info 5 +// nx = No. of Fourier modes in the x-direction +// ny = No. of Fourier modes in the y-direction +// nz = No. of Fourier modes in the z-direction +// Np = Total no. of macro-particles in the simulation +// Example: +// srun ./LandauDampingPinT 128 128 128 10000 20 0.05 0.05 1e-5 100 --info 5 +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "ChargedParticlesPinT.hpp" +#include "LeapFrogPIC.cpp" +#include "LeapFrogPIF.cpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T k, alpha, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& k_, const T& alpha_, + const T& u_) + : k(k_), alpha(alpha_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = x + (alpha * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = 1 + (alpha * std::cos(k * x)); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type alpha; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + value_type& alpha_, T& k_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + alpha(alpha_), k(k_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = u / (1 + alpha); + 
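// A standalone sketch (not part of this patch) of the inversion performed by the
// Newton1D functor above: solve u = x + alpha*sin(k*x)/k for x by Newton's method,
// so that uniform u samples give the perturbed Landau-damping positions. Only the
// formula is taken from the code; the driver and names below are illustrative.

#include <cmath>
#include <cstdio>

double invertLandauCDF(double u, double alpha, double k,
                       double tol = 1e-12, int maxIter = 20) {
    double x = u / (1.0 + alpha);                  // same starting guess as generate_random
    for (int i = 0; i < maxIter; ++i) {
        double f = x + alpha * std::sin(k * x) / k - u;
        if (std::fabs(f) <= tol) break;
        double fprime = 1.0 + alpha * std::cos(k * x);
        x -= f / fprime;                           // Newton update
    }
    return x;
}

int main() {
    // alpha = 0.05 and k = 0.5, as in the Landau-damping setup of this file
    std::printf("x(u=3.0) = %.12f\n", invertLandauCDF(3.0, 0.05, 0.5));
    return 0;
}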
Newton1D solver(k[d], alpha, u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& alpha, const double& k) { + double cdf = x + (alpha / k) * std::sin(k * x); + return cdf; +} + +KOKKOS_FUNCTION +double PDF(const Vector_t& xvec, const double& alpha, + const Vector_t& kw, const unsigned Dim) { + double pdf = 1.0; + + for (unsigned d = 0; d < Dim; ++d) { + pdf *= (1.0 + alpha * std::cos(kw[d] * xvec[d])); + } + return pdf; +} + +const char* TestName = "LandauDampingPinT"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + Inform msg("LandauDampingPinT"); + Inform msg2all("LandauDampingPinT",INFORM_ALL_NODES); + + ippl::Vector nr = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[4]); + const double tEnd = std::atof(argv[5]); + const double dtSlice = tEnd / Ippl::Comm->size(); + const double dtFine = std::atof(argv[6]); + const double dtCoarse = std::atof(argv[7]); + const unsigned int ntFine = (unsigned int)(dtSlice / dtFine); + const unsigned int ntCoarse = (unsigned int)(dtSlice / dtCoarse); + const double tol = std::atof(argv[8]); + const unsigned int maxIter = std::atoi(argv[9]); + + const double tStartMySlice = Ippl::Comm->rank() * dtSlice; + const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; + + msg << "Parareal Landau damping" + << endl + << "Slice dT: " << dtSlice + << "No. of fine time steps: " << ntFine + << "No. of coarse time steps: " << ntCoarse + << endl + << "Tolerance: " << tol + << "Max. 
iterations: " << maxIter + << endl + << " Np= " + << totalP << " Fourier modes = " << nr + << endl; + + using bunch_type = ChargedParticlesPinT; + + std::unique_ptr P; + + ippl::NDIndex domain; + for (unsigned i = 0; i< Dim; i++) { + domain[i] = ippl::Index(nr[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t kw = {0.5, 0.5, 0.5}; + double alpha = 0.05; + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw ; + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; + + Vector_t hr = {dx, dy, dz}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + + const bool isAllPeriodic=true; + Mesh_t mesh(domain, hr, origin); + FieldLayout_t FL(domain, decomp, isAllPeriodic); + PLayout_t PL(FL, mesh); + + //Q = -\int\int f dx dv + double Q = -rmax[0] * rmax[1] * rmax[2]; + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + + P->nr_m = nr; + + P->rhoPIF_m.initialize(mesh, FL); + P->rhoPIC_m.initialize(mesh, FL); + P->EfieldPIC_m.initialize(mesh, FL); + + P->initFFTSolver(); + P->time_m = 0.0; + + IpplTimings::startTimer(particleCreation); + + Vector_t minU, maxU; + for (unsigned d = 0; d create(nloc); + //Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m/totalP; + msg << "particles created and initial conditions assigned " << endl; + + //Copy initial conditions as they are needed later + Kokkos::deep_copy(P->R0.getView(), P->R.getView()); + Kokkos::deep_copy(P->P0.getView(), P->P.getView()); + + P->scatter(P->q, P->rhoPIC_m, P->R); + P->rhoPIC_m = P->rhoPIC_m / (hr[0] * hr[1] * hr[2]); + + P->rhoPIC_m = P->rhoPIC_m - (P->Q_m/((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2]))); + + P->solver_mp->solve(); + + P->gather(P->E, P->EfieldPIC_m, P->R); + + //Get initial guess for ranks other than 0 by propagating the coarse solver + if (Ippl::Comm->rank() > 0) { + LeapFrogPIC(*P, P->R, P->P, Ippl::Comm->rank()*ntCoarse, dtCoarse); + } + + Ippl::Comm->barrier(); + + Kokkos::deep_copy(P->GR.getView(), P->R.getView()); + Kokkos::deep_copy(P->GP.getView(), P->P.getView()); + + //Run the coarse integrator to get the values at the end of the time slice + LeapFrogPIC(*P, P->GR, P->GP, ntCoarse, dtCoarse); + + + msg << "Starting parareal iterations ..." << endl; + for (unsigned int it=0; itR, P->P, ntFine, dtFine); + + //Difference = Fine - Coarse + P->Rend = P->R - P->GR; + P->Pend = P->P - P->GP; + + if(Ippl::Comm-> rank() > 0) { + + MPI_Recv(P->R.getView().data(), nloc, + MPI_BYTE, src, tag, comm_m, &status); + + + msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + } + + msg << "LandauDamping: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + return 0; +} diff --git a/alpine/PinT/LeapFrogPIC.cpp b/alpine/PinT/LeapFrogPIC.cpp new file mode 100644 index 000000000..0de516a80 --- /dev/null +++ b/alpine/PinT/LeapFrogPIC.cpp @@ -0,0 +1,60 @@ +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. 
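LandauDampingPinT.cpp above applies the classic parareal correction: with a cheap coarse propagator G (LeapFrogPIC, large time step) and an accurate fine propagator F (LeapFrogPIF, small time step), each iteration updates the slice-boundary state as U_{n+1}^{k+1} = G(U_n^{k+1}) + F(U_n^k) - G(U_n^k). A serial toy sketch of that update on a scalar ODE follows; the propagators and all names in it are stand-ins, not the ones from this patch.

#include <cmath>
#include <cstdio>
#include <vector>

// Parareal on y' = lambda*y: coarse propagator G = one Euler step per slice,
// fine propagator F = many Euler steps per slice. Purely illustrative.
int main() {
    const double lambda = -1.0, T = 2.0;
    const int nSlices = 8, nFine = 100, maxIter = 5;
    const double dtSlice = T / nSlices;

    auto G = [&](double y) { return y * (1.0 + lambda * dtSlice); };
    auto F = [&](double y) {
        const double dt = dtSlice / nFine;
        for (int s = 0; s < nFine; ++s) y *= (1.0 + lambda * dt);
        return y;
    };

    // Initial guess at the slice boundaries from the coarse propagator alone.
    std::vector<double> U(nSlices + 1, 1.0);
    for (int n = 0; n < nSlices; ++n) U[n + 1] = G(U[n]);

    for (int k = 0; k < maxIter; ++k) {
        std::vector<double> Uold = U;          // states from the previous iteration
        for (int n = 0; n < nSlices; ++n) {
            // U^{k+1}_{n+1} = G(U^{k+1}_n) + F(U^k_n) - G(U^k_n)
            U[n + 1] = G(U[n]) + F(Uold[n]) - G(Uold[n]);
        }
        std::printf("iteration %d: error at T = %.3e\n", k + 1,
                    std::fabs(U[nSlices] - std::exp(lambda * T)));
    }
    return 0;
}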
+// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +//#include "ChargedParticlesPinT.hpp" + +void LeapFrogPIC(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int nt, + const double dt) { + + const auto& PL = P.getLayout(); + + const auto& hr = P.hr_m; + const auto& rmax = P.rmax_m; + const auto& rmin = P.rmin_m; + for (unsigned int it=0; itsolve(); + + // gather E field + P.gather(P.E, P.EfieldPIC_m, Rtemp); + + //kick + Ptemp = Ptemp - 0.5 * dt * P.E; + } + +} diff --git a/alpine/PinT/LeapFrogPIF.cpp b/alpine/PinT/LeapFrogPIF.cpp new file mode 100644 index 000000000..399ffb1f6 --- /dev/null +++ b/alpine/PinT/LeapFrogPIF.cpp @@ -0,0 +1,56 @@ +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +//#include "ChargedParticlesPinT.hpp" + +void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int nt, + const double dt) { + + const auto& PL = P.getLayout(); + const auto& rmax = P.rmax_m; + const auto& rmin = P.rmin_m; + + for (unsigned int it=0; it Date: Tue, 6 Dec 2022 15:36:05 +0100 Subject: [PATCH 015/117] additional classes created and code modified.
Need to do MPI send/recv --- alpine/PinT/ChargedParticlesPinT.hpp | 12 +---- alpine/PinT/LandauDampingPinT.cpp | 80 ++++++++++++++++++---------- alpine/PinT/StatesBeginSlice.hpp | 31 +++++++++++ alpine/PinT/StatesEndSlice.hpp | 31 +++++++++++ 4 files changed, 116 insertions(+), 38 deletions(-) create mode 100644 alpine/PinT/StatesBeginSlice.hpp create mode 100644 alpine/PinT/StatesEndSlice.hpp diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 98a80618d..cfc53f50b 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -72,18 +72,12 @@ class ChargedParticlesPinT : public ippl::ParticleBase { public: ParticleAttrib q; // charge - typename ippl::ParticleBase::particle_position_type P; // particle velocity + typename ippl::ParticleBase::particle_position_type P; // G(P^(k-1)_n) typename ippl::ParticleBase::particle_position_type E; // electric field at particle position - typename ippl::ParticleBase::particle_position_type R0; // Initial particle positions at t=0 typename ippl::ParticleBase::particle_position_type P0; // Initial particle velocities at t=0 - typename ippl::ParticleBase::particle_position_type Rend; // Particle positions at end of each time slice - typename ippl::ParticleBase::particle_position_type Pend; // Particle velocities at end of each time slice - - typename ippl::ParticleBase::particle_position_type GR; // G(R^(k-1)_n) - typename ippl::ParticleBase::particle_position_type GP; // G(P^(k-1)_n) ChargedParticlesPinT(PLayout& pl, Vector_t hr, @@ -103,10 +97,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { this->addAttribute(E); this->addAttribute(R0); this->addAttribute(P0); - this->addAttribute(Rend); - this->addAttribute(Pend); - this->addAttribute(GR); - this->addAttribute(GP); setupBCs(); for (unsigned int i = 0; i < Dim; i++) decomp_m[i]=decomp[i]; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 5e2694088..78c3a2c95 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -30,6 +30,8 @@ // #include "ChargedParticlesPinT.hpp" +#include "StatesBeginSlice.hpp" +#include "StatesEndSlice.hpp" #include "LeapFrogPIC.cpp" #include "LeapFrogPIF.cpp" #include @@ -193,8 +195,12 @@ int main(int argc, char *argv[]){ << endl; using bunch_type = ChargedParticlesPinT; + using states_begin_type = StatesBeginSlice; + using states_end_type = StatesEndSlice; - std::unique_ptr P; + std::unique_ptr Pcoarse; + std::unique_ptr Pbegin; + std::unique_ptr Pend; ippl::NDIndex domain; for (unsigned i = 0; i< Dim; i++) { @@ -225,16 +231,18 @@ int main(int argc, char *argv[]){ //Q = -\int\int f dx dv double Q = -rmax[0] * rmax[1] * rmax[2]; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + Pcoarse = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); - P->nr_m = nr; + Pcoarse->nr_m = nr; - P->rhoPIF_m.initialize(mesh, FL); - P->rhoPIC_m.initialize(mesh, FL); - P->EfieldPIC_m.initialize(mesh, FL); + Pcoarse->rhoPIF_m.initialize(mesh, FL); + Pcoarse->rhoPIC_m.initialize(mesh, FL); + Pcoarse->EfieldPIC_m.initialize(mesh, FL); - P->initFFTSolver(); - P->time_m = 0.0; + Pcoarse->initFFTSolver(); + Pcoarse->time_m = 0.0; IpplTimings::startTimer(particleCreation); @@ -246,64 +254,82 @@ int main(int argc, char *argv[]){ size_type nloc = totalP; - P->create(nloc); + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); //Kokkos::Random_XorShift64_Pool<> 
rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( - P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, alpha, kw, minU, maxU)); Kokkos::fence(); Ippl::Comm->barrier(); IpplTimings::stopTimer(particleCreation); - P->q = P->Q_m/totalP; + Pcoarse->q = Pcoarse->Q_m/totalP; msg << "particles created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later - Kokkos::deep_copy(P->R0.getView(), P->R.getView()); - Kokkos::deep_copy(P->P0.getView(), P->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - P->scatter(P->q, P->rhoPIC_m, P->R); - P->rhoPIC_m = P->rhoPIC_m / (hr[0] * hr[1] * hr[2]); + Pcoarse->rhoPIC_m = 0.0; + Pcoarse->scatter(Pcoarse->q, Pcoarse->rhoPIC_m, Pcoarse->R); + Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m / (hr[0] * hr[1] * hr[2]); - P->rhoPIC_m = P->rhoPIC_m - (P->Q_m/((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2]))); + Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m - (Pcoarse->Q_m/((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2]))); - P->solver_mp->solve(); + Pcoarse->solver_mp->solve(); - P->gather(P->E, P->EfieldPIC_m, P->R); + Pcoarse->gather(Pcoarse->E, Pcoarse->EfieldPIC_m, Pcoarse->R); //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { - LeapFrogPIC(*P, P->R, P->P, Ippl::Comm->rank()*ntCoarse, dtCoarse); + LeapFrogPIC(*Pcoarse, Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse); } Ippl::Comm->barrier(); - Kokkos::deep_copy(P->GR.getView(), P->R.getView()); - Kokkos::deep_copy(P->GP.getView(), P->P.getView()); + + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + + //Compute initial E fields corresponding to fine integrator + Pcoarse->rhoPIF_m = {0.0, 0.0}; + Pcoarse->scatterPIF(Pcoarse->q, Pcoarse->rhoPIF_m, Pcoarse->R); + + Pcoarse->rhoPIF_m = Pcoarse->rhoPIF_m / + ((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2])); + + Pcoarse->gatherPIF(Pcoarse->E, Pcoarse->rhoPIF_m, Pcoarse->R); + //Run the coarse integrator to get the values at the end of the time slice - LeapFrogPIC(*P, P->GR, P->GP, ntCoarse, dtCoarse); + LeapFrogPIC(*Pcoarse, Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); + + //The following might not be needed + Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); msg << "Starting parareal iterations ..." << endl; for (unsigned int it=0; itR, P->P, ntFine, dtFine); + LeapFrogPIF(*Pcoarse, Pbegin->R, Pbegin->P, ntFine, dtFine); //Difference = Fine - Coarse - P->Rend = P->R - P->GR; - P->Pend = P->P - P->GP; + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; if(Ippl::Comm-> rank() > 0) { - MPI_Recv(P->R.getView().data(), nloc, + MPI_Recv(Pcoarse->R.getView().data(), nloc, MPI_BYTE, src, tag, comm_m, &status); - msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + msg << "Finished iteration: " << it+1 << endl; } msg << "LandauDamping: End." 
<< endl; diff --git a/alpine/PinT/StatesBeginSlice.hpp b/alpine/PinT/StatesBeginSlice.hpp new file mode 100644 index 000000000..621e88038 --- /dev/null +++ b/alpine/PinT/StatesBeginSlice.hpp @@ -0,0 +1,31 @@ +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + + +template +class StatesBeginSlice : public ippl::ParticleBase { + +public: + typename ippl::ParticleBase::particle_position_type P; + + StatesBeginSlice(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(P); + } + + ~StatesBeginSlice(){ } + +}; diff --git a/alpine/PinT/StatesEndSlice.hpp b/alpine/PinT/StatesEndSlice.hpp new file mode 100644 index 000000000..6b69996a1 --- /dev/null +++ b/alpine/PinT/StatesEndSlice.hpp @@ -0,0 +1,31 @@ +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + + +template +class StatesEndSlice : public ippl::ParticleBase { + +public: + typename ippl::ParticleBase::particle_position_type P; + + StatesEndSlice(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(P); + } + + ~StatesEndSlice(){ } + +}; From e4223cadebc8d85e9544dbe1027c1523cf6f187f Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 6 Dec 2022 21:55:13 +0100 Subject: [PATCH 016/117] Parareal almost completed. 
Need to do convergence check and data writing --- alpine/PinT/LandauDampingPinT.cpp | 41 ++++++++++++++++++++++++++----- src/Communicate/Tags.h | 34 +++---------------------- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 78c3a2c95..bc959ee0b 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -257,7 +257,6 @@ int main(int argc, char *argv[]){ Pcoarse->create(nloc); Pbegin->create(nloc); Pend->create(nloc); - //Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( @@ -278,7 +277,8 @@ int main(int argc, char *argv[]){ Pcoarse->scatter(Pcoarse->q, Pcoarse->rhoPIC_m, Pcoarse->R); Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m / (hr[0] * hr[1] * hr[2]); - Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m - (Pcoarse->Q_m/((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2]))); + Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m - + (Pcoarse->Q_m/((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2]))); Pcoarse->solver_mp->solve(); @@ -313,6 +313,7 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + using buffer_type = ippl::Communicate::buffer_type; msg << "Starting parareal iterations ..." << endl; for (unsigned int it=0; itR = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; - if(Ippl::Comm-> rank() > 0) { - MPI_Recv(Pcoarse->R.getView().data(), nloc, - MPI_BYTE, src, tag, comm_m, &status); + int tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(Ippl::Comm->rank() > 0) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + } + else { + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + } + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + + + LeapFrogPIC(*Pcoarse, Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); + + + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; + + + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } msg << "Finished iteration: " << it+1 << endl; } - msg << "LandauDamping: End." << endl; + msg << "LandauDamping Parareal: End." << endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); diff --git a/src/Communicate/Tags.h b/src/Communicate/Tags.h index 8d6db8bcd..1e07ed717 100644 --- a/src/Communicate/Tags.h +++ b/src/Communicate/Tags.h @@ -26,13 +26,6 @@ #define IPPL_EXIT_TAG 6 // program should exit() -// tags for reduction -#define COMM_REDUCE_SEND_TAG 10000 -#define COMM_REDUCE_RECV_TAG 11000 -#define COMM_REDUCE_SCATTER_TAG 12000 -#define COMM_REDUCE_CYCLE 1000 - - // tag for applying parallel periodic boundary condition. 
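The loop above turns the parareal iteration into a pipeline across ranks: every rank owns one time slice, rank r > 0 blocks on a receive of the corrected begin state from rank r-1, re-runs the coarse propagator, adds the stored fine-minus-coarse difference, and forwards the corrected end state to rank r+1 using the new IPPL_PARAREAL_* tags. A bare-bones MPI sketch of that pattern follows, with hypothetical names and plain doubles in place of the particle buffers.

#include <mpi.h>
#include <cstdio>
#include <vector>

// Illustrative pipeline only: one "time slice" per rank, state flows left to right.
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    std::vector<double> state(4, 0.0);     // stand-in for the particle state (R, P)
    const int tag = 42;

    if (rank > 0) {                        // wait for the corrected begin state
        MPI_Recv(state.data(), static_cast<int>(state.size()), MPI_DOUBLE,
                 rank - 1, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    for (double& x : state) x += 1.0;      // placeholder for G plus the (F - G) correction

    if (rank < size - 1) {                 // forward the corrected end state
        MPI_Send(state.data(), static_cast<int>(state.size()), MPI_DOUBLE,
                 rank + 1, tag, MPI_COMM_WORLD);
    }
    std::printf("rank %d done, state[0] = %.1f\n", rank, state[0]);
    MPI_Finalize();
    return 0;
}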
#define BC_PARALLEL_PERIODIC_TAG 15000 @@ -48,28 +41,6 @@ namespace ippl { } } -#define F_GUARD_CELLS_TAG 20000 // Field::fillGuardCells() -#define F_WRITE_TAG 21000 // Field::write() -#define F_READ_TAG 22000 // Field::read() -#define F_GEN_ASSIGN_TAG 23000 // assign(BareField,BareField) -#define F_REPARTITION_BCAST_TAG 24000 // broadcast in FieldLayout::repartion. -#define F_REDUCE_PERP_TAG 25000 // reduction in binary load balance. -#define F_GETSINGLE_TAG 26000 // IndexedBareField::getsingle() -#define F_REDUCE_TAG 27000 // Reduction in minloc/maxloc -#define F_LAYOUT_IO_TAG 28000 // Reduction in minloc/maxloc -#define F_TAG_CYCLE 1000 - -// // Tags for FieldView and FieldBlock -// #define FV_2D_TAG 30000 // FieldView::update_2D_data() -// #define FV_3D_TAG 31000 // FieldView::update_2D_data() -// #define FV_TAG_CYCLE 1000 -// -// #define FB_WRITE_TAG 32000 // FieldBlock::write() -// #define FB_READ_TAG 33000 // FieldBlock::read() -// #define FB_TAG_CYCLE 1000 -// -// #define FP_GATHER_TAG 34000 // FieldPrint::print() -// #define FP_TAG_CYCLE 1000 // Special tags used by Particle classes for communication. #define P_WEIGHTED_LAYOUT_TAG 50000 @@ -88,7 +59,7 @@ namespace ippl { #define IPPL_TAG_CYCLE 1000 // Tags for Ippl application codes -#define IPPL_APP_TAG0 90000 +#define IPPL_PARAREAL_APP 90000 #define IPPL_APP_TAG1 91000 #define IPPL_APP_TAG2 92000 #define IPPL_APP_TAG3 93000 @@ -128,4 +99,7 @@ namespace ippl { #define OPEN_SOLVER_TAG 18000 #define VICO_SOLVER_TAG 70000 +#define IPPL_PARAREAL_SEND 19000 +#define IPPL_PARAREAL_RECV 21000 + #endif // TAGS_H From fa4c8cec2d224e1ef73303ec00b34a29a8ea3b1f Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 7 Dec 2022 11:11:43 +0100 Subject: [PATCH 017/117] Some modifications made and output writing done --- alpine/PinT/ChargedParticlesPinT.hpp | 321 ++++++++++++++------------- alpine/PinT/LandauDampingPinT.cpp | 8 +- alpine/PinT/LeapFrogPIF.cpp | 7 +- src/FFT/FFT.hpp | 2 +- src/Field/BareField.hpp | 18 +- src/Particle/ParticleAttrib.hpp | 12 +- 6 files changed, 199 insertions(+), 169 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index cfc53f50b..d894587c9 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -131,174 +131,179 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void dumpLandau(size_type totalP) { - auto Eview = E.getView(); - - double fieldEnergy, ExAmp; - double temp = 0.0; - - Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = Eview(i)[0] * Eview(i)[0]; - valL += myVal; - }, Kokkos::Sum(temp)); - - double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - fieldEnergy = globaltemp * volume / totalP ; - - double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), - KOKKOS_LAMBDA(const size_t i, double& valL) - { - double myVal = std::fabs(Eview(i)[0]); - if(myVal > valL) valL = myVal; - }, Kokkos::Max(tempMax)); - ExAmp = 0.0; - MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - - - if (Ippl::Comm->rank() == 0) { - std::stringstream fname; - fname << "data/FieldLandau_"; - fname << Ippl::Comm->size(); - fname << ".csv"; - - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(10); - csvout.setf(std::ios::scientific, 
std::ios::floatfield); - - if(time_m == 0.0) { - csvout << "time, Ex_field_energy, Ex_max_norm" << endl; - } - - csvout << time_m << " " - << fieldEnergy << " " - << ExAmp << endl; - - } - - Ippl::Comm->barrier(); + auto Eview = E.getView(); + + double fieldEnergy, ExAmp; + double temp = 0.0; + + Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = Eview(i)[0] * Eview(i)[0]; + valL += myVal; + }, Kokkos::Sum(temp)); + + //double globaltemp = 0.0; + double globaltemp = temp; + //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy = globaltemp * volume / totalP ; + + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t i, double& valL) + { + double myVal = std::fabs(Eview(i)[0]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + //ExAmp = 0.0; + ExAmp = tempMax; + //MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + for (int rank=0; rank < Ippl::Comm->size(); ++rank) { + if(Ippl::Comm->rank() == rank) { + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, rank); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; + } + Ippl::Comm->barrier(); + } } void dumpEnergy(size_type /*totalP*/) { - double potentialEnergy, kineticEnergy; - double temp = 0.0; + double potentialEnergy, kineticEnergy; + double temp = 0.0; - auto rhoview = rho_m.getView(); - const int nghost = rho_m.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; + auto rhoview = rhoPIF_m.getView(); + const int nghost = rhoPIF_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; - const FieldLayout_t& layout = rho_m.getLayout(); - const Mesh_t& mesh = rho_m.get_mesh(); - const Vector& dx = mesh.getMeshSpacing(); - const auto& domain = layout.getDomain(); - Vector Len; - Vector N; - - for (unsigned d=0; d < Dim; ++d) { - N[d] = domain[d].length(); - Len[d] = dx[d] * N[d]; - } - - - Kokkos::complex imag = {0.0, 1.0}; - double pi = std::acos(-1.0); - Kokkos::parallel_reduce("Potential energy", - mdrange_type({0, 0, 0}, - {N[0], - N[1], - N[2]}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k, - double& valL) - { - - Vector iVec = {i, j, k}; - Vector kVec; - double Dr = 0.0; - for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - //kVec[d] = 2 * pi / Len[d] * iVec[d]; - Dr += kVec[d] * kVec[d]; - } - - Kokkos::complex Ek = {0.0, 0.0}; - double myVal = 0.0; - for(size_t d = 0; d < Dim; ++d) { - if(Dr != 0.0) { - Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - } - myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); - } - - //double myVal = rhoview(i,j,k).real() * rhoview(i,j,k).real() + - // rhoview(i,j,k).imag() * rhoview(i,j,k).imag(); - //if(Dr != 0.0) { - // myVal /= Dr; - //} - //else { - // myVal = 0.0; - //} - valL += myVal; - - }, Kokkos::Sum(temp)); - - - double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] 
- rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //potentialEnergy = 0.5 * globaltemp * volume / totalP ; - potentialEnergy = 0.25 * 0.5 * globaltemp * volume; - - auto Pview = P.getView(); - auto qView = q.getView(); - - temp = 0.0; - - Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = dot(Pview(i), Pview(i)).apply(); - myVal *= -qView(i); - valL += myVal; - }, Kokkos::Sum(temp)); - - temp *= 0.5; - globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - - kineticEnergy = globaltemp; + const FieldLayout_t& layout = rhoPIF_m.getLayout(); + const Mesh_t& mesh = rhoPIF_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Potential energy", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * iVec[d]; + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + double myVal = 0.0; + for(size_t d = 0; d < Dim; ++d) { + if(Dr != 0.0) { + Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + } + myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } + + //double myVal = rhoview(i,j,k).real() * rhoview(i,j,k).real() + + // rhoview(i,j,k).imag() * rhoview(i,j,k).imag(); + //if(Dr != 0.0) { + // myVal /= Dr; + //} + //else { + // myVal = 0.0; + //} + valL += myVal; + + }, Kokkos::Sum(temp)); + + + //double globaltemp = 0.0; + double globaltemp = temp; + //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //potentialEnergy = 0.5 * globaltemp * volume / totalP ; + potentialEnergy = 0.25 * 0.5 * globaltemp * volume; + + auto Pview = P.getView(); + auto qView = q.getView(); + + temp = 0.0; + + Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + myVal *= -qView(i); + valL += myVal; + }, Kokkos::Sum(temp)); + + temp *= 0.5; + //globaltemp = 0.0; + globaltemp = temp; + //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + kineticEnergy = globaltemp; + + for (int rank=0; rank < Ippl::Comm->size(); ++rank) { + if(Ippl::Comm->rank() == rank) { + std::stringstream fname; + fname << "data/Energy_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, rank); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + } + + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << endl; + } + Ippl::Comm->barrier(); + } - if (Ippl::Comm->rank() == 0) { - std::stringstream fname; - fname << "data/Energy_"; - fname << Ippl::Comm->size(); - fname << ".csv"; - - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - 
csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - if(time_m == 0.0) { - csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; - } - - csvout << time_m << " " - << potentialEnergy << " " - << kineticEnergy << " " - << potentialEnergy + kineticEnergy << endl; - - } - - Ippl::Comm->barrier(); } private: diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index bc959ee0b..0ab42a8ac 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -315,15 +315,21 @@ int main(int argc, char *argv[]){ using buffer_type = ippl::Communicate::buffer_type; msg << "Starting parareal iterations ..." << endl; + bool isConverged = false; for (unsigned int it=0; itR, Pbegin->P, ntFine, dtFine); + LeapFrogPIF(*Pcoarse, Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged); + + if(isConverged) { + break; + } //Difference = Fine - Coarse Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; + double Rerror = computeL2Error( int tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); diff --git a/alpine/PinT/LeapFrogPIF.cpp b/alpine/PinT/LeapFrogPIF.cpp index 399ffb1f6..19f47f4a7 100644 --- a/alpine/PinT/LeapFrogPIF.cpp +++ b/alpine/PinT/LeapFrogPIF.cpp @@ -18,7 +18,7 @@ void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, - const double dt) { + const double dt, const bool isConverged) { const auto& PL = P.getLayout(); const auto& rmax = P.rmax_m; @@ -52,5 +52,10 @@ void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, //kick Ptemp = Ptemp - 0.5 * dt * P.E; + if(isConverged) { + P.dumpLandau(P.getLocalNum()); + P.dumpEnergy(P.getLocalNum()); + } + } } diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 853651858..b6353f09a 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -309,7 +309,7 @@ namespace ippl { } heffte_m = std::make_shared> - (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), + (inbox, outbox, params.get("r2c_direction"), MPI_COMM_SELF, heffteOptions); //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); diff --git a/src/Field/BareField.hpp b/src/Field/BareField.hpp index 685e6a751..36886e86d 100644 --- a/src/Field/BareField.hpp +++ b/src/Field/BareField.hpp @@ -92,7 +92,14 @@ namespace ippl { template void BareField::fillHalo() { - if(Ippl::Comm->size() > 1) { + + bool isAllSerial = true; + + for (unsigned d = 0; d < Dim; ++d) { + isAllSerial = isAllSerial && (layout_m->getRequestedDistribution(d) == SERIAL); + } + + if((Ippl::Comm->size() > 1) && (!isAllSerial)) { halo_m.fillHalo(dview_m, layout_m); } if(layout_m->isAllPeriodic_m) { @@ -106,7 +113,14 @@ namespace ippl { template void BareField::accumulateHalo() { - if(Ippl::Comm->size() > 1) { + + bool isAllSerial = true; + + for (unsigned d = 0; d < Dim; ++d) { + isAllSerial = isAllSerial && (layout_m->getRequestedDistribution(d) == SERIAL); + } + + if((Ippl::Comm->size() > 1) && (!isAllSerial)) { halo_m.accumulateHalo(dview_m, layout_m); } if(layout_m->isAllPeriodic_m) { diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index ee0c2084e..8522f9568 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -277,12 +277,12 @@ namespace ippl { IpplTimings::stopTimer(scatterTimer); - static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); - IpplTimings::startTimer(scatterAllReduceTimer); - int viewSize = 
fview.extent(0)*fview.extent(1)*fview.extent(2); - MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, - MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); - IpplTimings::stopTimer(scatterAllReduceTimer); + //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); + //IpplTimings::startTimer(scatterAllReduceTimer); + //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + //MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + // MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); + //IpplTimings::stopTimer(scatterAllReduceTimer); } From 47d857550d4d8db945e90489a778892dea7dc089 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 7 Dec 2022 15:42:00 +0100 Subject: [PATCH 018/117] Finished error calculation also. Need to compile and test --- alpine/PinT/ChargedParticlesPinT.hpp | 6 +++- alpine/PinT/LandauDampingPinT.cpp | 49 ++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index d894587c9..b0c3ea8a0 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -72,12 +72,14 @@ class ChargedParticlesPinT : public ippl::ParticleBase { public: ParticleAttrib q; // charge - typename ippl::ParticleBase::particle_position_type P; // G(P^(k-1)_n) + typename ippl::ParticleBase::particle_position_type P; // G(P^(k)_n) typename ippl::ParticleBase::particle_position_type E; // electric field at particle position typename ippl::ParticleBase::particle_position_type R0; // Initial particle positions at t=0 typename ippl::ParticleBase::particle_position_type P0; // Initial particle velocities at t=0 + typename ippl::ParticleBase::particle_position_type RprevIter; // G(R^(k-1)_n) + typename ippl::ParticleBase::particle_position_type PprevIter; // G(P^(k-1)_n) ChargedParticlesPinT(PLayout& pl, Vector_t hr, @@ -97,6 +99,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { this->addAttribute(E); this->addAttribute(R0); this->addAttribute(P0); + this->addAttribute(RprevIter); + this->addAttribute(PprevIter); setupBCs(); for (unsigned int i = 0; i < Dim; i++) decomp_m[i]=decomp[i]; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 0ab42a8ac..afdc7ae64 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -148,6 +148,43 @@ double PDF(const Vector_t& xvec, const double& alpha, return pdf; } +double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double temp = 0.0; + + Kokkos::parallel_reduce("Abs. 
error", Q.size(), + KOKKOS_LAMBDA(const int i, double& valL){ + Vector_t diff = Qview(i) - QprevIterView; + double myVal = dot(diff, diff).apply(); + valL += myVal; + }, Kokkos::Sum(temp)); + + + double globaltemp = 0.0; + MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + + double absError = std::sqrt(globaltemp); + + temp = 0.0; + Kokkos::parallel_reduce("Q norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Qview(i), Qview(i)).apply(); + valL += myVal; + }, Kokkos::Sum(temp)); + + + globaltemp = 0.0; + MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + + double relError = absError / std::sqrt(globaltemp); + + return relError; + +} + + const char* TestName = "LandauDampingPinT"; int main(int argc, char *argv[]){ @@ -329,7 +366,8 @@ int main(int argc, char *argv[]){ Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; - double Rerror = computeL2Error( + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); int tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); @@ -350,11 +388,9 @@ int main(int argc, char *argv[]){ LeapFrogPIC(*Pcoarse, Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); - Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -365,6 +401,13 @@ int main(int argc, char *argv[]){ } msg << "Finished iteration: " << it+1 << endl; + + double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter); + double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter); + + if((Rerror <= tol) && (Perror <= tol)) { + isConverged = true; + } } msg << "LandauDamping Parareal: End." << endl; From 8ceeedeff3220dfa1478e7fdcac38efa006ce6f9 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 7 Dec 2022 16:15:41 +0100 Subject: [PATCH 019/117] nx,ny,nz changed for PIC and PIF. Need to compile and test --- alpine/PinT/ChargedParticlesPinT.hpp | 2 - alpine/PinT/LandauDampingPinT.cpp | 75 +++++++++++++++++----------- alpine/PinT/LeapFrogPIF.cpp | 6 ++- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index b0c3ea8a0..4e75791c0 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -67,8 +67,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double time_m; - double rhoNorm_m; - public: ParticleAttrib q; // charge diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index afdc7ae64..660afa795 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -6,13 +6,16 @@ // European Conference on Parallel Processing. Springer, Cham, 2017. // // Usage: -// srun ./LandauDampingPinT --info 5 -// nx = No. of Fourier modes in the x-direction -// ny = No. of Fourier modes in the y-direction -// nz = No. of Fourier modes in the z-direction +// srun ./LandauDampingPinT --info 5 +// nmx = No. of Fourier modes in the x-direction +// nmy = No. of Fourier modes in the y-direction +// nmz = No. of Fourier modes in the z-direction +// nx = No. of grid points in the x-direction +// ny = No. of grid points in the y-direction +// nz = No. of grid points in the z-direction // Np = Total no. 
of macro-particles in the simulation // Example: -// srun ./LandauDampingPinT 128 128 128 10000 20 0.05 0.05 1e-5 100 --info 5 +// srun ./LandauDampingPinT 16 16 16 32 32 32 655360 20 0.05 0.05 1e-5 100 --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -193,27 +196,33 @@ int main(int argc, char *argv[]){ Inform msg("LandauDampingPinT"); Inform msg2all("LandauDampingPinT",INFORM_ALL_NODES); - ippl::Vector nr = { + ippl::Vector nmPIF = { std::atoi(argv[1]), std::atoi(argv[2]), std::atoi(argv[3]) }; + ippl::Vector nrPIC = { + std::atoi(argv[4]), + std::atoi(argv[5]), + std::atoi(argv[6]) + }; + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); IpplTimings::startTimer(mainTimer); - const size_type totalP = std::atoll(argv[4]); - const double tEnd = std::atof(argv[5]); + const size_type totalP = std::atoll(argv[7]); + const double tEnd = std::atof(argv[8]); const double dtSlice = tEnd / Ippl::Comm->size(); - const double dtFine = std::atof(argv[6]); - const double dtCoarse = std::atof(argv[7]); + const double dtFine = std::atof(argv[9]); + const double dtCoarse = std::atof(argv[10]); const unsigned int ntFine = (unsigned int)(dtSlice / dtFine); const unsigned int ntCoarse = (unsigned int)(dtSlice / dtCoarse); - const double tol = std::atof(argv[8]); - const unsigned int maxIter = std::atoi(argv[9]); + const double tol = std::atof(argv[11]); + const unsigned int maxIter = std::atoi(argv[12]); const double tStartMySlice = Ippl::Comm->rank() * dtSlice; const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; @@ -228,7 +237,8 @@ int main(int argc, char *argv[]){ << "Max. 
iterations: " << maxIter << endl << " Np= " - << totalP << " Fourier modes = " << nr + << totalP << " Fourier modes = " << nmPIF + << "Grid points = " << nrPIC << endl; using bunch_type = ChargedParticlesPinT; @@ -239,9 +249,11 @@ int main(int argc, char *argv[]){ std::unique_ptr Pbegin; std::unique_ptr Pend; - ippl::NDIndex domain; + ippl::NDIndex domainPIC; + ippl::NDIndex domainPIF; for (unsigned i = 0; i< Dim; i++) { - domain[i] = ippl::Index(nr[i]); + domainPIC[i] = ippl::Index(nrPIC[i]); + domainPIF[i] = ippl::Index(nmPIF[i]); } ippl::e_dim_tag decomp[Dim]; @@ -254,29 +266,30 @@ int main(int argc, char *argv[]){ double alpha = 0.05; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; - double dx = rmax[0] / nr[0]; - double dy = rmax[1] / nr[1]; - double dz = rmax[2] / nr[2]; + double dx = rmax[0] / nrPIC[0]; + double dy = rmax[1] / nrPIC[1]; + double dz = rmax[2] / nrPIC[2]; Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; const bool isAllPeriodic=true; - Mesh_t mesh(domain, hr, origin); - FieldLayout_t FL(domain, decomp, isAllPeriodic); - PLayout_t PL(FL, mesh); + Mesh_t meshPIC(domainPIC, hr, origin); + FieldLayout_t FLPIC(domainPIC, decomp, isAllPeriodic); + FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); + PLayout_t PL(FLPIC, meshPIC); //Q = -\int\int f dx dv double Q = -rmax[0] * rmax[1] * rmax[2]; Pcoarse = std::make_unique(PL,hr,rmin,rmax,decomp,Q); - Pbegin = std::make_unique(PL); - Pend = std::make_unique(PL); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); - Pcoarse->nr_m = nr; + Pcoarse->nr_m = nrPIC; - Pcoarse->rhoPIF_m.initialize(mesh, FL); - Pcoarse->rhoPIC_m.initialize(mesh, FL); - Pcoarse->EfieldPIC_m.initialize(mesh, FL); + Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); Pcoarse->time_m = 0.0; @@ -356,7 +369,7 @@ int main(int argc, char *argv[]){ for (unsigned int it=0; itR, Pbegin->P, ntFine, dtFine, isConverged); + LeapFrogPIF(*Pcoarse, Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice); if(isConverged) { break; @@ -400,11 +413,15 @@ int main(int argc, char *argv[]){ MPI_Wait(&request, MPI_STATUS_IGNORE); } - msg << "Finished iteration: " << it+1 << endl; double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter); double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter); + msg << "Finished iteration: " << it+1 + << "Rerror: " << Rerror + << "Perror: " << Perror + << endl; + if((Rerror <= tol) && (Perror <= tol)) { isConverged = true; } diff --git a/alpine/PinT/LeapFrogPIF.cpp b/alpine/PinT/LeapFrogPIF.cpp index 19f47f4a7..022b6e00f 100644 --- a/alpine/PinT/LeapFrogPIF.cpp +++ b/alpine/PinT/LeapFrogPIF.cpp @@ -17,13 +17,14 @@ //#include "ChargedParticlesPinT.hpp" void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, - ParticleAttrib& Ptemp, const unsigned int nt, - const double dt, const bool isConverged) { + ParticleAttrib& Ptemp, const unsigned int& nt, + const double& dt, const bool& isConverged, const double& tStartMySlice) { const auto& PL = P.getLayout(); const auto& rmax = P.rmax_m; const auto& rmax = P.rmin_m; + P.time_m = tStartMySlice; for (unsigned int it=0; it& Rtemp, //kick Ptemp = Ptemp - 0.5 * dt * P.E; + P.time_m += dt; if(isConverged) { P.dumpLandau(P.getLocalNum()); P.dumpEnergy(P.getLocalNum()); From 0f18f74db4cbe75722bdf40e63abc5b117095e0e Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 7 Dec 2022 16:51:08 +0100 Subject: 
[PATCH 020/117] some compilation bugs fixed --- alpine/PinT/ChargedParticlesPinT.hpp | 3 +++ alpine/PinT/LandauDampingPinT.cpp | 11 ++++++----- alpine/PinT/LeapFrogPIC.cpp | 8 ++++---- alpine/PinT/LeapFrogPIF.cpp | 11 ++++++----- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 4e75791c0..f0b72514a 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -17,6 +17,7 @@ // #include "Ippl.h" +#include "Solver/FFTPeriodicPoissonSolver.h" // dimension of our positions constexpr unsigned Dim = 3; @@ -65,6 +66,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Q_m; + std::shared_ptr solver_mp; + double time_m; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 660afa795..bf28b9de1 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -159,7 +159,7 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr Kokkos::parallel_reduce("Abs. error", Q.size(), KOKKOS_LAMBDA(const int i, double& valL){ - Vector_t diff = Qview(i) - QprevIterView; + Vector_t diff = Qview(i) - QprevIterView(i); double myVal = dot(diff, diff).apply(); valL += myVal; }, Kokkos::Sum(temp)); @@ -275,6 +275,7 @@ int main(int argc, char *argv[]){ const bool isAllPeriodic=true; Mesh_t meshPIC(domainPIC, hr, origin); + Mesh_t meshPIF(domainPIF, hr, origin); FieldLayout_t FLPIC(domainPIC, decomp, isAllPeriodic); FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); PLayout_t PL(FLPIC, meshPIC); @@ -324,7 +325,7 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); Pcoarse->rhoPIC_m = 0.0; - Pcoarse->scatter(Pcoarse->q, Pcoarse->rhoPIC_m, Pcoarse->R); + scatter(Pcoarse->q, Pcoarse->rhoPIC_m, Pcoarse->R); Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m / (hr[0] * hr[1] * hr[2]); Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m - @@ -332,7 +333,7 @@ int main(int argc, char *argv[]){ Pcoarse->solver_mp->solve(); - Pcoarse->gather(Pcoarse->E, Pcoarse->EfieldPIC_m, Pcoarse->R); + gather(Pcoarse->E, Pcoarse->EfieldPIC_m, Pcoarse->R); //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { @@ -347,12 +348,12 @@ int main(int argc, char *argv[]){ //Compute initial E fields corresponding to fine integrator Pcoarse->rhoPIF_m = {0.0, 0.0}; - Pcoarse->scatterPIF(Pcoarse->q, Pcoarse->rhoPIF_m, Pcoarse->R); + scatterPIF(Pcoarse->q, Pcoarse->rhoPIF_m, Pcoarse->R); Pcoarse->rhoPIF_m = Pcoarse->rhoPIF_m / ((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2])); - Pcoarse->gatherPIF(Pcoarse->E, Pcoarse->rhoPIF_m, Pcoarse->R); + gatherPIF(Pcoarse->E, Pcoarse->rhoPIF_m, Pcoarse->R); //Run the coarse integrator to get the values at the end of the time slice diff --git a/alpine/PinT/LeapFrogPIC.cpp b/alpine/PinT/LeapFrogPIC.cpp index 0de516a80..3d769521d 100644 --- a/alpine/PinT/LeapFrogPIC.cpp +++ b/alpine/PinT/LeapFrogPIC.cpp @@ -15,7 +15,7 @@ //#include "ChargedParticlesPinT.hpp" -void LeapFrogPIC(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, +void LeapFrogPIC(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, const double dt) { @@ -23,7 +23,7 @@ void LeapFrogPIC(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, const auto& hr = P.hr_m; const auto& rmax = P.rmax_m; - const auto& rmax = P.rmin_m; + const auto& rmin = P.rmin_m; for (unsigned int it=0; it& Rtemp, //scatter the charge onto the 
underlying grid P.rhoPIC_m = 0.0; - P.scatter(P.q, P.rhoPIC_m, Rtemp); + scatter(P.q, P.rhoPIC_m, Rtemp); P.rhoPIC_m = P.rhoPIC_m / (hr[0] * hr[1] * hr[2]); @@ -51,7 +51,7 @@ void LeapFrogPIC(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, P.solver_mp->solve(); // gather E field - P.gather(P.E, P.EfieldPIC_m, Rtemp); + gather(P.E, P.EfieldPIC_m, Rtemp); //kick Ptemp = Ptemp - 0.5 * dt * P.E; diff --git a/alpine/PinT/LeapFrogPIF.cpp b/alpine/PinT/LeapFrogPIF.cpp index 022b6e00f..2db5251bb 100644 --- a/alpine/PinT/LeapFrogPIF.cpp +++ b/alpine/PinT/LeapFrogPIF.cpp @@ -16,13 +16,14 @@ //#include "ChargedParticlesPinT.hpp" -void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, +void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const bool& isConverged, const double& tStartMySlice) { + const double& dt, const bool& isConverged, + const double& tStartMySlice) { const auto& PL = P.getLayout(); const auto& rmax = P.rmax_m; - const auto& rmax = P.rmin_m; + const auto& rmin = P.rmin_m; P.time_m = tStartMySlice; for (unsigned int it=0; it& Rtemp, //scatter the charge onto the underlying grid P.rhoPIF_m = {0.0, 0.0}; - P.scatterPIF(P.q, P.rhoPIF_m, Rtemp); + scatterPIF(P.q, P.rhoPIF_m, Rtemp); P.rhoPIF_m = P.rhoPIF_m / ((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2])); // Solve for and gather E field - P.gatherPIF(P.E, P.rhoPIF_m, Rtemp); + gatherPIF(P.E, P.rhoPIF_m, Rtemp); //kick Ptemp = Ptemp - 0.5 * dt * P.E; From bb2ee189f70ab1681c58724ca755e86e50312dd9 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 7 Dec 2022 22:12:40 +0100 Subject: [PATCH 021/117] Code compiles now. Need to run and test --- alpine/PinT/ChargedParticlesPinT.hpp | 103 +++++++++++++++++++++++++++ alpine/PinT/LandauDampingPinT.cpp | 12 ++-- alpine/PinT/LeapFrogPIC.cpp | 2 +- alpine/PinT/LeapFrogPIF.cpp | 2 +- 4 files changed, 111 insertions(+), 8 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index f0b72514a..83e63cc9e 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -82,6 +82,23 @@ class ChargedParticlesPinT : public ippl::ParticleBase { typename ippl::ParticleBase::particle_position_type RprevIter; // G(R^(k-1)_n) typename ippl::ParticleBase::particle_position_type PprevIter; // G(P^(k-1)_n) + /* + This constructor is mandatory for all derived classes from + ParticleBase as the bunch buffer uses this + */ + ChargedParticlesPinT(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + this->addAttribute(R0); + this->addAttribute(P0); + this->addAttribute(RprevIter); + this->addAttribute(PprevIter); + } + ChargedParticlesPinT(PLayout& pl, Vector_t hr, Vector_t rmin, @@ -311,6 +328,92 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } + void LeapFrogPIC(ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int nt, + const double dt) { + + PLayout& PL = this->getLayout(); + + for (unsigned int it=0; itsolve(); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); + + //kick + Ptemp = Ptemp - 0.5 * dt * E; + } + + } + + void LeapFrogPIF(ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int& nt, + const double& dt, const bool& isConverged, + const double& tStartMySlice) { + + PLayout& PL = this->getLayout(); + + time_m = tStartMySlice; + for (unsigned int it=0; 
itgetLocalNum()); + dumpEnergy(this->getLocalNum()); + } + + } + } + private: void setBCAllPeriodic() { diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index bf28b9de1..40201deb9 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -35,8 +35,8 @@ #include "ChargedParticlesPinT.hpp" #include "StatesBeginSlice.hpp" #include "StatesEndSlice.hpp" -#include "LeapFrogPIC.cpp" -#include "LeapFrogPIF.cpp" +//#include "LeapFrogPIC.cpp" +//#include "LeapFrogPIF.cpp" #include #include #include @@ -337,7 +337,7 @@ int main(int argc, char *argv[]){ //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { - LeapFrogPIC(*Pcoarse, Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse); } Ippl::Comm->barrier(); @@ -357,7 +357,7 @@ int main(int argc, char *argv[]){ //Run the coarse integrator to get the values at the end of the time slice - LeapFrogPIC(*Pcoarse, Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); //The following might not be needed Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); @@ -370,7 +370,7 @@ int main(int argc, char *argv[]){ for (unsigned int it=0; itR, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice); if(isConverged) { break; @@ -400,7 +400,7 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - LeapFrogPIC(*Pcoarse, Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; diff --git a/alpine/PinT/LeapFrogPIC.cpp b/alpine/PinT/LeapFrogPIC.cpp index 3d769521d..d719a423e 100644 --- a/alpine/PinT/LeapFrogPIC.cpp +++ b/alpine/PinT/LeapFrogPIC.cpp @@ -19,7 +19,7 @@ void LeapFrogPIC(ChargedParticlesPinT& P, ParticleAttrib& R ParticleAttrib& Ptemp, const unsigned int nt, const double dt) { - const auto& PL = P.getLayout(); + PLayout_t& PL = P.getLayout(); const auto& hr = P.hr_m; const auto& rmax = P.rmax_m; diff --git a/alpine/PinT/LeapFrogPIF.cpp b/alpine/PinT/LeapFrogPIF.cpp index 2db5251bb..9aa8c0479 100644 --- a/alpine/PinT/LeapFrogPIF.cpp +++ b/alpine/PinT/LeapFrogPIF.cpp @@ -21,7 +21,7 @@ void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& R const double& dt, const bool& isConverged, const double& tStartMySlice) { - const auto& PL = P.getLayout(); + auto& PL = P.getLayout(); const auto& rmax = P.rmax_m; const auto& rmin = P.rmin_m; From 3d542704d3b74b3ef07fcc2bca15e0967f14ec85 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 7 Dec 2022 22:46:10 +0100 Subject: [PATCH 022/117] Code runs but gives all zeros for electric field. 
Need to debug --- alpine/PinT/ChargedParticlesPinT.hpp | 28 ++++++++++++++-------------- alpine/PinT/LandauDampingPinT.cpp | 18 ++++++++++-------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 83e63cc9e..8b0a7f772 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -182,15 +182,15 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - for (int rank=0; rank < Ippl::Comm->size(); ++rank) { - if(Ippl::Comm->rank() == rank) { + //for (int rank=0; rank < Ippl::Comm->size(); ++rank) { + // if(Ippl::Comm->rank() == rank) { std::stringstream fname; fname << "data/FieldLandau_"; - fname << Ippl::Comm->size(); + fname << Ippl::Comm->rank(); fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, rank); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); @@ -201,9 +201,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { csvout << time_m << " " << fieldEnergy << " " << ExAmp << endl; - } - Ippl::Comm->barrier(); - } + // } + // Ippl::Comm->barrier(); + //} } @@ -302,15 +302,15 @@ class ChargedParticlesPinT : public ippl::ParticleBase { kineticEnergy = globaltemp; - for (int rank=0; rank < Ippl::Comm->size(); ++rank) { - if(Ippl::Comm->rank() == rank) { + //for (int rank=0; rank < Ippl::Comm->size(); ++rank) { + // if(Ippl::Comm->rank() == rank) { std::stringstream fname; fname << "data/Energy_"; - fname << Ippl::Comm->size(); + fname << Ippl::Comm->rank(); fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, rank); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); @@ -322,9 +322,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { << potentialEnergy << " " << kineticEnergy << " " << potentialEnergy + kineticEnergy << endl; - } - Ippl::Comm->barrier(); - } + //} + //Ippl::Comm->barrier(); + //} } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 40201deb9..b5f2c6259 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -15,7 +15,7 @@ // nz = No. of grid points in the z-direction // Np = Total no. of macro-particles in the simulation // Example: -// srun ./LandauDampingPinT 16 16 16 32 32 32 655360 20 0.05 0.05 1e-5 100 --info 5 +// srun ./LandauDampingPinT 16 16 16 32 32 32 655360 20.0 0.05 0.05 1e-5 100 --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -229,16 +229,18 @@ int main(int argc, char *argv[]){ msg << "Parareal Landau damping" << endl - << "Slice dT: " << dtSlice + << "Slice dT: " << dtSlice + << endl << "No. of fine time steps: " << ntFine + << endl << "No. of coarse time steps: " << ntCoarse << endl << "Tolerance: " << tol - << "Max. iterations: " << maxIter + << " Max. 
iterations: " << maxIter << endl - << " Np= " - << totalP << " Fourier modes = " << nmPIF - << "Grid points = " << nrPIC + << "Np= " << totalP + << " Fourier modes = " << nmPIF + << " Grid points = " << nrPIC << endl; using bunch_type = ChargedParticlesPinT; @@ -419,8 +421,8 @@ int main(int argc, char *argv[]){ double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter); msg << "Finished iteration: " << it+1 - << "Rerror: " << Rerror - << "Perror: " << Perror + << " Rerror: " << Rerror + << " Perror: " << Perror << endl; if((Rerror <= tol) && (Perror <= tol)) { From 6525492c2df6533a4516e93d351a9929f2b83781 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 8 Dec 2022 17:16:29 +0100 Subject: [PATCH 023/117] Code is running but results are wrong. Need to debg further --- alpine/PinT/ChargedParticlesPinT.hpp | 18 +++++++ alpine/PinT/LandauDampingPinT.cpp | 76 +++++++++++++++++++--------- src/Particle/ParticleAttrib.hpp | 1 + 3 files changed, 70 insertions(+), 25 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 8b0a7f772..6dab41251 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -333,6 +333,17 @@ class ChargedParticlesPinT : public ippl::ParticleBase { const double dt) { PLayout& PL = this->getLayout(); + rhoPIC_m = 0.0; + scatter(q, rhoPIC_m, Rtemp); + + rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); + rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + + //Field solve + solver_mp->solve(); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); for (unsigned int it=0; it { const double& tStartMySlice) { PLayout& PL = this->getLayout(); + rhoPIF_m = {0.0, 0.0}; + scatterPIF(q, rhoPIF_m, Rtemp); + + rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + + // Solve for and gather E field + gatherPIF(E, rhoPIF_m, Rtemp); time_m = tStartMySlice; for (unsigned int it=0; it& Q, ParticleAttrib& QprevIter) { +double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& iter, const int& myrank) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -164,6 +165,7 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr valL += myVal; }, Kokkos::Sum(temp)); + std::cout << "Rank: " << myrank << " Iter: " << iter << " Abs. 
Error: " << temp << std::endl; double globaltemp = 0.0; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); @@ -295,7 +297,7 @@ int main(int argc, char *argv[]){ Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); - Pcoarse->time_m = 0.0; + Pcoarse->time_m = tStartMySlice; IpplTimings::startTimer(particleCreation); @@ -310,12 +312,47 @@ int main(int argc, char *argv[]){ Pcoarse->create(nloc); Pbegin->create(nloc); Pend->create(nloc); + + using buffer_type = ippl::Communicate::buffer_type; +#ifdef KOKKOS_ENABLE_CUDA + //If we don't do the following even with the same seed the initial + //condition is not the same on different GPUs + int tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(Ippl::Comm->rank() == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + + Kokkos::fence(); + size_type bufSize = Pcoarse->packedSize(nloc); + std::vector requests(0); + int sends = 0; + for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); + requests.resize(requests.size() + 1); + Ippl::Comm->isend(rank, tag, *Pcoarse, *buf, requests.back(), nloc); + buf->resetWritePos(); + ++sends; + } + MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + } + else { + size_type bufSize = Pcoarse->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(0, tag, *Pcoarse, *buf, bufSize, nloc); + buf->resetReadPos(); + } +#else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, alpha, kw, minU, maxU)); Kokkos::fence(); +#endif + + Ippl::Comm->barrier(); IpplTimings::stopTimer(particleCreation); @@ -326,17 +363,6 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - Pcoarse->rhoPIC_m = 0.0; - scatter(Pcoarse->q, Pcoarse->rhoPIC_m, Pcoarse->R); - Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m / (hr[0] * hr[1] * hr[2]); - - Pcoarse->rhoPIC_m = Pcoarse->rhoPIC_m - - (Pcoarse->Q_m/((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2]))); - - Pcoarse->solver_mp->solve(); - - gather(Pcoarse->E, Pcoarse->EfieldPIC_m, Pcoarse->R); - //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse); @@ -348,15 +374,9 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - //Compute initial E fields corresponding to fine integrator - Pcoarse->rhoPIF_m = {0.0, 0.0}; - scatterPIF(Pcoarse->q, Pcoarse->rhoPIF_m, Pcoarse->R); - - Pcoarse->rhoPIF_m = Pcoarse->rhoPIF_m / - ((rmax[0] - rmin[0]) * (rmax[1] - rmin[1]) * (rmax[2] - rmin[2])); - - gatherPIF(Pcoarse->E, Pcoarse->rhoPIF_m, Pcoarse->R); + //Pcoarse->dumpLandau(nloc); + //Pcoarse->dumpEnergy(nloc); //Run the coarse integrator to get the values at the end of the time slice Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); @@ -365,8 +385,9 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); 
Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pend->R.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pend->P.getView()); - using buffer_type = ippl::Communicate::buffer_type; msg << "Starting parareal iterations ..." << endl; bool isConverged = false; for (unsigned int it=0; itRprevIter.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); - int tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() > 0) { size_type bufSize = Pbegin->packedSize(nloc); @@ -417,8 +438,13 @@ int main(int argc, char *argv[]){ } - double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter); - double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter); + double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank()); + double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank()); + //double Rerror = computeL2Error(Pend->R, Pcoarse->RprevIter); + //double Perror = computeL2Error(Pend->P, Pcoarse->PprevIter); + + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pend->R.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pend->P.getView()); msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 8522f9568..800cd9350 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -277,6 +277,7 @@ namespace ippl { IpplTimings::stopTimer(scatterTimer); + Kokkos::deep_copy(fview, viewLocal); //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); //IpplTimings::startTimer(scatterAllReduceTimer); //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); From 34b94e6686625794b89f521bdbe3660697701065 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 9 Dec 2022 09:27:19 +0100 Subject: [PATCH 024/117] Some more modifications to reduce initial communication --- alpine/PinT/ChargedParticlesPinT.hpp | 36 +++++++++++-------- alpine/PinT/LandauDampingPinT.cpp | 53 +++++++++++++++++----------- alpine/PinT/LeapFrogPIF.cpp | 1 + 3 files changed, 56 insertions(+), 34 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 6dab41251..522160d0a 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -151,7 +151,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } - void dumpLandau(size_type totalP) { + void dumpLandau(size_type totalP, const unsigned int& iter) { auto Eview = E.getView(); @@ -187,6 +187,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { std::stringstream fname; fname << "data/FieldLandau_"; fname << Ippl::Comm->rank(); + fname << "_iter_"; + fname << iter; fname << ".csv"; @@ -194,9 +196,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); - if(time_m == 0.0) { - csvout << "time, Ex_field_energy, Ex_max_norm" << endl; - } + //if(time_m == 0.0) { + // csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + //} csvout << time_m << " " << fieldEnergy << " " @@ -207,7 +209,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } - void dumpEnergy(size_type /*totalP*/) { + void dumpEnergy(size_type /*totalP*/, const unsigned int& iter) 
{ double potentialEnergy, kineticEnergy; @@ -307,6 +309,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { std::stringstream fname; fname << "data/Energy_"; fname << Ippl::Comm->rank(); + fname << "_iter_"; + fname << iter; fname << ".csv"; @@ -314,9 +318,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); - if(time_m == 0.0) { - csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; - } + //csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; csvout << time_m << " " << potentialEnergy << " " @@ -382,8 +384,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const bool& isConverged, - const double& tStartMySlice) { + const double& dt, const bool& /*isConverged*/, + const double& tStartMySlice, const unsigned int& iter) { PLayout& PL = this->getLayout(); rhoPIF_m = {0.0, 0.0}; @@ -395,6 +397,12 @@ class ChargedParticlesPinT : public ippl::ParticleBase { gatherPIF(E, rhoPIF_m, Rtemp); time_m = tStartMySlice; + + //isConverged = false; + if((time_m == 0.0)) { + dumpLandau(this->getLocalNum(), iter); + dumpEnergy(this->getLocalNum(), iter); + } for (unsigned int it=0; it { Ptemp = Ptemp - 0.5 * dt * E; time_m += dt; - if(isConverged) { - dumpLandau(this->getLocalNum()); - dumpEnergy(this->getLocalNum()); - } + //if(isConverged) { + dumpLandau(this->getLocalNum(), iter); + dumpEnergy(this->getLocalNum(), iter); + //} } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index d49301863..85d09a6b2 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -322,32 +322,32 @@ int main(int argc, char *argv[]){ Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( - Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); Kokkos::fence(); - size_type bufSize = Pcoarse->packedSize(nloc); + size_type bufSize = Pbegin->packedSize(nloc); std::vector requests(0); int sends = 0; for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); requests.resize(requests.size() + 1); - Ippl::Comm->isend(rank, tag, *Pcoarse, *buf, requests.back(), nloc); + Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); buf->resetWritePos(); ++sends; } MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } else { - size_type bufSize = Pcoarse->packedSize(nloc); + size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(0, tag, *Pcoarse, *buf, bufSize, nloc); + Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); } #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( - Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); Kokkos::fence(); #endif @@ -355,13 +355,17 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); IpplTimings::stopTimer(particleCreation); + Pcoarse->R = Pbegin->R * 1; + Pcoarse->P = Pbegin->P * 1; Pcoarse->q = Pcoarse->Q_m/totalP; msg << "particles 
created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later - Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + Pcoarse->R0 = Pcoarse->R * 1; + Pcoarse->P0 = Pcoarse->P * 1; //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { @@ -371,8 +375,10 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + Pbegin->R = Pcoarse->R * 1; + Pbegin->P = Pcoarse->P * 1; //Pcoarse->dumpLandau(nloc); @@ -382,8 +388,10 @@ int main(int argc, char *argv[]){ Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); //The following might not be needed - Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + Pend->R = Pcoarse->R * 1; + Pend->P = Pcoarse->P * 1; //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pend->R.getView()); //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pend->P.getView()); @@ -393,7 +401,7 @@ int main(int argc, char *argv[]){ for (unsigned int it=0; itLeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1); if(isConverged) { break; @@ -403,9 +411,10 @@ int main(int argc, char *argv[]){ Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; - Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); - + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + Pcoarse->RprevIter = Pcoarse->R * 1; + Pcoarse->PprevIter = Pcoarse->P * 1; tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() > 0) { @@ -415,12 +424,16 @@ int main(int argc, char *argv[]){ buf->resetReadPos(); } else { - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + Pbegin->R = Pcoarse->R0 * 1; + Pbegin->P = Pcoarse->P0 * 1; } - Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + Pcoarse->R = Pbegin->R * 1; + Pcoarse->P = Pbegin->P * 1; Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); diff --git a/alpine/PinT/LeapFrogPIF.cpp b/alpine/PinT/LeapFrogPIF.cpp index 9aa8c0479..b7473237f 100644 --- a/alpine/PinT/LeapFrogPIF.cpp +++ b/alpine/PinT/LeapFrogPIF.cpp @@ -26,6 +26,7 @@ void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& R const auto& rmin = P.rmin_m; P.time_m = tStartMySlice; + for 
(unsigned int it=0; it Date: Fri, 9 Dec 2022 23:08:03 +0100 Subject: [PATCH 025/117] Code seems to be working --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 7 +- alpine/PinT/ChargedParticlesPinT.hpp | 216 +++++++++++------- alpine/PinT/LandauDampingPinT.cpp | 115 +++++----- src/Particle/ParticleAttrib.hpp | 5 +- 4 files changed, 201 insertions(+), 142 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 9ff279c18..602964ab6 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -261,11 +261,8 @@ class ChargedParticlesPIF : public ippl::ParticleBase { }, Kokkos::Sum(temp)); - double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //potentialEnergy = 0.5 * globaltemp * volume / totalP ; - potentialEnergy = 0.25 * 0.5 * globaltemp * volume; + potentialEnergy = 0.5 * temp * volume; auto Pview = P.getView(); auto qView = q.getView(); @@ -280,7 +277,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { }, Kokkos::Sum(temp)); temp *= 0.5; - globaltemp = 0.0; + double globaltemp = 0.0; MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); kineticEnergy = globaltemp; diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 522160d0a..9c993c2bd 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -151,71 +151,127 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } - void dumpLandau(size_type totalP, const unsigned int& iter) { + void dumpLandau(size_type /*totalP*/, const unsigned int& iter) { - auto Eview = E.getView(); - double fieldEnergy, ExAmp; - double temp = 0.0; + double fieldEnergy = 0.0; + double ExAmp = 0.0; + //auto Eview = E.getView(); + //double temp = 0.0; - Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = Eview(i)[0] * Eview(i)[0]; - valL += myVal; - }, Kokkos::Sum(temp)); + //Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = Eview(i)[0] * Eview(i)[0]; + // valL += myVal; + // }, Kokkos::Sum(temp)); - //double globaltemp = 0.0; - double globaltemp = temp; - //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + ////double globaltemp = 0.0; + //double globaltemp = temp; + ////MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //fieldEnergy = globaltemp * volume / totalP ; + + //double tempMax = 0.0; + //Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), + // KOKKOS_LAMBDA(const size_t i, double& valL) + // { + // double myVal = std::fabs(Eview(i)[0]); + // if(myVal > valL) valL = myVal; + // }, Kokkos::Max(tempMax)); + ////ExAmp = 0.0; + //ExAmp = tempMax; + ////MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + + auto rhoview = rhoPIF_m.getView(); + const int nghost = rhoPIF_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rhoPIF_m.getLayout(); + const Mesh_t& mesh = rhoPIF_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for 
(unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ex energy and Max", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& tlSum, + double& tlMax) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + if(Dr != 0.0) { + Ek = -(imag * kVec[0] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + } + double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + + tlSum += myVal; + + double myValMax = std::sqrt(myVal); + + if(myValMax > tlMax) tlMax = myValMax; + + }, Kokkos::Sum(fieldEnergy), Kokkos::Max(ExAmp)); + + + Kokkos::fence(); double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - fieldEnergy = globaltemp * volume / totalP ; - - double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), - KOKKOS_LAMBDA(const size_t i, double& valL) - { - double myVal = std::fabs(Eview(i)[0]); - if(myVal > valL) valL = myVal; - }, Kokkos::Max(tempMax)); - //ExAmp = 0.0; - ExAmp = tempMax; - //MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - - - //for (int rank=0; rank < Ippl::Comm->size(); ++rank) { - // if(Ippl::Comm->rank() == rank) { - std::stringstream fname; - fname << "data/FieldLandau_"; - fname << Ippl::Comm->rank(); - fname << "_iter_"; - fname << iter; - fname << ".csv"; - - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - //if(time_m == 0.0) { - // csvout << "time, Ex_field_energy, Ex_max_norm" << endl; - //} - - csvout << time_m << " " - << fieldEnergy << " " - << ExAmp << endl; - // } - // Ippl::Comm->barrier(); + fieldEnergy *= volume; + + + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->rank(); + fname << "_iter_"; + fname << iter; + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + //if(time_m == 0.0) { + // csvout << "time, Ex_field_energy, Ex_max_norm" << endl; //} + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; } - void dumpEnergy(size_type /*totalP*/, const unsigned int& iter) { + void dumpEnergy(size_type /*totalP*/, const unsigned int& iter, ParticleAttrib& Ptemp) { double potentialEnergy, kineticEnergy; double temp = 0.0; - auto rhoview = rhoPIF_m.getView(); const int nghost = rhoPIF_m.getNghost(); using mdrange_type = Kokkos::MDRangePolicy>; @@ -278,14 +334,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { }, Kokkos::Sum(temp)); - //double globaltemp = 0.0; - double globaltemp = temp; - //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //potentialEnergy = 0.5 * globaltemp * volume / totalP ; - potentialEnergy = 0.25 * 0.5 * globaltemp * volume; + potentialEnergy = 0.5 * temp * volume; - auto Pview = P.getView(); + auto Pview = Ptemp.getView(); auto qView = q.getView(); temp = 0.0; @@ -299,40 +351,35 @@ class ChargedParticlesPinT : public 
ippl::ParticleBase { temp *= 0.5; //globaltemp = 0.0; - globaltemp = temp; + double globaltemp = temp; //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); kineticEnergy = globaltemp; - //for (int rank=0; rank < Ippl::Comm->size(); ++rank) { - // if(Ippl::Comm->rank() == rank) { - std::stringstream fname; - fname << "data/Energy_"; - fname << Ippl::Comm->rank(); - fname << "_iter_"; - fname << iter; - fname << ".csv"; + std::stringstream fname; + fname << "data/Energy_"; + fname << Ippl::Comm->rank(); + fname << "_iter_"; + fname << iter; + fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); - //csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + //csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; - csvout << time_m << " " - << potentialEnergy << " " - << kineticEnergy << " " - << potentialEnergy + kineticEnergy << endl; - //} - //Ippl::Comm->barrier(); - //} + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << endl; } void LeapFrogPIC(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, - const double dt) { + const double dt, const double& tStartMySlice) { PLayout& PL = this->getLayout(); rhoPIC_m = 0.0; @@ -347,6 +394,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { // gather E field gather(E, EfieldPIC_m, Rtemp); + time_m = tStartMySlice; + + for (unsigned int it=0; it { //kick Ptemp = Ptemp - 0.5 * dt * E; + + time_m += dt; } } @@ -398,10 +450,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - //isConverged = false; if((time_m == 0.0)) { dumpLandau(this->getLocalNum(), iter); - dumpEnergy(this->getLocalNum(), iter); + dumpEnergy(this->getLocalNum(), iter, Ptemp); } for (unsigned int it=0; it { Ptemp = Ptemp - 0.5 * dt * E; time_m += dt; - //if(isConverged) { + dumpLandau(this->getLocalNum(), iter); - dumpEnergy(this->getLocalNum(), iter); - //} + dumpEnergy(this->getLocalNum(), iter, Ptemp); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 85d09a6b2..c29865203 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -270,23 +270,28 @@ int main(int argc, char *argv[]){ double alpha = 0.05; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; - double dx = rmax[0] / nrPIC[0]; - double dy = rmax[1] / nrPIC[1]; - double dz = rmax[2] / nrPIC[2]; + double dxPIC = rmax[0] / nrPIC[0]; + double dyPIC = rmax[1] / nrPIC[1]; + double dzPIC = rmax[2] / nrPIC[2]; - Vector_t hr = {dx, dy, dz}; + + double dxPIF = rmax[0] / nmPIF[0]; + double dyPIF = rmax[1] / nmPIF[1]; + double dzPIF = rmax[2] / nmPIF[2]; + Vector_t hrPIC = {dxPIC, dyPIC, dzPIC}; + Vector_t hrPIF = {dxPIF, dyPIF, dzPIF}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; const bool isAllPeriodic=true; - Mesh_t meshPIC(domainPIC, hr, origin); - Mesh_t meshPIF(domainPIF, hr, origin); + Mesh_t meshPIC(domainPIC, hrPIC, origin); + Mesh_t meshPIF(domainPIF, hrPIF, origin); FieldLayout_t FLPIC(domainPIC, decomp, isAllPeriodic); FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); PLayout_t PL(FLPIC, meshPIC); //Q = -\int\int f dx dv double Q = -rmax[0] * rmax[1] * rmax[2]; - Pcoarse 
= std::make_unique(PL,hr,rmin,rmax,decomp,Q); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -319,7 +324,7 @@ int main(int argc, char *argv[]){ //condition is not the same on different GPUs int tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() == 0) { - Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); Kokkos::parallel_for(nloc, generate_random, Dim>( Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); @@ -343,58 +348,58 @@ int main(int argc, char *argv[]){ Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); } + Ippl::Comm->barrier(); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( - Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, alpha, kw, minU, maxU)); Kokkos::fence(); + Ippl::Comm->barrier(); #endif - Ippl::Comm->barrier(); + Pcoarse->q = Pcoarse->Q_m/totalP; IpplTimings::stopTimer(particleCreation); - Pcoarse->R = Pbegin->R * 1; - Pcoarse->P = Pbegin->P * 1; - Pcoarse->q = Pcoarse->Q_m/totalP; + //Pcoarse->R = Pbegin->R * 1; + //Pcoarse->P = Pbegin->P * 1; + msg << "particles created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later - //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - Pcoarse->R0 = Pcoarse->R * 1; - Pcoarse->P0 = Pcoarse->P * 1; + Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + //Pcoarse->R0 = Pcoarse->R * 1; + //Pcoarse->P0 = Pcoarse->P * 1; //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); } Ippl::Comm->barrier(); - //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - Pbegin->R = Pcoarse->R * 1; - Pbegin->P = Pcoarse->P * 1; + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + //Pbegin->R = Pcoarse->R * 1; + //Pbegin->P = Pcoarse->P * 1; - //Pcoarse->dumpLandau(nloc); - //Pcoarse->dumpEnergy(nloc); //Run the coarse integrator to get the values at the end of the time slice - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); //The following might not be needed - //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - Pend->R = Pcoarse->R * 1; - Pend->P = Pcoarse->P * 1; + Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + //Pend->R = Pcoarse->R * 1; + //Pend->P = Pcoarse->P * 1; - //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pend->R.getView()); - 
//Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pend->P.getView()); msg << "Starting parareal iterations ..." << endl; bool isConverged = false; @@ -403,18 +408,28 @@ int main(int argc, char *argv[]){ //Run fine integrator in parallel Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1); - if(isConverged) { - break; - } + //if(isConverged) { + + //test with the serial solution + //Pcoarse->LeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, dtFine, isConverged, tStartMySlice, it+1); + //Ippl::Comm->barrier(); + //double Rerror = computeL2Error(Pcoarse->R0, Pbegin->R, it+1, Ippl::Comm->rank()); + //double Perror = computeL2Error(Pcoarse->P0, Pbegin->P, it+1, Ippl::Comm->rank()); + //msg << "Finished iteration: " << it+1 + //<< " Rerror: " << Rerror + //<< " Perror: " << Perror + //<< endl; + // break; + //} //Difference = Fine - Coarse Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; - //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); - Pcoarse->RprevIter = Pcoarse->R * 1; - Pcoarse->PprevIter = Pcoarse->P * 1; + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + //Pcoarse->RprevIter = Pcoarse->R * 1; + //Pcoarse->PprevIter = Pcoarse->P * 1; tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() > 0) { @@ -424,19 +439,19 @@ int main(int argc, char *argv[]){ buf->resetReadPos(); } else { - //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); - Pbegin->R = Pcoarse->R0 * 1; - Pbegin->P = Pcoarse->P0 * 1; + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + //Pbegin->R = Pcoarse->R0 * 1; + //Pbegin->P = Pcoarse->P0 * 1; } - //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - Pcoarse->R = Pbegin->R * 1; - Pcoarse->P = Pbegin->P * 1; + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + //Pcoarse->R = Pbegin->R * 1; + //Pcoarse->P = Pbegin->P * 1; - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; @@ -453,11 +468,7 @@ int main(int argc, char *argv[]){ double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank()); double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank()); - //double Rerror = computeL2Error(Pend->R, Pcoarse->RprevIter); - //double Perror = computeL2Error(Pend->P, Pcoarse->PprevIter); - //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pend->R.getView()); - //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pend->P.getView()); msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror @@ -465,7 +476,7 @@ int main(int argc, char *argv[]){ << endl; if((Rerror <= tol) && (Perror <= tol)) { - isConverged = true; + break; } } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 800cd9350..e2c3928e5 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -269,7 +269,8 @@ namespace ippl { }, 
Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { - viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; + //viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; + fview(i+nghost,j+nghost,k+nghost) = reducedValue; } } @@ -277,7 +278,7 @@ namespace ippl { IpplTimings::stopTimer(scatterTimer); - Kokkos::deep_copy(fview, viewLocal); + //Kokkos::deep_copy(fview, viewLocal); //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); //IpplTimings::startTimer(scatterAllReduceTimer); //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); From f8afcdaaa464d1ad7b4104e6d354b441ce848d36 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 10 Dec 2022 07:03:38 +0100 Subject: [PATCH 026/117] Some cleanup done --- alpine/PinT/ChargedParticlesPinT.hpp | 12 +-------- alpine/PinT/LandauDampingPinT.cpp | 39 ++++------------------------ 2 files changed, 6 insertions(+), 45 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 9c993c2bd..d292e65a9 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -182,8 +182,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //ExAmp = tempMax; ////MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - - auto rhoview = rhoPIF_m.getView(); const int nghost = rhoPIF_m.getNghost(); using mdrange_type = Kokkos::MDRangePolicy>; @@ -398,12 +396,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { for (unsigned int it=0; it { } for (unsigned int it=0; it& Q, ParticleAttrib& QprevIter, const unsigned int& iter, const int& myrank) { @@ -319,10 +304,11 @@ int main(int argc, char *argv[]){ Pend->create(nloc); using buffer_type = ippl::Communicate::buffer_type; + int tag; #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs - int tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); Kokkos::parallel_for(nloc, @@ -365,16 +351,11 @@ int main(int argc, char *argv[]){ Pcoarse->q = Pcoarse->Q_m/totalP; IpplTimings::stopTimer(particleCreation); - //Pcoarse->R = Pbegin->R * 1; - //Pcoarse->P = Pbegin->P * 1; - msg << "particles created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - //Pcoarse->R0 = Pcoarse->R * 1; - //Pcoarse->P0 = Pcoarse->P * 1; //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { @@ -386,9 +367,6 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - //Pbegin->R = Pcoarse->R * 1; - //Pbegin->P = Pcoarse->P * 1; - //Run the coarse integrator to get the values at the end of the time slice @@ -397,8 +375,6 @@ int main(int argc, char *argv[]){ //The following might not be needed Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - //Pend->R = Pcoarse->R * 1; - //Pend->P = Pcoarse->P * 1; msg << "Starting parareal iterations ..." 
<< endl; @@ -411,7 +387,8 @@ int main(int argc, char *argv[]){ //if(isConverged) { //test with the serial solution - //Pcoarse->LeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, dtFine, isConverged, tStartMySlice, it+1); + //Pcoarse->LeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, + // dtFine, isConverged, tStartMySlice, it+1); //Ippl::Comm->barrier(); //double Rerror = computeL2Error(Pcoarse->R0, Pbegin->R, it+1, Ippl::Comm->rank()); //double Perror = computeL2Error(Pcoarse->P0, Pbegin->P, it+1, Ippl::Comm->rank()); @@ -428,8 +405,7 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); - //Pcoarse->RprevIter = Pcoarse->R * 1; - //Pcoarse->PprevIter = Pcoarse->P * 1; + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() > 0) { @@ -441,15 +417,10 @@ int main(int argc, char *argv[]){ else { Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); - //Pbegin->R = Pcoarse->R0 * 1; - //Pbegin->P = Pcoarse->P0 * 1; } Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - //Pcoarse->R = Pbegin->R * 1; - //Pcoarse->P = Pbegin->P * 1; - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); From 5a39d31c87180fa30201b6953637a4dd18d3268b Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 10 Dec 2022 08:04:06 +0100 Subject: [PATCH 027/117] Error Vs iterations file writing added --- alpine/PinT/ChargedParticlesPinT.hpp | 24 ++++++++++++++++++++++++ alpine/PinT/LandauDampingPinT.cpp | 2 ++ 2 files changed, 26 insertions(+) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index d292e65a9..a7955ce63 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -375,6 +375,30 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } + void writeError(double Rerror, double Perror, unsigned int iter) { + + if(Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/Error_Vs_Iter.csv"; + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(iter == 1) { + csvout << "Iter, Rerror, Perror" << endl; + } + + csvout << iter << " " + << Rerror << " " + << Perror << endl; + + } + + Ippl::Comm->barrier(); + + } + void LeapFrogPIC(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, const double dt, const double& tStartMySlice) { diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index a3f59bb8a..e680ee7e9 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -446,6 +446,8 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; + Pcoarse->writeError(Rerror, Perror, it+1); + if((Rerror <= tol) && (Perror <= tol)) { break; } From 0c588e25ec195a7a2a0209ae7636db64f1d61541 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 12 Dec 2022 12:27:22 +0100 Subject: [PATCH 028/117] Particle periodic BCs changed as the previous one was giving seg faults --- alpine/PinT/ChargedParticlesPinT.hpp | 50 ++++++++++++++++++++++++++++ alpine/PinT/LandauDampingPinT.cpp | 5 +-- src/Particle/ParticleAttrib.hpp | 13 +++++--- src/Particle/ParticleBC.h | 18 
++++++++-- 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index a7955ce63..d019dd982 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -399,11 +399,57 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } + void checkBounds(ParticleAttrib& R) { + + auto Rview = R.getView(); + double xMin = 0.0; + double yMin = 0.0; + double zMin = 0.0; + double xMax = 0.0; + double yMax = 0.0; + double zMax = 0.0; + Kokkos::parallel_reduce("Bounds calculation", R.size(), + KOKKOS_LAMBDA(const int i, + double& xlMin, + double& ylMin, + double& zlMin, + double& xlMax, + double& ylMax, + double& zlMax){ + + if(Rview(i)[0] < xlMin) xlMin = Rview(i)[0]; + if(Rview(i)[1] < ylMin) ylMin = Rview(i)[1]; + if(Rview(i)[2] < zlMin) zlMin = Rview(i)[2]; + + if(Rview(i)[0] > xlMax) xlMax = Rview(i)[0]; + if(Rview(i)[1] > ylMax) ylMax = Rview(i)[1]; + if(Rview(i)[2] > zlMax) zlMax = Rview(i)[2]; + + }, Kokkos::Min(xMin), Kokkos::Min(yMin), Kokkos::Min(zMin), + Kokkos::Max(xMax), Kokkos::Max(yMax), Kokkos::Max(zMax)); + + Kokkos::fence(); + + Vector_t Rmin = {xMin, yMin, zMin}; + Vector_t Rmax = {xMax, yMax, zMax}; + + for (unsigned d = 0; d < 3; ++d) { + if(Rmin[d] < rmin_m[d]) { + std::cout << "Invalid particles with min. in rank: " << Ippl::Comm->rank() << " Rmin: " << Rmin << std::endl; + } + if(Rmax[d] > rmax_m[d]) { + std::cout << "Invalid particles with max. in rank: " << Ippl::Comm->rank() << " Rmax: " << Rmax << std::endl; + } + } + } + void LeapFrogPIC(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, const double dt, const double& tStartMySlice) { PLayout& PL = this->getLayout(); + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); rhoPIC_m = 0.0; scatter(q, rhoPIC_m, Rtemp); @@ -429,6 +475,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //Apply particle BC PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); //scatter the charge onto the underlying grid rhoPIC_m = 0.0; @@ -458,6 +505,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { const double& tStartMySlice, const unsigned int& iter) { PLayout& PL = this->getLayout(); + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; scatterPIF(q, rhoPIF_m, Rtemp); @@ -483,6 +532,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //Apply particle BC PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index e680ee7e9..3db0ea0e3 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -197,7 +197,6 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); - static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); IpplTimings::startTimer(mainTimer); @@ -212,7 +211,7 @@ int main(int argc, char *argv[]){ const unsigned int maxIter = std::atoi(argv[12]); const double tStartMySlice = Ippl::Comm->rank() * dtSlice; - const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; + //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; msg << "Parareal Landau damping" << endl @@ -363,6 +362,7 @@ int 
main(int argc, char *argv[]){ } Ippl::Comm->barrier(); + msg << "First Leap frog PIC done " << endl; Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); @@ -371,6 +371,7 @@ int main(int argc, char *argv[]){ //Run the coarse integrator to get the values at the end of the time slice Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + msg << "Second Leap frog PIC done " << endl; //The following might not be needed Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index e2c3928e5..498d49cd7 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -170,10 +170,15 @@ namespace ippl { Vector whi = l - index; Vector wlo = 1.0 - whi; - const size_t i = index[0] - lDom[0].first() + nghost; - const size_t j = index[1] - lDom[1].first() + nghost; - const size_t k = index[2] - lDom[2].first() + nghost; - + const int i = index[0] - lDom[0].first() + nghost; + const int j = index[1] - lDom[1].first() + nghost; + const int k = index[2] - lDom[2].first() + nghost; + + //if((i < 1) || (i > lDom[0].last() + 2) || (j < 1) || (j > lDom[1].last() + 2) + // || (k < 1) || (k > lDom[0].last() + 2)) { + // std::cout << "i: " << i << ", j: " << j << ", k: " << k << std::endl; + // std::cout << "Invalid particle co-ordinates: " << pp(idx) << std::endl; + //} // scatter const value_type& val = dview_m(idx); diff --git a/src/Particle/ParticleBC.h b/src/Particle/ParticleBC.h index 275f04e00..dfd5aa5a0 100644 --- a/src/Particle/ParticleBC.h +++ b/src/Particle/ParticleBC.h @@ -77,8 +77,11 @@ namespace ippl { struct PeriodicBC : public ParticleBC { using value_type = typename ParticleBC::value_type; - using ParticleBC::extent_m; - using ParticleBC::middle_m; + //using ParticleBC::extent_m; + //using ParticleBC::middle_m; + using ParticleBC::maxval_m; + using ParticleBC::minval_m; + using ParticleBC::isUpper_m; KOKKOS_DEFAULTED_FUNCTION PeriodicBC() = default; @@ -94,7 +97,16 @@ namespace ippl { KOKKOS_INLINE_FUNCTION void operator()(const size_t& i) const { value_type& value = this->view_m(i)[this->dim_m]; - value = value - extent_m * (int)((value - middle_m) * 2 / extent_m); + //value = value - this->extent_m * (int)((value - this->middle_m) * 2 / extent_m); + //if ((value < this->minval_m) && (!this->isUpper_m)) + // value = (this->maxval_m - (this->minval_m - value)); + //else if ((value >= this->maxval_m) && (this->isUpper_m)) + // value = (this->minval_m + (value - this->maxval_m)); + bool tooHigh = value >= maxval_m; + bool tooLow = value < minval_m; + + value += tooHigh * (minval_m - maxval_m) + + tooLow * (maxval_m - minval_m); } KOKKOS_DEFAULTED_FUNCTION From 0e09cb2a6aaecefbce11a37568f280caa22915db Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 12 Dec 2022 22:12:12 +0100 Subject: [PATCH 029/117] Several tests performed. Need to run with larger no. 
of particles and CPU cores --- alpine/PinT/ChargedParticlesPinT.hpp | 2 + alpine/PinT/LandauDampingPinT.cpp | 138 +++++++++++++++++++++++---- 2 files changed, 124 insertions(+), 16 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index d019dd982..fd6735720 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -53,8 +53,10 @@ template class ChargedParticlesPinT : public ippl::ParticleBase { public: CxField_t rhoPIF_m; + CxField_t rhoPIFprevIter_m; Field_t rhoPIC_m; VField_t EfieldPIC_m; + //VField_t EfieldPICprevIter_m; Vector nr_m; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 3db0ea0e3..48c05bca5 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -141,32 +141,36 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); - double temp = 0.0; + double localError = 0.0; + double localNorm = 0.0; - Kokkos::parallel_reduce("Abs. error", Q.size(), - KOKKOS_LAMBDA(const int i, double& valL){ + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); - double myVal = dot(diff, diff).apply(); - valL += myVal; - }, Kokkos::Sum(temp)); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); - std::cout << "Rank: " << myrank << " Iter: " << iter << " Abs. Error: " << temp << std::endl; + Kokkos::fence(); + std::cout << "Rank: " << myrank << " Iter: " << iter << " Abs. 
Error: " << localError << std::endl; double globaltemp = 0.0; - MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); double absError = std::sqrt(globaltemp); - temp = 0.0; - Kokkos::parallel_reduce("Q norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = dot(Qview(i), Qview(i)).apply(); - valL += myVal; - }, Kokkos::Sum(temp)); + //temp = 0.0; + //Kokkos::parallel_reduce("Q norm", Q.size(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = dot(Qview(i), Qview(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(temp)); globaltemp = 0.0; - MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); double relError = absError / std::sqrt(globaltemp); @@ -174,6 +178,88 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr } +double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { + + auto rhoview = rhoPIF.getView(); + auto rhoprevview = rhoPIFprevIter.getView(); + const int nghost = rhoPIF.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rhoPIF.getLayout(); + const Mesh_t& mesh = rhoPIF.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + double AbsError = 0.0; + double Enorm = 0.0; + //Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ex field error", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& errorSum, + double& fieldSum) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + Dr += kVec[d] * kVec[d]; + } + + double myError = 0.0; + double myField = 0.0; + //Kokkos::complex Ek = {0.0, 0.0}; + //Kokkos::complex Ekprev = {0.0, 0.0}; + //for(size_t d = 0; d < Dim; ++d) { + // if(Dr != 0.0) { + // Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + // Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); + // } + // Ekprev = Ekprev - Ek; + // myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); + // myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + //} + //errorSum += myError; + //fieldSum += myField; + Kokkos::complex rhok = rhoview(i+nghost,j+nghost,k+nghost); + Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); + rhokprev = rhokprev - rhok; + myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); + errorSum += myError; + myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); + fieldSum += myField; + + }, Kokkos::Sum(AbsError), Kokkos::Sum(Enorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&AbsError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double globalNorm = 0.0; + MPI_Allreduce(&Enorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //fieldEnergy *= volume; + + double relError = std::sqrt(globalError)/std::sqrt(globalNorm); + + return relError; +} + const char* TestName = 
"LandauDampingPinT"; @@ -282,8 +368,11 @@ int main(int argc, char *argv[]){ Pcoarse->nr_m = nrPIC; Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); Pcoarse->time_m = tStartMySlice; @@ -373,6 +462,8 @@ int main(int argc, char *argv[]){ Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); msg << "Second Leap frog PIC done " << endl; + //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + //The following might not be needed Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); @@ -384,6 +475,7 @@ int main(int argc, char *argv[]){ //Run fine integrator in parallel Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1); + //if(isConverged) { @@ -437,17 +529,31 @@ int main(int argc, char *argv[]){ MPI_Wait(&request, MPI_STATUS_IGNORE); } + //Pcoarse->EfieldPICprevIter_m = Pcoarse->EfieldPICprevIter_m - Pcoarse->EfieldPIC_m; + //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPICprevIter_m, Pcoarse->EfieldPICprevIter_m); + //double absFieldError = std::sqrt(Pcoarse->rhoPIC_m.sum()); + //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPIC_m, Pcoarse->EfieldPIC_m); + //double EfieldNorm = std::sqrt(Pcoarse->rhoPIC_m.sum()); + //double EfieldError = absFieldError / EfieldNorm; double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank()); double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank()); - + + double EfieldError = 0; + if(it > 0) { + EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); + } + Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror + //<< " Efield error: " << EfieldError + << " Rhofield error: " << EfieldError << endl; Pcoarse->writeError(Rerror, Perror, it+1); + //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); if((Rerror <= tol) && (Perror <= tol)) { break; From fb65a6676f33daa9f10b05ce9e9dab3d2eedd92a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 13 Dec 2022 07:20:19 +0100 Subject: [PATCH 030/117] Some checking things for PIC --- alpine/PinT/ChargedParticlesPinT.hpp | 66 +++++++++++++++++++++++ alpine/PinT/LandauDampingPinT.cpp | 80 ++++++++++++++-------------- 2 files changed, 107 insertions(+), 39 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index fd6735720..93ce2f64f 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -152,7 +152,71 @@ class ChargedParticlesPinT : public ippl::ParticleBase { solver_mp->setLhs(EfieldPIC_m); } + void dumpLandauPIC() { + const int nghostE = EfieldPIC_m.getNghost(); + auto Eview = EfieldPIC_m.getView(); + double fieldEnergy, ExAmp; + using mdrange_type = Kokkos::MDRangePolicy>; + + double temp = 0.0; + Kokkos::parallel_reduce("Ex inner product", + mdrange_type({nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, + Eview.extent(1) - nghostE, + Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, + const size_t k, 
double& valL) + { + double myVal = std::pow(Eview(i, j, k)[0], 2); + valL += myVal; + }, Kokkos::Sum(temp)); + double globaltemp = temp; + //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; + + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", + mdrange_type({nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, + Eview.extent(1) - nghostE, + Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, + const size_t k, double& valL) + { + double myVal = std::fabs(Eview(i, j, k)[0]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + ExAmp = tempMax; + //MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + + void dumpLandau(size_type /*totalP*/, const unsigned int& iter) { @@ -466,6 +530,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; + dumpLandauPIC(); for (unsigned int it=0; it { Ptemp = Ptemp - 0.5 * dt * E; time_m += dt; + dumpLandauPIC(); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 48c05bca5..05dd0b4ec 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -199,7 +199,7 @@ double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { double AbsError = 0.0; double Enorm = 0.0; - //Kokkos::complex imag = {0.0, 1.0}; + Kokkos::complex imag = {0.0, 1.0}; double pi = std::acos(-1.0); Kokkos::parallel_reduce("Ex field error", mdrange_type({0, 0, 0}, @@ -224,26 +224,26 @@ double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { double myError = 0.0; double myField = 0.0; - //Kokkos::complex Ek = {0.0, 0.0}; - //Kokkos::complex Ekprev = {0.0, 0.0}; - //for(size_t d = 0; d < Dim; ++d) { - // if(Dr != 0.0) { - // Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - // Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); - // } - // Ekprev = Ekprev - Ek; - // myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); - // myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); - //} - //errorSum += myError; - //fieldSum += myField; - Kokkos::complex rhok = rhoview(i+nghost,j+nghost,k+nghost); - Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); - rhokprev = rhokprev - rhok; - myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); + Kokkos::complex Ek = {0.0, 0.0}; + Kokkos::complex Ekprev = {0.0, 0.0}; + for(size_t d = 0; d < Dim; ++d) { + if(Dr != 0.0) { + Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); + } + Ekprev = Ekprev - Ek; + myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); + myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } errorSum += myError; - myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); fieldSum += myField; + //Kokkos::complex rhok = 
rhoview(i+nghost,j+nghost,k+nghost); + //Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); + //rhokprev = rhokprev - rhok; + //myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); + //errorSum += myError; + //myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); + //fieldSum += myField; }, Kokkos::Sum(AbsError), Kokkos::Sum(Enorm)); @@ -446,30 +446,32 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); //Get initial guess for ranks other than 0 by propagating the coarse solver - if (Ippl::Comm->rank() > 0) { - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); + //if (Ippl::Comm->rank() > 0) { + if (Ippl::Comm->rank() == 0) { + //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->size()*ntCoarse, dtCoarse, tStartMySlice); } Ippl::Comm->barrier(); - msg << "First Leap frog PIC done " << endl; + //msg << "First Leap frog PIC done " << endl; - - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + // + //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - //Run the coarse integrator to get the values at the end of the time slice - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - msg << "Second Leap frog PIC done " << endl; + ////Run the coarse integrator to get the values at the end of the time slice + //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + //msg << "Second Leap frog PIC done " << endl; - //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + ////Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - //The following might not be needed - Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + ////The following might not be needed + //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - msg << "Starting parareal iterations ..." << endl; + //msg << "Starting parareal iterations ..." 
<< endl; bool isConverged = false; for (unsigned int it=0; itR, Pcoarse->RprevIter, it+1, Ippl::Comm->rank()); double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank()); - double EfieldError = 0; - if(it > 0) { - EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); - } + //double EfieldError = 0; + //if(it > 0) { + // EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); + //} - Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); + //Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror //<< " Efield error: " << EfieldError - << " Rhofield error: " << EfieldError + //<< " Rhofield error: " << EfieldError << endl; Pcoarse->writeError(Rerror, Perror, it+1); From 631b008db1b428bfcb74a87eabf755452d48e7a0 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 13 Dec 2022 09:39:55 +0100 Subject: [PATCH 031/117] PIC checked --- alpine/PinT/ChargedParticlesPinT.hpp | 8 +++---- alpine/PinT/LandauDampingPinT.cpp | 32 +++++++++++++--------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 93ce2f64f..90f055175 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -53,7 +53,7 @@ template class ChargedParticlesPinT : public ippl::ParticleBase { public: CxField_t rhoPIF_m; - CxField_t rhoPIFprevIter_m; + //CxField_t rhoPIFprevIter_m; Field_t rhoPIC_m; VField_t EfieldPIC_m; //VField_t EfieldPICprevIter_m; @@ -212,7 +212,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } - Ippl::Comm->barrier(); + //Ippl::Comm->barrier(); } @@ -530,7 +530,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - dumpLandauPIC(); + //dumpLandauPIC(); for (unsigned int it=0; it { Ptemp = Ptemp - 0.5 * dt * E; time_m += dt; - dumpLandauPIC(); + //dumpLandauPIC(); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 05dd0b4ec..c4b7d3580 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -368,7 +368,7 @@ int main(int argc, char *argv[]){ Pcoarse->nr_m = nrPIC; Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); - Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); + //Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); @@ -446,32 +446,30 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); //Get initial guess for ranks other than 0 by propagating the coarse solver - //if (Ippl::Comm->rank() > 0) { - if (Ippl::Comm->rank() == 0) { - //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->size()*ntCoarse, dtCoarse, tStartMySlice); + if (Ippl::Comm->rank() > 0) { + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); } Ippl::Comm->barrier(); - //msg << "First Leap frog PIC done " << endl; + msg << "First Leap frog PIC done " << endl; - // - //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + + 
Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - ////Run the coarse integrator to get the values at the end of the time slice - //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - //msg << "Second Leap frog PIC done " << endl; + //Run the coarse integrator to get the values at the end of the time slice + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + msg << "Second Leap frog PIC done " << endl; - ////Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - ////The following might not be needed - //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + //The following might not be needed + Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - //msg << "Starting parareal iterations ..." << endl; + msg << "Starting parareal iterations ..." << endl; bool isConverged = false; for (unsigned int it=0; it Date: Fri, 16 Dec 2022 16:04:53 +0100 Subject: [PATCH 032/117] Current version corresponding to the slides --- alpine/PinT/ChargedParticlesPinT.hpp | 10 ++++- alpine/PinT/LandauDampingPinT.cpp | 56 +++++++++++++++++++++++----- src/Particle/ParticleAttrib.hpp | 36 +++++++++--------- 3 files changed, 74 insertions(+), 28 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 90f055175..59f8aeaed 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -53,7 +53,7 @@ template class ChargedParticlesPinT : public ippl::ParticleBase { public: CxField_t rhoPIF_m; - //CxField_t rhoPIFprevIter_m; + CxField_t rhoPIFprevIter_m; Field_t rhoPIC_m; VField_t EfieldPIC_m; //VField_t EfieldPICprevIter_m; @@ -513,6 +513,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { ParticleAttrib& Ptemp, const unsigned int nt, const double dt, const double& tStartMySlice) { + static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); PLayout& PL = this->getLayout(); PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); @@ -553,7 +554,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); //Field solve + IpplTimings::startTimer(fieldSolvePIC); solver_mp->solve(); + IpplTimings::stopTimer(fieldSolvePIC); // gather E field gather(E, EfieldPIC_m, Rtemp); @@ -572,6 +575,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { const double& dt, const bool& /*isConverged*/, const double& tStartMySlice, const unsigned int& iter) { + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); @@ -586,8 +590,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; if((time_m == 0.0)) { + IpplTimings::startTimer(dumpData); dumpLandau(this->getLocalNum(), iter); dumpEnergy(this->getLocalNum(), iter, Ptemp); + IpplTimings::stopTimer(dumpData); } for (unsigned int it=0; it { time_m += dt; + IpplTimings::startTimer(dumpData); dumpLandau(this->getLocalNum(), iter); dumpEnergy(this->getLocalNum(), iter, Ptemp); + 
IpplTimings::stopTimer(dumpData); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index c4b7d3580..3b3aabee0 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -137,7 +137,7 @@ struct generate_random { double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& iter, const int& myrank) { + const unsigned int& /*iter*/, const int& /*myrank*/) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -154,7 +154,7 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - std::cout << "Rank: " << myrank << " Iter: " << iter << " Abs. Error: " << localError << std::endl; + //std::cout << "Rank: " << myrank << " Iter: " << iter << " Abs. Error: " << localError << std::endl; double globaltemp = 0.0; MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); @@ -283,6 +283,12 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef timeCommunication = IpplTimings::getTimer("timeCommunication"); + static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); + static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); + static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); IpplTimings::startTimer(mainTimer); @@ -368,7 +374,7 @@ int main(int argc, char *argv[]){ Pcoarse->nr_m = nrPIC; Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); - //Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); @@ -423,8 +429,10 @@ int main(int argc, char *argv[]){ buf->resetReadPos(); } Ippl::Comm->barrier(); + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, @@ -442,31 +450,41 @@ int main(int argc, char *argv[]){ msg << "particles created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); //Get initial guess for ranks other than 0 by propagating the coarse solver + IpplTimings::startTimer(coarsePropagator); if (Ippl::Comm->rank() > 0) { Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); } + IpplTimings::stopTimer(coarsePropagator); Ippl::Comm->barrier(); msg << "First Leap frog PIC done " << endl; + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); //Run the coarse integrator to get the values at the end of the time slice + 
IpplTimings::startTimer(coarsePropagator); Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + IpplTimings::stopTimer(coarsePropagator); msg << "Second Leap frog PIC done " << endl; //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); //The following might not be needed + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); msg << "Starting parareal iterations ..." << endl; @@ -474,7 +492,9 @@ int main(int argc, char *argv[]){ for (unsigned int it=0; itLeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1); + IpplTimings::stopTimer(finePropagator); //if(isConverged) { @@ -496,9 +516,12 @@ int main(int argc, char *argv[]){ Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(timeCommunication); tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() > 0) { @@ -511,15 +534,21 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); } + IpplTimings::stopTimer(timeCommunication); + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(coarsePropagator); Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; + IpplTimings::startTimer(timeCommunication); if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -528,6 +557,7 @@ int main(int argc, char *argv[]){ buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } + IpplTimings::stopTimer(timeCommunication); //Pcoarse->EfieldPICprevIter_m = Pcoarse->EfieldPICprevIter_m - Pcoarse->EfieldPIC_m; //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPICprevIter_m, Pcoarse->EfieldPICprevIter_m); @@ -536,26 +566,34 @@ int main(int argc, char *argv[]){ //double EfieldNorm = std::sqrt(Pcoarse->rhoPIC_m.sum()); //double EfieldError = absFieldError / EfieldNorm; + IpplTimings::startTimer(computeErrors); double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank()); double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank()); - //double EfieldError = 0; - //if(it > 0) { - // EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); - //} + double EfieldError = 0; + if(it > 0) { + EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); + } + IpplTimings::stopTimer(computeErrors); - //Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); + IpplTimings::stopTimer(deepCopy); + msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror - //<< " Efield error: " << 
EfieldError + << " Efield error: " << EfieldError //<< " Rhofield error: " << EfieldError << endl; + IpplTimings::startTimer(dumpData); Pcoarse->writeError(Rerror, Perror, it+1); + IpplTimings::stopTimer(dumpData); //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); if((Rerror <= tol) && (Perror <= tol)) { + //if(Perror <= tol) { break; } } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 498d49cd7..3c0d9e183 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -142,8 +142,8 @@ namespace ippl { const ParticleAttrib< Vector, Properties... >& pp) const { - static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("Scatter"); - IpplTimings::startTimer(scatterTimer); + static IpplTimings::TimerRef scatterPICTimer = IpplTimings::getTimer("ScatterPIC"); + IpplTimings::startTimer(scatterPICTimer); typename Field::view_type view = f.getView(); const M& mesh = f.get_mesh(); @@ -192,12 +192,12 @@ namespace ippl { Kokkos::atomic_add(&view(i, j, k ), whi[0] * whi[1] * whi[2] * val); } ); - IpplTimings::stopTimer(scatterTimer); + IpplTimings::stopTimer(scatterPICTimer); - static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("AccumulateHalo"); - IpplTimings::startTimer(accumulateHaloTimer); + //static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("AccumulateHalo"); + //IpplTimings::startTimer(accumulateHaloTimer); f.accumulateHalo(); - IpplTimings::stopTimer(accumulateHaloTimer); + //IpplTimings::stopTimer(accumulateHaloTimer); } @@ -209,8 +209,8 @@ namespace ippl { { //Inform msg("scatterPIF"); - static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("Scatter"); - IpplTimings::startTimer(scatterTimer); + static IpplTimings::TimerRef scatterPIFTimer = IpplTimings::getTimer("ScatterPIF"); + IpplTimings::startTimer(scatterPIFTimer); using view_type = typename Field::view_type; using vector_type = typename M::vector_type; @@ -281,7 +281,7 @@ namespace ippl { } ); - IpplTimings::stopTimer(scatterTimer); + IpplTimings::stopTimer(scatterPIFTimer); //Kokkos::deep_copy(fview, viewLocal); //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); @@ -300,13 +300,13 @@ namespace ippl { const ParticleAttrib, Properties...>& pp) { - static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("FillHalo"); - IpplTimings::startTimer(fillHaloTimer); + //static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("FillHalo"); + //IpplTimings::startTimer(fillHaloTimer); f.fillHalo(); - IpplTimings::stopTimer(fillHaloTimer); + //IpplTimings::stopTimer(fillHaloTimer); - static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("Gather"); - IpplTimings::startTimer(gatherTimer); + static IpplTimings::TimerRef gatherPICTimer = IpplTimings::getTimer("GatherPIC"); + IpplTimings::startTimer(gatherPICTimer); const typename Field::view_type view = f.getView(); const M& mesh = f.get_mesh(); @@ -349,7 +349,7 @@ namespace ippl { + whi[0] * whi[1] * whi[2] * view(i, j, k ); } ); - IpplTimings::stopTimer(gatherTimer); + IpplTimings::stopTimer(gatherPICTimer); } template @@ -359,8 +359,8 @@ namespace ippl { const { //Inform msg("gatherPIF"); - static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("Gather"); - IpplTimings::startTimer(gatherTimer); + static IpplTimings::TimerRef gatherPIFTimer = IpplTimings::getTimer("GatherPIF"); + IpplTimings::startTimer(gatherPIFTimer); using view_type = typename 
Field::view_type; using vector_type = typename M::vector_type; @@ -444,7 +444,7 @@ namespace ippl { ); - IpplTimings::stopTimer(gatherTimer); + IpplTimings::stopTimer(gatherPIFTimer); } From 9ffde44be7e19ec0b823c473467ba574ea0a8741 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 19 Dec 2022 15:44:30 +0100 Subject: [PATCH 033/117] Twostream instability and Penning trap PinT files added --- alpine/PinT/BumponTailInstabilityPinT.cpp | 717 ++++++++++++++++++++++ alpine/PinT/CMakeLists.txt | 6 + alpine/PinT/ChargedParticlesPinT.hpp | 355 ++++++++++- alpine/PinT/LandauDampingPinT.cpp | 65 +- alpine/PinT/PenningTrapPinT.cpp | 683 +++++++++++++++++++++ 5 files changed, 1790 insertions(+), 36 deletions(-) create mode 100644 alpine/PinT/BumponTailInstabilityPinT.cpp create mode 100644 alpine/PinT/PenningTrapPinT.cpp diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp new file mode 100644 index 000000000..6bc012cbc --- /dev/null +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -0,0 +1,717 @@ +// Parallel-in-time (PinT) method Parareal combined with Particle-in-cell +// and Particle-in-Fourier schemes. The example is electrostatic Landau +// damping. The implementation of Parareal follows the open source implementation +// https://github.com/Parallel-in-Time/PararealF90 by Daniel Ruprecht. The corresponding +// publication is Ruprecht, Daniel. "Shared memory pipelined parareal." +// European Conference on Parallel Processing. Springer, Cham, 2017. +// +// Usage: +// srun ./BumponTailInstability --info 5 +// nmx = No. of Fourier modes in the x-direction +// nmy = No. of Fourier modes in the y-direction +// nmz = No. of Fourier modes in the z-direction +// nx = No. of grid points in the x-direction +// ny = No. of grid points in the y-direction +// nz = No. of grid points in the z-direction +// Np = Total no. of macro-particles in the simulation +// Example: +// srun ./BumponTailInstability 16 16 16 32 32 32 655360 20.0 0.05 0.05 1e-5 100 --info 5 +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . 
+// + +#include "ChargedParticlesPinT.hpp" +#include "StatesBeginSlice.hpp" +#include "StatesEndSlice.hpp" +//#include "LeapFrogPIC.cpp" +//#include "LeapFrogPIF.cpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T k, delta, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& k_, const T& delta_, + const T& u_) + : k(k_), delta(delta_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = x + (delta * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = 1 + (delta * std::cos(k * x)); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type delta, sigma, muBulk, muBeam; + size_type nlocBulk; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + value_type& delta_, T& k_, value_type& sigma_, + value_type& muBulk_, value_type& muBeam_, + size_type& nlocBulk_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + delta(delta_), sigma(sigma_), muBulk(muBulk_), muBeam(muBeam_), + nlocBulk(nlocBulk_), k(k_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + bool isBeam = (i >= nlocBulk); + + value_type muZ = (value_type)(((!isBeam) * muBulk) + (isBeam * muBeam)); + + for (unsigned d = 0; d < Dim-1; ++d) { + + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + v(i)[d] = rand_gen.normal(0.0, sigma); + } + v(i)[Dim-1] = rand_gen.normal(muZ, sigma); + + value_type u = rand_gen.drand(minU[Dim-1], maxU[Dim-1]); + x(i)[Dim-1] = u / (1 + delta); + Newton1D solver(k[Dim-1], delta, u); + solver.solve(x(i)[Dim-1]); + + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + lError = std::sqrt(localError)/std::sqrt(localNorm); + //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. 
Error: " << lError << std::endl; + + + double globaltemp = 0.0; + MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + + double absError = std::sqrt(globaltemp); + + //temp = 0.0; + //Kokkos::parallel_reduce("Q norm", Q.size(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = dot(Qview(i), Qview(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(temp)); + + + globaltemp = 0.0; + MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + + double relError = absError / std::sqrt(globaltemp); + + return relError; + +} + +double computeLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + if(myValError > valLError) valLError = myValError; + + double myValnorm = dot(Qview(i), Qview(i)).apply(); + myValnorm = std::sqrt(myValnorm); + + if(myValnorm > valLnorm) valLnorm = myValnorm; + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); + + Kokkos::fence(); + lError = localError/localNorm; + //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; + + + double globaltemp = 0.0; + MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + + double absError = globaltemp; + + globaltemp = 0.0; + MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + + double relError = absError / globaltemp; + + return relError; + +} + + +double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { + + auto rhoview = rhoPIF.getView(); + auto rhoprevview = rhoPIFprevIter.getView(); + const int nghost = rhoPIF.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rhoPIF.getLayout(); + const Mesh_t& mesh = rhoPIF.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + double AbsError = 0.0; + double Enorm = 0.0; + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ex field error", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& errorSum, + double& fieldSum) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + Dr += kVec[d] * kVec[d]; + } + + double myError = 0.0; + double myField = 0.0; + Kokkos::complex Ek = {0.0, 0.0}; + Kokkos::complex Ekprev = {0.0, 0.0}; + for(size_t d = 0; d < Dim; ++d) { + if(Dr != 0.0) { + Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); + } + Ekprev = Ekprev - Ek; + myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); + myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } + errorSum += myError; + fieldSum += myField; + //Kokkos::complex rhok = 
rhoview(i+nghost,j+nghost,k+nghost); + //Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); + //rhokprev = rhokprev - rhok; + //myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); + //errorSum += myError; + //myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); + //fieldSum += myField; + + }, Kokkos::Sum(AbsError), Kokkos::Sum(Enorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&AbsError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double globalNorm = 0.0; + MPI_Allreduce(&Enorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //fieldEnergy *= volume; + + double relError = std::sqrt(globalError)/std::sqrt(globalNorm); + + return relError; +} + + +//const char* TestName = "TwoStreamInstability"; +const char* TestName = "BumponTailInstability"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + Inform msg("TestName"); + Inform msg2all("TestName",INFORM_ALL_NODES); + + ippl::Vector nmPIF = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + ippl::Vector nrPIC = { + std::atoi(argv[4]), + std::atoi(argv[5]), + std::atoi(argv[6]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef timeCommunication = IpplTimings::getTimer("timeCommunication"); + static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); + static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); + static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[7]); + const double tEnd = std::atof(argv[8]); + const double dtSlice = tEnd / Ippl::Comm->size(); + const double dtFine = std::atof(argv[9]); + const double dtCoarse = std::atof(argv[10]); + const unsigned int ntFine = (unsigned int)(dtSlice / dtFine); + const unsigned int ntCoarse = (unsigned int)(dtSlice / dtCoarse); + const double tol = std::atof(argv[11]); + const unsigned int maxIter = std::atoi(argv[12]); + + msg << "dtSlice: " << dtSlice + << "dtSlice/dtFine: " << dtSlice / dtFine + << "(int)dtSlice/dtFine: " << (unsigned int)(dtSlice / dtFine) + << endl; + + const double tStartMySlice = Ippl::Comm->rank() * dtSlice; + //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; + + + using bunch_type = ChargedParticlesPinT; + using states_begin_type = StatesBeginSlice; + using states_end_type = StatesEndSlice; + + std::unique_ptr Pcoarse; + std::unique_ptr Pbegin; + std::unique_ptr Pend; + + ippl::NDIndex domainPIC; + ippl::NDIndex domainPIF; + for (unsigned i = 0; i< Dim; i++) { + domainPIC[i] = ippl::Index(nrPIC[i]); + domainPIF[i] = ippl::Index(nmPIF[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t kw; + double sigma, muBulk, muBeam, epsilon, delta; + + + if(std::strcmp(TestName,"TwoStreamInstability") == 0) { + // Parameters for two stream instability as in + // 
https://www.frontiersin.org/articles/10.3389/fphy.2018.00105/full + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; + epsilon = 0.5; + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; + } + else if(std::strcmp(TestName,"BumponTailInstability") == 0) { + kw = {0.21, 0.21, 0.21}; + sigma = 1.0 / std::sqrt(2.0); + epsilon = 0.1; + muBulk = 0.0; + muBeam = 4.0; + delta = 0.01; + } + else { + //Default value is two stream instability + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; + epsilon = 0.5; + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; + } + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw ; + double dxPIC = rmax[0] / nrPIC[0]; + double dyPIC = rmax[1] / nrPIC[1]; + double dzPIC = rmax[2] / nrPIC[2]; + + + double dxPIF = rmax[0] / nmPIF[0]; + double dyPIF = rmax[1] / nmPIF[1]; + double dzPIF = rmax[2] / nmPIF[2]; + Vector_t hrPIC = {dxPIC, dyPIC, dzPIC}; + Vector_t hrPIF = {dxPIF, dyPIF, dzPIF}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + + const bool isAllPeriodic=true; + Mesh_t meshPIC(domainPIC, hrPIC, origin); + Mesh_t meshPIF(domainPIF, hrPIF, origin); + FieldLayout_t FLPIC(domainPIC, decomp, isAllPeriodic); + FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); + PLayout_t PL(FLPIC, meshPIC); + + //Q = -\int\int f dx dv + double Q = -rmax[0] * rmax[1] * rmax[2]; + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); + + Pcoarse->nr_m = nrPIC; + + Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); + + Pcoarse->initFFTSolver(); + Pcoarse->time_m = tStartMySlice; + + IpplTimings::startTimer(particleCreation); + + Vector_t minU, maxU; + for (unsigned d = 0; d create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + using buffer_type = ippl::Communicate::buffer_type; + int tag; +#ifdef KOKKOS_ENABLE_CUDA + //If we don't do the following even with the same seed the initial + //condition is not the same on different GPUs + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(Ippl::Comm->rank() == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, delta, kw, + sigma, muBulk, muBeam, nlocBulk, minU, maxU)); + + + Kokkos::fence(); + size_type bufSize = Pbegin->packedSize(nloc); + std::vector requests(0); + int sends = 0; + for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); + requests.resize(requests.size() + 1); + Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); + buf->resetWritePos(); + ++sends; + } + MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + } + else { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + } + Ippl::Comm->barrier(); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); +#else + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + 
Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, delta, kw, + sigma, muBulk, muBeam, nlocBulk, minU, maxU)); + + + Kokkos::fence(); + Ippl::Comm->barrier(); +#endif + + + msg << "Parareal Bump on tail instability" + << endl + << "Slice dT: " << dtSlice + << endl + << "No. of fine time steps: " << ntFine + << endl + << "No. of coarse time steps: " << ntCoarse + << endl + << "Tolerance: " << tol + << " Max. iterations: " << maxIter + << endl + << "Np= " << nloc + << " Fourier modes = " << nmPIF + << " Grid points = " << nrPIC + << endl; + + Pcoarse->q = Pcoarse->Q_m/nloc; + IpplTimings::stopTimer(particleCreation); + + msg << "particles created and initial conditions assigned " << endl; + + //Copy initial conditions as they are needed later + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + //Get initial guess for ranks other than 0 by propagating the coarse solver + IpplTimings::startTimer(coarsePropagator); + if (Ippl::Comm->rank() > 0) { + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); + } + + Ippl::Comm->barrier(); + + IpplTimings::stopTimer(coarsePropagator); + + msg << "First Leap frog PIC done " << endl; + + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + //Run the coarse integrator to get the values at the end of the time slice + IpplTimings::startTimer(coarsePropagator); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + IpplTimings::stopTimer(coarsePropagator); + msg << "Second Leap frog PIC done " << endl; + + //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + + //The following might not be needed + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + msg << "Starting parareal iterations ..." 
<< endl; + bool isConverged = false; + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); + //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, + // dtFine, isConverged, tStartMySlice, 0); + //Ippl::Comm->barrier(); + for (unsigned int it=0; itLeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1); + IpplTimings::stopTimer(finePropagator); + + + //if(isConverged) { + + //test with the serial solution + //Pcoarse->LeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, + // dtFine, isConverged, tStartMySlice, it+1); + //Ippl::Comm->barrier(); + //double Rerror = computeL2Error(Pcoarse->R0, Pbegin->R, it+1, Ippl::Comm->rank()); + //double Perror = computeL2Error(Pcoarse->P0, Pbegin->P, it+1, Ippl::Comm->rank()); + //msg << "Finished iteration: " << it+1 + //<< " Rerror: " << Rerror + //<< " Perror: " << Perror + //<< endl; + // break; + //} + + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(Ippl::Comm->rank() > 0) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + } + else { + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + IpplTimings::stopTimer(coarsePropagator); + + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; + + IpplTimings::startTimer(timeCommunication); + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + + //Pcoarse->EfieldPICprevIter_m = Pcoarse->EfieldPICprevIter_m - Pcoarse->EfieldPIC_m; + //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPICprevIter_m, Pcoarse->EfieldPICprevIter_m); + //double absFieldError = std::sqrt(Pcoarse->rhoPIC_m.sum()); + //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPIC_m, Pcoarse->EfieldPIC_m); + //double EfieldNorm = std::sqrt(Pcoarse->rhoPIC_m.sum()); + //double EfieldError = absFieldError / EfieldNorm; + + IpplTimings::startTimer(computeErrors); + double localRerror, localPerror; + double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + //double Rerror = 
computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + //double Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + double EfieldError = 0; + if(it > 0) { + EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); + } + IpplTimings::stopTimer(computeErrors); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); + IpplTimings::stopTimer(deepCopy); + + msg << "Finished iteration: " << it+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << " Efield error: " << EfieldError + //<< " Rhofield error: " << EfieldError + << endl; + + IpplTimings::startTimer(dumpData); + Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(localRerror, localPerror, it+1); + IpplTimings::stopTimer(dumpData); + //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + + if((Rerror <= tol) && (Perror <= tol)) { + //if(Perror <= tol) { + break; + } + } + + msg << "Twostream instability Parareal: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + return 0; +} diff --git a/alpine/PinT/CMakeLists.txt b/alpine/PinT/CMakeLists.txt index f73338484..73976aa27 100644 --- a/alpine/PinT/CMakeLists.txt +++ b/alpine/PinT/CMakeLists.txt @@ -16,6 +16,12 @@ set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) add_executable (LandauDampingPinT LandauDampingPinT.cpp) target_link_libraries (LandauDampingPinT ${IPPL_LIBS}) +add_executable (BumponTailInstabilityPinT BumponTailInstabilityPinT.cpp) +target_link_libraries (BumponTailInstabilityPinT ${IPPL_LIBS}) + +add_executable (PenningTrapPinT PenningTrapPinT.cpp) +target_link_libraries (PenningTrapPinT ${IPPL_LIBS}) + # vi: set et ts=4 sw=4 sts=4: # Local Variables: diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 59f8aeaed..166ea1d29 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -217,36 +217,11 @@ class ChargedParticlesPinT : public ippl::ParticleBase { - void dumpLandau(size_type /*totalP*/, const unsigned int& iter) { + void dumpLandau(const unsigned int& iter) { double fieldEnergy = 0.0; double ExAmp = 0.0; - //auto Eview = E.getView(); - //double temp = 0.0; - - //Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), - // KOKKOS_LAMBDA(const int i, double& valL){ - // double myVal = Eview(i)[0] * Eview(i)[0]; - // valL += myVal; - // }, Kokkos::Sum(temp)); - - ////double globaltemp = 0.0; - //double globaltemp = temp; - ////MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //fieldEnergy = globaltemp * volume / totalP ; - - //double tempMax = 0.0; - //Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), - // KOKKOS_LAMBDA(const size_t i, double& valL) - // { - // double myVal = std::fabs(Eview(i)[0]); - // if(myVal > valL) valL = myVal; - // }, Kokkos::Max(tempMax)); - ////ExAmp = 0.0; - //ExAmp = tempMax; - ////MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); auto rhoview = rhoPIF_m.getView(); const int nghost = rhoPIF_m.getNghost(); @@ -320,15 +295,98 @@ class ChargedParticlesPinT : public ippl::ParticleBase { csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); - //if(time_m == 0.0) { - // csvout << "time, 
Ex_field_energy, Ex_max_norm" << endl; - //} csvout << time_m << " " << fieldEnergy << " " << ExAmp << endl; } + void dumpBumponTail(const unsigned int& iter) { + + + double fieldEnergy = 0.0; + double EzAmp = 0.0; + + auto rhoview = rhoPIF_m.getView(); + const int nghost = rhoPIF_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rhoPIF_m.getLayout(); + const Mesh_t& mesh = rhoPIF_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ez energy and Max", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& tlSum, + double& tlMax) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + if(Dr != 0.0) { + Ek = -(imag * kVec[2] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + } + double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + + tlSum += myVal; + + double myValMax = std::sqrt(myVal); + + if(myValMax > tlMax) tlMax = myValMax; + + }, Kokkos::Sum(fieldEnergy), Kokkos::Max(EzAmp)); + + + Kokkos::fence(); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy *= volume; + + + std::stringstream fname; + fname << "data/FieldBumponTail_"; + fname << Ippl::Comm->rank(); + fname << "_iter_"; + fname << iter; + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + + csvout << time_m << " " + << fieldEnergy << " " + << EzAmp << endl; + } + + + void dumpEnergy(size_type /*totalP*/, const unsigned int& iter, ParticleAttrib& Ptemp) { @@ -441,6 +499,28 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } + void writelocalError(double Rerror, double Perror, unsigned int iter) { + + std::stringstream fname; + fname << "data/localError_"; + fname << Ippl::Comm->rank(); + fname << ".csv"; + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(iter == 1) { + csvout << "Iter, Rerror, Perror" << endl; + } + + csvout << iter << " " + << Rerror << " " + << Perror << endl; + + } + + void writeError(double Rerror, double Perror, unsigned int iter) { if(Ippl::Comm->rank() == 0) { @@ -570,6 +650,113 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } + void BorisPIC(ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int nt, + const double dt, const double& tStartMySlice, const double& Bext) { + + static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); + PLayout& PL = this->getLayout(); + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); + rhoPIC_m = 0.0; + scatter(q, rhoPIC_m, Rtemp); + + rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); + rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + + //Field solve + solver_mp->solve(); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); 
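+        // Boris-type time stepping (used as the coarse Particle-in-Cell propagator for
+        // the Penning trap): each step below applies a first half kick from the total
+        // electric field, i.e. the self-consistent PIC field gathered above plus the
+        // analytic external trap field (linear in the displacement from the domain
+        // centre, with strength set by V0), together with the magnetic force from the
+        // uniform axial field Bext; then a full position drift R += dt * P; then a
+        // charge re-deposit and field re-solve; and finally a rotation-corrected second
+        // half kick with DrInv = 1 / (1 + (0.5 * dt * Bext)^2), alpha = -0.5 * dt being
+        // the half-step kick factor.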
+ + time_m = tStartMySlice; + + //dumpLandauPIC(); + double alpha = -0.5 * dt; + double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); + Vector_t rmax = rmax_m; + + for (unsigned int it=0; itgetLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(Rview(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eview(j)[0] += Eext_x; + Eview(j)[1] += Eext_y; + Eview(j)[2] += Eext_z; + + Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eview(j)[2]; + }); + + //drift + Rtemp = Rtemp + dt * Ptemp; + + //Apply particle BC + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); + + //scatter the charge onto the underlying grid + rhoPIC_m = 0.0; + scatter(q, rhoPIC_m, Rtemp); + + + rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); + rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + + //Field solve + IpplTimings::startTimer(fieldSolvePIC); + solver_mp->solve(); + IpplTimings::stopTimer(fieldSolvePIC); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); + + //kick + auto R2view = Rtemp.getView(); + auto P2view = Ptemp.getView(); + auto E2view = E.getView(); + Kokkos::parallel_for("Kick2", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + E2view(j)[0] += Eext_x; + E2view(j)[1] += Eext_y; + E2view(j)[2] += Eext_z; + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] + + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] + - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); + P2view(j)[2] += alpha * E2view(j)[2]; + }); + + time_m += dt; + //dumpLandauPIC(); + } + + } + + + void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, const double& dt, const bool& /*isConverged*/, @@ -591,7 +778,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0)) { IpplTimings::startTimer(dumpData); - dumpLandau(this->getLocalNum(), iter); + //dumpLandau(iter); + dumpBumponTail(iter); dumpEnergy(this->getLocalNum(), iter, Ptemp); IpplTimings::stopTimer(dumpData); } @@ -623,7 +811,112 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; IpplTimings::startTimer(dumpData); - dumpLandau(this->getLocalNum(), iter); + //dumpLandau(iter); + dumpBumponTail(iter); + dumpEnergy(this->getLocalNum(), iter, Ptemp); + IpplTimings::stopTimer(dumpData); + + } + } + + + void BorisPIF(ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int& nt, + const double& dt, const bool& /*isConverged*/, + const double& tStartMySlice, const unsigned int& iter, const double& Bext) { + + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + PLayout& PL = this->getLayout(); + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); + rhoPIF_m = {0.0, 0.0}; + scatterPIF(q, rhoPIF_m, Rtemp); + + rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + + // Solve for and gather E field + gatherPIF(E, rhoPIF_m, 
Rtemp); + + time_m = tStartMySlice; + + if((time_m == 0.0)) { + IpplTimings::startTimer(dumpData); + dumpEnergy(this->getLocalNum(), iter, Ptemp); + IpplTimings::stopTimer(dumpData); + } + double alpha = -0.5 * dt; + double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); + Vector_t rmax = rmax_m; + for (unsigned int it=0; itgetLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(Rview(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eview(j)[0] += Eext_x; + Eview(j)[1] += Eext_y; + Eview(j)[2] += Eext_z; + + Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eview(j)[2]; + }); + + //drift + Rtemp = Rtemp + dt * Ptemp; + + //Apply particle BC + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //checkBounds(Rtemp); + + //scatter the charge onto the underlying grid + rhoPIF_m = {0.0, 0.0}; + scatterPIF(q, rhoPIF_m, Rtemp); + + rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + + // Solve for and gather E field + gatherPIF(E, rhoPIF_m, Rtemp); + + //kick + auto R2view = Rtemp.getView(); + auto P2view = Ptemp.getView(); + auto E2view = E.getView(); + Kokkos::parallel_for("Kick2", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + E2view(j)[0] += Eext_x; + E2view(j)[1] += Eext_y; + E2view(j)[2] += Eext_z; + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] + + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] + - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); + P2view(j)[2] += alpha * E2view(j)[2]; + }); + + time_m += dt; + + IpplTimings::startTimer(dumpData); dumpEnergy(this->getLocalNum(), iter, Ptemp); IpplTimings::stopTimer(dumpData); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 3b3aabee0..01b10d334 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -137,7 +137,7 @@ struct generate_random { double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/) { + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -154,7 +154,9 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - //std::cout << "Rank: " << myrank << " Iter: " << iter << " Abs. Error: " << localError << std::endl; + lError = std::sqrt(localError)/std::sqrt(localNorm); + //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. 
Error: " << lError << std::endl; + double globaltemp = 0.0; MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); @@ -178,6 +180,48 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr } +double computeLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + if(myValError > valLError) valLError = myValError; + + double myValnorm = dot(Qview(i), Qview(i)).apply(); + myValnorm = std::sqrt(myValnorm); + + if(myValnorm > valLnorm) valLnorm = myValnorm; + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); + + Kokkos::fence(); + lError = localError/localNorm; + //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; + + + double globaltemp = 0.0; + MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + + double absError = globaltemp; + + globaltemp = 0.0; + MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + + double relError = absError / globaltemp; + + return relError; + +} + + double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { auto rhoview = rhoPIF.getView(); @@ -460,9 +504,11 @@ int main(int argc, char *argv[]){ if (Ippl::Comm->rank() > 0) { Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); } + + Ippl::Comm->barrier(); + IpplTimings::stopTimer(coarsePropagator); - Ippl::Comm->barrier(); msg << "First Leap frog PIC done " << endl; @@ -489,6 +535,11 @@ int main(int argc, char *argv[]){ msg << "Starting parareal iterations ..." 
<< endl; bool isConverged = false; + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); + //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, + // dtFine, isConverged, tStartMySlice, 0); + //Ippl::Comm->barrier(); for (unsigned int it=0; itR, Pcoarse->RprevIter, it+1, Ippl::Comm->rank()); - double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank()); + double localRerror, localPerror; + double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + //double Rerror = computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + //double Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); double EfieldError = 0; if(it > 0) { @@ -589,6 +643,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(localRerror, localPerror, it+1); IpplTimings::stopTimer(dumpData); //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp new file mode 100644 index 000000000..962698396 --- /dev/null +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -0,0 +1,683 @@ +// Parallel-in-time (PinT) method Parareal combined with Particle-in-cell +// and Particle-in-Fourier schemes. The example is electrostatic Landau +// damping. The implementation of Parareal follows the open source implementation +// https://github.com/Parallel-in-Time/PararealF90 by Daniel Ruprecht. The corresponding +// publication is Ruprecht, Daniel. "Shared memory pipelined parareal." +// European Conference on Parallel Processing. Springer, Cham, 2017. +// +// Usage: +// srun ./PenningTrap --info 5 +// nmx = No. of Fourier modes in the x-direction +// nmy = No. of Fourier modes in the y-direction +// nmz = No. of Fourier modes in the z-direction +// nx = No. of grid points in the x-direction +// ny = No. of grid points in the y-direction +// nz = No. of grid points in the z-direction +// Np = Total no. of macro-particles in the simulation +// Example: +// srun ./PenningTrap 16 16 16 32 32 32 655360 20.0 0.05 0.05 1e-5 100 --info 5 +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . 
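+//
+// For reference, the remaining positional arguments consumed by main() below
+// (cf. the example command line above) are:
+//     tEnd     = total simulation time; it is divided into one time slice per MPI rank
+//     dtFine   = time step of the fine (Particle-in-Fourier) propagator
+//     dtCoarse = time step of the coarse (Particle-in-Cell) propagator
+//     tol      = Parareal convergence tolerance on the particle position (R) and
+//                velocity (P) errors between successive iterations
+//     maxIter  = maximum number of Parareal iterations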
+// + +#include "ChargedParticlesPinT.hpp" +#include "StatesBeginSlice.hpp" +#include "StatesEndSlice.hpp" +//#include "LeapFrogPIC.cpp" +//#include "LeapFrogPIF.cpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T mu, sigma, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& mu_, const T& sigma_, + const T& u_) + : mu(mu_), sigma(sigma_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = std::erf((x - mu)/(sigma * std::sqrt(2.0))) + - 2 * u + 1; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = (1 / sigma) * std::sqrt(2 / pi) * + std::exp(-0.5 * (std::pow(((x - mu) / sigma),2))); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while ((iterations < max_iter) && (std::fabs(f(x)) > tol)) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + T mu, sigma, minU, maxU; + + double pi = std::acos(-1.0); + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + T& mu_, T& sigma_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + mu(mu_), sigma(sigma_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = (std::sqrt(pi / 2) * (2 * u - 1)) * + sigma[d] + mu[d]; + Newton1D solver(mu[d], sigma[d], u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + + +double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + lError = std::sqrt(localError)/std::sqrt(localNorm); + //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. 
Error: " << lError << std::endl; + + + double globaltemp = 0.0; + MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + + double absError = std::sqrt(globaltemp); + + //temp = 0.0; + //Kokkos::parallel_reduce("Q norm", Q.size(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = dot(Qview(i), Qview(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(temp)); + + + globaltemp = 0.0; + MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + + double relError = absError / std::sqrt(globaltemp); + + return relError; + +} + +double computeLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + if(myValError > valLError) valLError = myValError; + + double myValnorm = dot(Qview(i), Qview(i)).apply(); + myValnorm = std::sqrt(myValnorm); + + if(myValnorm > valLnorm) valLnorm = myValnorm; + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); + + Kokkos::fence(); + lError = localError/localNorm; + //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; + + + double globaltemp = 0.0; + MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + + double absError = globaltemp; + + globaltemp = 0.0; + MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + + double relError = absError / globaltemp; + + return relError; + +} + + +double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { + + auto rhoview = rhoPIF.getView(); + auto rhoprevview = rhoPIFprevIter.getView(); + const int nghost = rhoPIF.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rhoPIF.getLayout(); + const Mesh_t& mesh = rhoPIF.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + double AbsError = 0.0; + double Enorm = 0.0; + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ex field error", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& errorSum, + double& fieldSum) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + Dr += kVec[d] * kVec[d]; + } + + double myError = 0.0; + double myField = 0.0; + Kokkos::complex Ek = {0.0, 0.0}; + Kokkos::complex Ekprev = {0.0, 0.0}; + for(size_t d = 0; d < Dim; ++d) { + if(Dr != 0.0) { + Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); + } + Ekprev = Ekprev - Ek; + myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); + myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } + errorSum += myError; + fieldSum += myField; + //Kokkos::complex rhok = 
rhoview(i+nghost,j+nghost,k+nghost); + //Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); + //rhokprev = rhokprev - rhok; + //myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); + //errorSum += myError; + //myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); + //fieldSum += myField; + + }, Kokkos::Sum(AbsError), Kokkos::Sum(Enorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&AbsError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double globalNorm = 0.0; + MPI_Allreduce(&Enorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //fieldEnergy *= volume; + + double relError = std::sqrt(globalError)/std::sqrt(globalNorm); + + return relError; +} + + +const char* TestName = "PenningTrap"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + Inform msg(TestName); + Inform msg2all(TestName,INFORM_ALL_NODES); + + ippl::Vector nmPIF = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + ippl::Vector nrPIC = { + std::atoi(argv[4]), + std::atoi(argv[5]), + std::atoi(argv[6]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef timeCommunication = IpplTimings::getTimer("timeCommunication"); + static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); + static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); + static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[7]); + const double tEnd = std::atof(argv[8]); + const double dtSlice = tEnd / Ippl::Comm->size(); + const double dtFine = std::atof(argv[9]); + const double dtCoarse = std::atof(argv[10]); + const unsigned int ntFine = (unsigned int)(dtSlice / dtFine); + const unsigned int ntCoarse = (unsigned int)(dtSlice / dtCoarse); + const double tol = std::atof(argv[11]); + const unsigned int maxIter = std::atoi(argv[12]); + + msg << "dtSlice: " << dtSlice + << "dtSlice/dtFine: " << dtSlice / dtFine + << "(int)dtSlice/dtFine: " << (unsigned int)(dtSlice / dtFine) + << endl; + + const double tStartMySlice = Ippl::Comm->rank() * dtSlice; + //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; + + + using bunch_type = ChargedParticlesPinT; + using states_begin_type = StatesBeginSlice; + using states_end_type = StatesEndSlice; + + std::unique_ptr Pcoarse; + std::unique_ptr Pbegin; + std::unique_ptr Pend; + + ippl::NDIndex domainPIC; + ippl::NDIndex domainPIF; + for (unsigned i = 0; i< Dim; i++) { + domainPIC[i] = ippl::Index(nrPIC[i]); + domainPIF[i] = ippl::Index(nmPIF[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t rmin(0.0); + Vector_t rmax(20.0); + double dxPIC = rmax[0] / nrPIC[0]; + double dyPIC = rmax[1] / nrPIC[1]; + double dzPIC = rmax[2] / nrPIC[2]; + + Vector_t length = rmax - rmin; + + Vector_t mu, sd; + + for (unsigned d = 0; d(PL,hrPIC,rmin,rmax,decomp,Q); + Pbegin = 
std::make_unique(PL); + Pend = std::make_unique(PL); + + Pcoarse->nr_m = nrPIC; + + Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); + + Pcoarse->initFFTSolver(); + Pcoarse->time_m = tStartMySlice; + + IpplTimings::startTimer(particleCreation); + + Vector_t minU, maxU; + for (unsigned d = 0; d create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + using buffer_type = ippl::Communicate::buffer_type; + int tag; +#ifdef KOKKOS_ENABLE_CUDA + //If we don't do the following even with the same seed the initial + //condition is not the same on different GPUs + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(Ippl::Comm->rank() == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, + minU, maxU)); + + + Kokkos::fence(); + size_type bufSize = Pbegin->packedSize(nloc); + std::vector requests(0); + int sends = 0; + for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); + requests.resize(requests.size() + 1); + Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); + buf->resetWritePos(); + ++sends; + } + MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + } + else { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + } + Ippl::Comm->barrier(); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); +#else + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, mu, sd, + minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); +#endif + + + msg << "Parareal Penning trap" + << endl + << "Slice dT: " << dtSlice + << endl + << "No. of fine time steps: " << ntFine + << endl + << "No. of coarse time steps: " << ntCoarse + << endl + << "Tolerance: " << tol + << " Max. 
iterations: " << maxIter + << endl + << "Np= " << nloc + << " Fourier modes = " << nmPIF + << " Grid points = " << nrPIC + << endl; + + Pcoarse->q = Pcoarse->Q_m/nloc; + IpplTimings::stopTimer(particleCreation); + + msg << "particles created and initial conditions assigned " << endl; + + //Copy initial conditions as they are needed later + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + //Get initial guess for ranks other than 0 by propagating the coarse solver + IpplTimings::startTimer(coarsePropagator); + if (Ippl::Comm->rank() > 0) { + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); + } + + Ippl::Comm->barrier(); + + IpplTimings::stopTimer(coarsePropagator); + + msg << "First Boris PIC done " << endl; + + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + //Run the coarse integrator to get the values at the end of the time slice + IpplTimings::startTimer(coarsePropagator); + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); + IpplTimings::stopTimer(coarsePropagator); + msg << "Second Boris PIC done " << endl; + + //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + + //The following might not be needed + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + msg << "Starting parareal iterations ..." 
<< endl; + bool isConverged = false; + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); + //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, + // dtFine, isConverged, tStartMySlice, 0); + //Ippl::Comm->barrier(); + for (unsigned int it=0; itBorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1, Bext); + IpplTimings::stopTimer(finePropagator); + + + //if(isConverged) { + + //test with the serial solution + //Pcoarse->LeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, + // dtFine, isConverged, tStartMySlice, it+1); + //Ippl::Comm->barrier(); + //double Rerror = computeL2Error(Pcoarse->R0, Pbegin->R, it+1, Ippl::Comm->rank()); + //double Perror = computeL2Error(Pcoarse->P0, Pbegin->P, it+1, Ippl::Comm->rank()); + //msg << "Finished iteration: " << it+1 + //<< " Rerror: " << Rerror + //<< " Perror: " << Perror + //<< endl; + // break; + //} + + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(Ippl::Comm->rank() > 0) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + } + else { + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); + IpplTimings::stopTimer(coarsePropagator); + + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; + + IpplTimings::startTimer(timeCommunication); + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + + //Pcoarse->EfieldPICprevIter_m = Pcoarse->EfieldPICprevIter_m - Pcoarse->EfieldPIC_m; + //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPICprevIter_m, Pcoarse->EfieldPICprevIter_m); + //double absFieldError = std::sqrt(Pcoarse->rhoPIC_m.sum()); + //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPIC_m, Pcoarse->EfieldPIC_m); + //double EfieldNorm = std::sqrt(Pcoarse->rhoPIC_m.sum()); + //double EfieldError = absFieldError / EfieldNorm; + + IpplTimings::startTimer(computeErrors); + double localRerror, localPerror; + double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + //double Rerror = 
computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + //double Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + double EfieldError = 0; + if(it > 0) { + EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); + } + IpplTimings::stopTimer(computeErrors); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); + IpplTimings::stopTimer(deepCopy); + + msg << "Finished iteration: " << it+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << " Efield error: " << EfieldError + //<< " Rhofield error: " << EfieldError + << endl; + + IpplTimings::startTimer(dumpData); + Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(localRerror, localPerror, it+1); + IpplTimings::stopTimer(dumpData); + //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + + if((Rerror <= tol) && (Perror <= tol)) { + //if(Perror <= tol) { + break; + } + } + + msg << "Penning trap Parareal: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + return 0; +} From cdb062dea481ab0734b189f92a26cb5c760da819 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 20 Dec 2022 12:35:10 +0100 Subject: [PATCH 034/117] Penningtrap IC generation fixed with CDF --- alpine/ElectrostaticPIC/PenningTrap.cpp | 6 +++--- alpine/PinT/BumponTailInstabilityPinT.cpp | 14 ++++++++++++-- alpine/PinT/ChargedParticlesPinT.hpp | 1 + alpine/PinT/LandauDampingPinT.cpp | 10 ++++++++-- alpine/PinT/PenningTrapPinT.cpp | 10 +++++++--- 5 files changed, 31 insertions(+), 10 deletions(-) diff --git a/alpine/ElectrostaticPIC/PenningTrap.cpp b/alpine/ElectrostaticPIC/PenningTrap.cpp index fbbdfd3d9..9ea440176 100644 --- a/alpine/ElectrostaticPIC/PenningTrap.cpp +++ b/alpine/ElectrostaticPIC/PenningTrap.cpp @@ -213,9 +213,9 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - unsigned int nrMax = 2048;// Max grid size in our studies - double dxFinest = rmax[0] / nrMax; - const double dt = 0.5 * dxFinest;//size of timestep + //unsigned int nrMax = 2048;// Max grid size in our studies + //double dxFinest = rmax[0] / nrMax; + const double dt = 0.05;//0.5 * dxFinest;//size of timestep const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 6bc012cbc..4df725214 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -147,6 +147,14 @@ struct generate_random { } }; +double CDF(const double& x, const double& delta, const double& k, + const unsigned& dim) { + + bool isDimZ = (dim == (Dim-1)); + double cdf = x + (double)(isDimZ * ((delta / k) * std::sin(k * x))); + return cdf; +} + double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { @@ -462,8 +470,10 @@ int main(int argc, char *argv[]){ Vector_t minU, maxU; for (unsigned d = 0; d { double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); Vector_t rmax = rmax_m; + for (unsigned int it=0; it& Q, ParticleAttrib& QprevIter, const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { @@ -431,8 +435,10 @@ int main(int argc, char *argv[]){ Vector_t minU, maxU; for (unsigned d = 0; d & Q, 
ParticleAttrib& QprevIter, const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { @@ -396,7 +401,6 @@ int main(int argc, char *argv[]){ sd[2] = 0.20*length[2]; - double dxPIF = rmax[0] / nmPIF[0]; double dyPIF = rmax[1] / nmPIF[1]; double dzPIF = rmax[2] / nmPIF[2]; @@ -433,8 +437,8 @@ int main(int argc, char *argv[]){ Vector_t minU, maxU; for (unsigned d = 0; d Date: Wed, 21 Dec 2022 09:01:58 +0100 Subject: [PATCH 035/117] ceil used for number of coarse and fine time steps --- alpine/PinT/BumponTailInstabilityPinT.cpp | 21 +++++++++------------ alpine/PinT/LandauDampingPinT.cpp | 4 ++-- alpine/PinT/PenningTrapPinT.cpp | 4 ++-- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 4df725214..ebb8d7abd 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -324,14 +324,14 @@ double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { } -//const char* TestName = "TwoStreamInstability"; -const char* TestName = "BumponTailInstability"; +const char* TestName = "TwoStreamInstability"; +//const char* TestName = "BumponTailInstability"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); - Inform msg("TestName"); - Inform msg2all("TestName",INFORM_ALL_NODES); + Inform msg(TestName); + Inform msg2all(TestName,INFORM_ALL_NODES); ippl::Vector nmPIF = { std::atoi(argv[1]), @@ -361,15 +361,11 @@ int main(int argc, char *argv[]){ const double dtSlice = tEnd / Ippl::Comm->size(); const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); - const unsigned int ntFine = (unsigned int)(dtSlice / dtFine); - const unsigned int ntCoarse = (unsigned int)(dtSlice / dtCoarse); + const unsigned int ntFine = std::ceil(dtSlice / dtFine); + const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); const unsigned int maxIter = std::atoi(argv[12]); - msg << "dtSlice: " << dtSlice - << "dtSlice/dtFine: " << dtSlice / dtFine - << "(int)dtSlice/dtFine: " << (unsigned int)(dtSlice / dtFine) - << endl; const double tStartMySlice = Ippl::Comm->rank() * dtSlice; //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; @@ -537,7 +533,8 @@ int main(int argc, char *argv[]){ #endif - msg << "Parareal Bump on tail instability" + msg << "Parareal " + << TestName << endl << "Slice dT: " << dtSlice << endl @@ -718,7 +715,7 @@ int main(int argc, char *argv[]){ } } - msg << "Twostream instability Parareal: End." << endl; + msg << TestName << " Parareal: End." 
<< endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index ea6a987b0..48a85bb7f 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -345,8 +345,8 @@ int main(int argc, char *argv[]){ const double dtSlice = tEnd / Ippl::Comm->size(); const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); - const unsigned int ntFine = (unsigned int)(dtSlice / dtFine); - const unsigned int ntCoarse = (unsigned int)(dtSlice / dtCoarse); + const unsigned int ntFine = std::ceil(dtSlice / dtFine); + const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); const unsigned int maxIter = std::atoi(argv[12]); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index e1335e74f..bf9ab6586 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -348,8 +348,8 @@ int main(int argc, char *argv[]){ const double dtSlice = tEnd / Ippl::Comm->size(); const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); - const unsigned int ntFine = (unsigned int)(dtSlice / dtFine); - const unsigned int ntCoarse = (unsigned int)(dtSlice / dtCoarse); + const unsigned int ntFine = std::ceil(dtSlice / dtFine); + const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); const unsigned int maxIter = std::atoi(argv[12]); From a7112e7b82b8c34bf94a506addb5f2101a1633aa Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 23 Dec 2022 14:45:00 +0100 Subject: [PATCH 036/117] Stopping criteria changed in all mini-apps --- alpine/PinT/BumponTailInstabilityPinT.cpp | 104 ++++++++----------- alpine/PinT/ChargedParticlesPinT.hpp | 8 +- alpine/PinT/LandauDampingPinT.cpp | 116 +++++++++++----------- alpine/PinT/PenningTrapPinT.cpp | 102 ++++++++----------- src/Ippl.cpp | 2 +- 5 files changed, 143 insertions(+), 189 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index ebb8d7abd..fe3ce5c7a 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -226,15 +226,16 @@ double computeLinfError(ParticleAttrib& Q, ParticleAttrib& Q //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. 
Error: " << lError << std::endl; - double globaltemp = 0.0; - MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + //double globaltemp = 0.0; + //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - double absError = globaltemp; + //double absError = globaltemp; - globaltemp = 0.0; - MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + //globaltemp = 0.0; + //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - double relError = absError / globaltemp; + //double relError = absError / globaltemp; + double relError = lError; return relError; @@ -330,7 +331,7 @@ const char* TestName = "TwoStreamInstability"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); - Inform msg(TestName); + Inform msg(TestName, Ippl::Comm->size()-1); Inform msg2all(TestName,INFORM_ALL_NODES); ippl::Vector nmPIF = { @@ -468,8 +469,6 @@ int main(int argc, char *argv[]){ for (unsigned d = 0; d RprevIter.getView(), Pcoarse->R0.getView()); - //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); - //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, - // dtFine, isConverged, tStartMySlice, 0); - //Ippl::Comm->barrier(); + bool isPreviousDomainConverged; + if(Ippl::Comm->rank() == 0) { + isPreviousDomainConverged = true; + } + else { + isPreviousDomainConverged = false; + } for (unsigned int it=0; itLeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, - // dtFine, isConverged, tStartMySlice, it+1); - //Ippl::Comm->barrier(); - //double Rerror = computeL2Error(Pcoarse->R0, Pbegin->R, it+1, Ippl::Comm->rank()); - //double Perror = computeL2Error(Pcoarse->P0, Pbegin->P, it+1, Ippl::Comm->rank()); - //msg << "Finished iteration: " << it+1 - //<< " Rerror: " << Rerror - //<< " Perror: " << Perror - //<< endl; - // break; - //} - //Difference = Fine - Coarse Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; @@ -636,20 +622,25 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(timeCommunication); tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if(Ippl::Comm->rank() > 0) { + if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); - } - else { - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, + Ippl::getComm(), MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); } IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); @@ -661,6 +652,18 @@ int main(int argc, char *argv[]){ Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; + 
IpplTimings::startTimer(computeErrors); + double localRerror, localPerror; + double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + IpplTimings::stopTimer(computeErrors); + + if((Rerror <= tol) && (Perror <= tol)) { + isConverged = true; + } + + IpplTimings::startTimer(timeCommunication); if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { size_type bufSize = Pend->packedSize(nloc); @@ -669,52 +672,27 @@ int main(int argc, char *argv[]){ Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); } IpplTimings::stopTimer(timeCommunication); - //Pcoarse->EfieldPICprevIter_m = Pcoarse->EfieldPICprevIter_m - Pcoarse->EfieldPIC_m; - //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPICprevIter_m, Pcoarse->EfieldPICprevIter_m); - //double absFieldError = std::sqrt(Pcoarse->rhoPIC_m.sum()); - //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPIC_m, Pcoarse->EfieldPIC_m); - //double EfieldNorm = std::sqrt(Pcoarse->rhoPIC_m.sum()); - //double EfieldError = absFieldError / EfieldNorm; - - IpplTimings::startTimer(computeErrors); - double localRerror, localPerror; - double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - //double Rerror = computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - //double Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - - double EfieldError = 0; - if(it > 0) { - EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); - } - IpplTimings::stopTimer(computeErrors); - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); - IpplTimings::stopTimer(deepCopy); msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror - << " Efield error: " << EfieldError - //<< " Rhofield error: " << EfieldError << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writeError(Rerror, Perror, it+1); + //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(localRerror, localPerror, it+1); IpplTimings::stopTimer(dumpData); - //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - if((Rerror <= tol) && (Perror <= tol)) { - //if(Perror <= tol) { + if(isConverged && isPreviousDomainConverged) { break; } } + Ippl::Comm->barrier(); msg << TestName << " Parareal: End." 
<< endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 7cad84807..c1bd3c611 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -779,8 +779,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0)) { IpplTimings::startTimer(dumpData); - //dumpLandau(iter); - dumpBumponTail(iter); + dumpLandau(iter); + //dumpBumponTail(iter); dumpEnergy(this->getLocalNum(), iter, Ptemp); IpplTimings::stopTimer(dumpData); } @@ -812,8 +812,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; IpplTimings::startTimer(dumpData); - //dumpLandau(iter); - dumpBumponTail(iter); + dumpLandau(iter); + //dumpBumponTail(iter); dumpEnergy(this->getLocalNum(), iter, Ptemp); IpplTimings::stopTimer(dumpData); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 48a85bb7f..1fe18d756 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -211,15 +211,16 @@ double computeLinfError(ParticleAttrib& Q, ParticleAttrib& Q //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; - double globaltemp = 0.0; - MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + //double globaltemp = 0.0; + //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - double absError = globaltemp; + //double absError = globaltemp; - globaltemp = 0.0; - MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + //globaltemp = 0.0; + //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - double relError = absError / globaltemp; + //double relError = absError / globaltemp; + double relError = lError; return relError; @@ -314,8 +315,8 @@ const char* TestName = "LandauDampingPinT"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); - Inform msg("LandauDampingPinT"); - Inform msg2all("LandauDampingPinT",INFORM_ALL_NODES); + Inform msg(TestName, Ippl::Comm->size()-1); + Inform msg2all(TestName,INFORM_ALL_NODES); ippl::Vector nmPIF = { std::atoi(argv[1]), @@ -353,7 +354,8 @@ int main(int argc, char *argv[]){ const double tStartMySlice = Ippl::Comm->rank() * dtSlice; //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; - msg << "Parareal Landau damping" + msg << "Parareal " + << TestName << endl << "Slice dT: " << dtSlice << endl @@ -530,7 +532,6 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(coarsePropagator); msg << "Second Leap frog PIC done " << endl; - //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); //The following might not be needed IpplTimings::startTimer(deepCopy); @@ -540,12 +541,21 @@ int main(int argc, char *argv[]){ msg << "Starting parareal iterations ..." 
<< endl; - bool isConverged = false; //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, // dtFine, isConverged, tStartMySlice, 0); //Ippl::Comm->barrier(); + bool isConverged = false; + bool isPreviousDomainConverged; + if(Ippl::Comm->rank() == 0) { + isPreviousDomainConverged = true; + } + else { + isPreviousDomainConverged = false; + } + + //unsigned int maxIterRank; for (unsigned int it=0; itLeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, - // dtFine, isConverged, tStartMySlice, it+1); - //Ippl::Comm->barrier(); - //double Rerror = computeL2Error(Pcoarse->R0, Pbegin->R, it+1, Ippl::Comm->rank()); - //double Perror = computeL2Error(Pcoarse->P0, Pbegin->P, it+1, Ippl::Comm->rank()); - //msg << "Finished iteration: " << it+1 - //<< " Rerror: " << Rerror - //<< " Perror: " << Perror - //<< endl; - // break; - //} - //Difference = Fine - Coarse Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; @@ -580,20 +575,25 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(timeCommunication); tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if(Ippl::Comm->rank() > 0) { + if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); - } - else { - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, + Ippl::getComm(), MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); } IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); @@ -605,6 +605,22 @@ int main(int argc, char *argv[]){ Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; + IpplTimings::startTimer(computeErrors); + double localRerror, localPerror; + double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + //double EfieldError = 0; + //if(it > 0) { + // EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); + //} + IpplTimings::stopTimer(computeErrors); + + if((Rerror <= tol) && (Perror <= tol)) { + isConverged = true; + } + + IpplTimings::startTimer(timeCommunication); if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { size_type bufSize = Pend->packedSize(nloc); @@ -613,53 +629,33 @@ int main(int argc, char *argv[]){ Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, 
Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); } IpplTimings::stopTimer(timeCommunication); - //Pcoarse->EfieldPICprevIter_m = Pcoarse->EfieldPICprevIter_m - Pcoarse->EfieldPIC_m; - //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPICprevIter_m, Pcoarse->EfieldPICprevIter_m); - //double absFieldError = std::sqrt(Pcoarse->rhoPIC_m.sum()); - //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPIC_m, Pcoarse->EfieldPIC_m); - //double EfieldNorm = std::sqrt(Pcoarse->rhoPIC_m.sum()); - //double EfieldError = absFieldError / EfieldNorm; - - IpplTimings::startTimer(computeErrors); - double localRerror, localPerror; - double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - //double Rerror = computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - //double Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - - double EfieldError = 0; - if(it > 0) { - EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); - } - IpplTimings::stopTimer(computeErrors); - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); - IpplTimings::stopTimer(deepCopy); msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror - << " Efield error: " << EfieldError + //<< " Efield error: " << EfieldError //<< " Rhofield error: " << EfieldError << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writeError(Rerror, Perror, it+1); + //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(localRerror, localPerror, it+1); IpplTimings::stopTimer(dumpData); - //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - if((Rerror <= tol) && (Perror <= tol)) { - //if(Perror <= tol) { + if(isConverged && isPreviousDomainConverged) { + //maxIterRank = it+1; break; } + } - msg << "LandauDamping Parareal: End." << endl; + //std::cout << "Rank " << Ippl::Comm->rank() << " is out of the loop in iteration: " << maxIterRank << std::endl; + Ippl::Comm->barrier(); + msg << TestName << " Parareal: End." << endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index bf9ab6586..fa8ca441f 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -214,15 +214,16 @@ double computeLinfError(ParticleAttrib& Q, ParticleAttrib& Q //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. 
Error: " << lError << std::endl; - double globaltemp = 0.0; - MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + //double globaltemp = 0.0; + //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - double absError = globaltemp; + //double absError = globaltemp; - globaltemp = 0.0; - MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + //globaltemp = 0.0; + //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - double relError = absError / globaltemp; + //double relError = absError / globaltemp; + double relError = lError; return relError; @@ -317,7 +318,7 @@ const char* TestName = "PenningTrap"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); - Inform msg(TestName); + Inform msg(TestName, Ippl::Comm->size()-1); Inform msg2all(TestName,INFORM_ALL_NODES); ippl::Vector nmPIF = { @@ -560,11 +561,13 @@ int main(int argc, char *argv[]){ msg << "Starting parareal iterations ..." << endl; bool isConverged = false; - //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); - //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); - //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, - // dtFine, isConverged, tStartMySlice, 0); - //Ippl::Comm->barrier(); + bool isPreviousDomainConverged; + if(Ippl::Comm->rank() == 0) { + isPreviousDomainConverged = true; + } + else { + isPreviousDomainConverged = false; + } for (unsigned int it=0; itLeapFrogPIF(Pcoarse->R0, Pcoarse->P0, (Ippl::Comm->rank()+1)*ntFine, - // dtFine, isConverged, tStartMySlice, it+1); - //Ippl::Comm->barrier(); - //double Rerror = computeL2Error(Pcoarse->R0, Pbegin->R, it+1, Ippl::Comm->rank()); - //double Perror = computeL2Error(Pcoarse->P0, Pbegin->P, it+1, Ippl::Comm->rank()); - //msg << "Finished iteration: " << it+1 - //<< " Rerror: " << Rerror - //<< " Perror: " << Perror - //<< endl; - // break; - //} - //Difference = Fine - Coarse Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; @@ -599,20 +587,25 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(timeCommunication); tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(Ippl::Comm->rank() > 0) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); - } - else { - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, + Ippl::getComm(), MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); } IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); @@ -624,6 +617,18 @@ int main(int argc, char *argv[]){ Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; + 
IpplTimings::startTimer(computeErrors); + double localRerror, localPerror; + double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + IpplTimings::stopTimer(computeErrors); + + if((Rerror <= tol) && (Perror <= tol)) { + isConverged = true; + } + + IpplTimings::startTimer(timeCommunication); if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { size_type bufSize = Pend->packedSize(nloc); @@ -632,53 +637,28 @@ int main(int argc, char *argv[]){ Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); } IpplTimings::stopTimer(timeCommunication); - //Pcoarse->EfieldPICprevIter_m = Pcoarse->EfieldPICprevIter_m - Pcoarse->EfieldPIC_m; - //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPICprevIter_m, Pcoarse->EfieldPICprevIter_m); - //double absFieldError = std::sqrt(Pcoarse->rhoPIC_m.sum()); - //Pcoarse->rhoPIC_m = dot(Pcoarse->EfieldPIC_m, Pcoarse->EfieldPIC_m); - //double EfieldNorm = std::sqrt(Pcoarse->rhoPIC_m.sum()); - //double EfieldError = absFieldError / EfieldNorm; - - IpplTimings::startTimer(computeErrors); - double localRerror, localPerror; - double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - //double Rerror = computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - //double Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - - double EfieldError = 0; - if(it > 0) { - EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); - } - IpplTimings::stopTimer(computeErrors); - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->rhoPIFprevIter_m.getView(), Pcoarse->rhoPIF_m.getView()); - IpplTimings::stopTimer(deepCopy); msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror - << " Efield error: " << EfieldError - //<< " Rhofield error: " << EfieldError << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writeError(Rerror, Perror, it+1); + //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(localRerror, localPerror, it+1); IpplTimings::stopTimer(dumpData); - //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - if((Rerror <= tol) && (Perror <= tol)) { - //if(Perror <= tol) { + if(isConverged && isPreviousDomainConverged) { break; } } - msg << "Penning trap Parareal: End." << endl; + Ippl::Comm->barrier(); + msg << TestName << " Parareal: End." << endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); diff --git a/src/Ippl.cpp b/src/Ippl.cpp index 420af917a..26b5b30a6 100644 --- a/src/Ippl.cpp +++ b/src/Ippl.cpp @@ -98,7 +98,7 @@ Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) if (infoLevel > 0 && Comm->myNode() == 0) { for (auto& l : notparsed) { - std::cout << "Warning: Option '" << l << "' is not parsed by Ippl." << std::endl; + std::cout << "Option '" << l << "' is not parsed by Ippl. Make sure your application parses it." 
<< std::endl; } } From 17bf673dc32c5629d9ed8b6c696ca3631303a8cb Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 6 Jan 2023 16:44:10 +0100 Subject: [PATCH 037/117] If conditions removed and performance improved a little bit --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 107 ++++++++++++++---- alpine/PinT/ChargedParticlesPinT.hpp | 1 + src/Particle/ParticleAttrib.hpp | 67 +++++++++-- src/Solver/FFTPeriodicPoissonSolver.hpp | 20 ++-- 4 files changed, 150 insertions(+), 45 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 602964ab6..3429969a1 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -130,33 +130,96 @@ class ChargedParticlesPIF : public ippl::ParticleBase { } - void dumpLandau(size_type totalP) { + void dumpLandau(size_type /*totalP*/) { - auto Eview = E.getView(); + + double fieldEnergy = 0.0; + double ExAmp = 0.0; - double fieldEnergy, ExAmp; - double temp = 0.0; + auto rhoview = rho_m.getView(); + const int nghost = rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rho_m.getLayout(); + const Mesh_t& mesh = rho_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; - Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = Eview(i)[0] * Eview(i)[0]; - valL += myVal; - }, Kokkos::Sum(temp)); + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } - double globaltemp = 0.0; - MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ex energy and Max", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& tlSum, + double& tlMax) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + if(Dr != 0.0) { + Ek = -(imag * kVec[0] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); + } + double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + + tlSum += myVal; + + double myValMax = std::sqrt(myVal); + + if(myValMax > tlMax) tlMax = myValMax; + + }, Kokkos::Sum(fieldEnergy), Kokkos::Max(ExAmp)); + + + Kokkos::fence(); double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - fieldEnergy = globaltemp * volume / totalP ; - - double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), - KOKKOS_LAMBDA(const size_t i, double& valL) - { - double myVal = std::fabs(Eview(i)[0]); - if(myVal > valL) valL = myVal; - }, Kokkos::Max(tempMax)); - ExAmp = 0.0; - MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + fieldEnergy *= volume; + + //auto Eview = E.getView(); + + //double fieldEnergy, ExAmp; + //double temp = 0.0; + + //Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = Eview(i)[0] * Eview(i)[0]; + // valL += myVal; + // }, Kokkos::Sum(temp)); + + //double globaltemp = 0.0; + //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + //double 
volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //fieldEnergy = globaltemp * volume / totalP ; + + //double tempMax = 0.0; + //Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), + // KOKKOS_LAMBDA(const size_t i, double& valL) + // { + // double myVal = std::fabs(Eview(i)[0]); + // if(myVal > valL) valL = myVal; + // }, Kokkos::Max(tempMax)); + //ExAmp = 0.0; + //MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); if (Ippl::Comm->rank() == 0) { diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index c1bd3c611..7fad12b69 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -791,6 +791,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Ptemp = Ptemp - 0.5 * dt * E; //drift + Rtemp = Rtemp + dt * Ptemp; //Apply particle BC diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 3c0d9e183..726db2b09 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -254,23 +254,41 @@ namespace ippl { const int i = flatIndex2D % N[0]; const int j = (int)(flatIndex2D / N[0]); + //const int i = (int)(flatIndex / (N[0] * N[1])); + //const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + //const int k = flatIndex2D % N[0]; + //const int j = (int)(flatIndex2D / N[0]); + FT reducedValue = 0.0; + Vector iVec = {i, j, k}; + vector_type kVec; + double Sk = 1.0; //Fourier transform of the shape function + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //double kh = kVec[d] * dx[d]; + ////Fourier transform of CIC + //if(kh != 0.0) { + // Sk *= std::pow(Kokkos::Experimental::sin(kh)/kh, 2); + //} + } Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), [=](const size_t idx, FT& innerReduce) { - Vector iVec = {i, j, k}; - vector_type kVec; - double arg=0.0; + //Vector iVec = {i, j, k}; + //vector_type kVec; + double arg = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); //kVec[d] = 2 * pi / Len[d] * iVec[d]; //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); arg += kVec[d]*pp(idx)[d]; } const value_type& val = dview_m(idx); - innerReduce += (Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; + innerReduce += Sk*(Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; + //innerReduce += Sk*(arg - imag*arg)*val; }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { @@ -395,17 +413,27 @@ namespace ippl { const size_t idx = teamMember.league_rank(); value_type reducedValue = 0.0; + //double ExReducedValue = 0.0, EyReducedValue = 0.0; + //double EzReducedValue = 0.0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), [=](const size_t flatIndex, value_type& innerReduce) + //[=](const size_t flatIndex, double& ExReduce, double& EyReduce, double& EzReduce) { const int k = (int)(flatIndex / (N[0] * N[1])); const int flatIndex2D = flatIndex - (k * N[0] * N[1]); const int i = flatIndex2D % N[0]; const int j = (int)(flatIndex2D / N[0]); + + //const int i = (int)(flatIndex / (N[0] * N[1])); + //const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + //const int k = flatIndex2D % N[0]; + //const int j = (int)(flatIndex2D / N[0]); + Vector iVec = {i, j, k}; vector_type 
kVec; - double Dr = 0.0, arg=0.0; + double Dr = 0.0, arg = 0.0; + double Sk = 1.0; //Fourier transform of shape function for(size_t d = 0; d < Dim; ++d) { bool shift = (iVec[d] > (N[d]/2)); kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); @@ -413,31 +441,46 @@ namespace ippl { //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); Dr += kVec[d] * kVec[d]; arg += kVec[d]*pp(idx)[d]; + //double kh = kVec[d] * dx[d]; + ////Fourier transform of CIC + //if(kh != 0.0) { + // Sk *= std::pow(Kokkos::Experimental::sin(kh)/kh, 2); + //} } FT Ek = 0.0; - value_type Ex; + value_type Ex = 0.0; for(size_t d = 0; d < Dim; ++d) { - if(Dr != 0.0) { - Ek = -(imag * kVec[d] * fview(i+nghost,j+nghost,k+nghost) / Dr); - } + + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[d] * fview(i+nghost,j+nghost,k+nghost) * factor); //Inverse Fourier transform when the lhs is real. Use when //we choose k \in [0 K) instead of from [-K/2+1 K/2] //Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) // - Ek.imag() * Kokkos::Experimental::sin(arg)); - Ek *= (Kokkos::Experimental::cos(arg) + Ek *= Sk * (Kokkos::Experimental::cos(arg) + imag * Kokkos::Experimental::sin(arg)); + //Ek *= Sk * (arg + imag * arg); Ex[d] = Ek.real(); } innerReduce += Ex; + //ExReduce += Ex[0]; + //EyReduce += Ex[1]; + //EzReduce += Ex[2]; }, Kokkos::Sum(reducedValue)); + //}, Kokkos::Sum(ExReducedValue), Kokkos::Sum(EyReducedValue), + //Kokkos::Sum(EzReducedValue)); teamMember.team_barrier(); if(teamMember.team_rank() == 0) { dview_m(idx) = reducedValue; + //dview_m(idx)[0] = ExReducedValue; + //dview_m(idx)[1] = EyReducedValue; + //dview_m(idx)[2] = EzReducedValue; } } diff --git a/src/Solver/FFTPeriodicPoissonSolver.hpp b/src/Solver/FFTPeriodicPoissonSolver.hpp index e6f690942..73d7d2a2c 100644 --- a/src/Solver/FFTPeriodicPoissonSolver.hpp +++ b/src/Solver/FFTPeriodicPoissonSolver.hpp @@ -113,12 +113,11 @@ namespace ippl { double Dr = kVec[0] * kVec[0] + kVec[1] * kVec[1] + kVec[2] * kVec[2]; - - //It would be great if we can remove this conditional - if(Dr != 0.0) - view(i, j, k) *= 1 / Dr; - else - view(i, j, k) = 0.0; + + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + + view(i, j, k) *= factor; }); fft_mp->transform(-1, *this->rhs_mp, fieldComplex_m); @@ -168,11 +167,10 @@ namespace ippl { tempview(i, j, k) = view(i, j, k); - //It would be great if we can remove this conditional - if(Dr != 0.0) - tempview(i, j, k) *= -(imag * kVec[gd] / Dr); - else - tempview(i, j, k) = 0.0; + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + + tempview(i, j, k) *= -(imag * kVec[gd] * factor); }); fft_mp->transform(-1, *this->rhs_mp, tempFieldComplex_m); From 8032aed2b2f21c019c21b47c4e40a6f81861126f Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 9 Jan 2023 08:23:15 +0100 Subject: [PATCH 038/117] Layout lefts and rights specified --- src/Particle/ParticleAttrib.hpp | 27 ++++++++++++++++----------- src/Types/ViewTypes.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 726db2b09..f25022922 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -248,16 +248,19 @@ namespace ippl { team_policy(flatN, Kokkos::AUTO), KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { const size_t flatIndex = teamMember.league_rank(); - + +#ifdef 
KOKKOS_ENABLE_CUDA const int k = (int)(flatIndex / (N[0] * N[1])); const int flatIndex2D = flatIndex - (k * N[0] * N[1]); const int i = flatIndex2D % N[0]; const int j = (int)(flatIndex2D / N[0]); +#else - //const int i = (int)(flatIndex / (N[0] * N[1])); - //const int flatIndex2D = flatIndex - (i * N[0] * N[1]); - //const int k = flatIndex2D % N[0]; - //const int j = (int)(flatIndex2D / N[0]); + const int i = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + const int k = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#endif FT reducedValue = 0.0; Vector iVec = {i, j, k}; @@ -419,16 +422,18 @@ namespace ippl { [=](const size_t flatIndex, value_type& innerReduce) //[=](const size_t flatIndex, double& ExReduce, double& EyReduce, double& EzReduce) { + +#ifdef KOKKOS_ENABLE_CUDA const int k = (int)(flatIndex / (N[0] * N[1])); const int flatIndex2D = flatIndex - (k * N[0] * N[1]); const int i = flatIndex2D % N[0]; const int j = (int)(flatIndex2D / N[0]); - - - //const int i = (int)(flatIndex / (N[0] * N[1])); - //const int flatIndex2D = flatIndex - (i * N[0] * N[1]); - //const int k = flatIndex2D % N[0]; - //const int j = (int)(flatIndex2D / N[0]); +#else + const int i = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + const int k = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#endif Vector iVec = {i, j, k}; vector_type kVec; diff --git a/src/Types/ViewTypes.h b/src/Types/ViewTypes.h index 179cc4056..7cfc4238d 100644 --- a/src/Types/ViewTypes.h +++ b/src/Types/ViewTypes.h @@ -54,7 +54,7 @@ namespace ippl { }; /*! - * Specialized view type for thee dimensions. + * Specialized view type for three dimensions. */ template struct ViewType { From ab9b6b99de0ac44fa32285ed5849a7e32efcfd9d Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 9 Jan 2023 16:10:56 +0100 Subject: [PATCH 039/117] Some performance tests made and good configuration for delta shape functions found --- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 2 +- src/Particle/ParticleAttrib.hpp | 27 +++----------------- src/Utility/IpplTimings.cpp | 6 ++--- 3 files changed, 8 insertions(+), 27 deletions(-) diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 93e9e7796..f90cb56fb 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -220,7 +220,7 @@ int main(int argc, char *argv[]){ //const RegionLayout_t& RLayout = PL.getRegionLayout(); //const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t minU, maxU; - int myRank = Ippl::Comm->rank(); + //int myRank = Ippl::Comm->rank(); for (unsigned d = 0; d ::member_type member_type; - using view_type_temp = typename detail::ViewType::view_type; + //using view_type_temp = typename detail::ViewType::view_type; - view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); + //view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); double pi = std::acos(-1.0); Kokkos::complex imag = {0.0, 1.0}; @@ -278,20 +278,13 @@ namespace ippl { Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), [=](const size_t idx, FT& innerReduce) { - //Vector iVec = {i, j, k}; - //vector_type kVec; double arg = 0.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - //kVec[d] = 2 * 
pi / Len[d] * iVec[d]; - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); arg += kVec[d]*pp(idx)[d]; } const value_type& val = dview_m(idx); innerReduce += Sk*(Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; - //innerReduce += Sk*(arg - imag*arg)*val; }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { @@ -304,7 +297,6 @@ namespace ippl { IpplTimings::stopTimer(scatterPIFTimer); - //Kokkos::deep_copy(fview, viewLocal); //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); //IpplTimings::startTimer(scatterAllReduceTimer); //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); @@ -416,11 +408,8 @@ namespace ippl { const size_t idx = teamMember.league_rank(); value_type reducedValue = 0.0; - //double ExReducedValue = 0.0, EyReducedValue = 0.0; - //double EzReducedValue = 0.0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), [=](const size_t flatIndex, value_type& innerReduce) - //[=](const size_t flatIndex, double& ExReduce, double& EyReduce, double& EzReduce) { #ifdef KOKKOS_ENABLE_CUDA @@ -455,11 +444,12 @@ namespace ippl { FT Ek = 0.0; value_type Ex = 0.0; + auto rho = fview(i+nghost,j+nghost,k+nghost); for(size_t d = 0; d < Dim; ++d) { bool isNotZero = (Dr != 0.0); double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); - Ek = -(imag * kVec[d] * fview(i+nghost,j+nghost,k+nghost) * factor); + Ek = -(imag * kVec[d] * rho * factor); //Inverse Fourier transform when the lhs is real. Use when //we choose k \in [0 K) instead of from [-K/2+1 K/2] @@ -467,25 +457,16 @@ namespace ippl { // - Ek.imag() * Kokkos::Experimental::sin(arg)); Ek *= Sk * (Kokkos::Experimental::cos(arg) + imag * Kokkos::Experimental::sin(arg)); - //Ek *= Sk * (arg + imag * arg); Ex[d] = Ek.real(); } innerReduce += Ex; - //ExReduce += Ex[0]; - //EyReduce += Ex[1]; - //EzReduce += Ex[2]; }, Kokkos::Sum(reducedValue)); - //}, Kokkos::Sum(ExReducedValue), Kokkos::Sum(EyReducedValue), - //Kokkos::Sum(EzReducedValue)); teamMember.team_barrier(); if(teamMember.team_rank() == 0) { dview_m(idx) = reducedValue; - //dview_m(idx)[0] = ExReducedValue; - //dview_m(idx)[1] = EyReducedValue; - //dview_m(idx)[2] = EzReducedValue; } } diff --git a/src/Utility/IpplTimings.cpp b/src/Utility/IpplTimings.cpp index 7cc1079c4..c52c53b51 100644 --- a/src/Utility/IpplTimings.cpp +++ b/src/Utility/IpplTimings.cpp @@ -115,7 +115,7 @@ void Timing::print() { msg << level1 << "---------------------------------------------"; msg << "\n"; - msg << " Timing results for " << Ippl::Comm->getNodes() << " nodes:" << "\n"; + msg << " Timing results for " << Ippl::Comm->getNodes() << " ranks:" << "\n"; msg << "---------------------------------------------"; msg << "\n"; @@ -178,7 +178,7 @@ void Timing::print(const std::string &fn, const std::mapR.getView(); + auto Pview = P->P.getView(); + auto Eview = P->E.getView(); + double V0 = 30*rmax[2]; + Kokkos::parallel_for("Kick1", P->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(Rview(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eview(j)[0] += Eext_x; + Eview(j)[1] += Eext_y; + Eview(j)[2] += Eext_z; + + Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eview(j)[2]; + }); + IpplTimings::stopTimer(PTimer); + + 
//drift + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + IpplTimings::stopTimer(RTimer); + + //Apply particle BC + IpplTimings::startTimer(BCTimer); + PL.applyBC(P->R, PL.getRegionLayout().getDomain()); + IpplTimings::stopTimer(BCTimer); + + //scatter the charge onto the underlying grid + P->scatter(); + + // Solve for and gather E field + P->gather(); + + //kick + IpplTimings::startTimer(PTimer); + auto R2view = P->R.getView(); + auto P2view = P->P.getView(); + auto E2view = P->E.getView(); + Kokkos::parallel_for("Kick2", P->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + E2view(j)[0] += Eext_x; + E2view(j)[1] += Eext_y; + E2view(j)[2] += Eext_z; + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] + + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] + - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); + P2view(j)[2] += alpha * E2view(j)[2]; + }); + IpplTimings::stopTimer(PTimer); + + P->time_m += dt; + IpplTimings::startTimer(dumpDataTimer); + P->dumpEnergy(); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + } + + msg << TestName << " End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + return 0; +} From 06ca5e6a1e372626cdaf55e8e9505c35ea4774f3 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 16 Jan 2023 10:02:23 +0100 Subject: [PATCH 045/117] bug fixed for BumponTail and PenningTrap --- .../ElectrostaticPIF/BumponTailInstabilityPIF.cpp | 3 ++- alpine/PinT/BumponTailInstabilityPinT.cpp | 2 ++ alpine/PinT/ChargedParticlesPinT.hpp | 15 ++++++++++----- alpine/PinT/LandauDampingPinT.cpp | 2 +- alpine/PinT/PenningTrapPinT.cpp | 11 ++++------- src/Particle/ParticleAttrib.hpp | 12 +++++++----- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp index 53537509d..2ac5b18f4 100644 --- a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -194,6 +194,8 @@ int main(int argc, char *argv[]){ } // create mesh and layout objects for this problem domain + Vector_t kw; + double sigma, muBulk, muBeam, epsilon, delta; if(std::strcmp(TestName,"TwoStreamInstabilityPIF") == 0) { // Parameters for two stream instability as in @@ -270,7 +272,6 @@ int main(int argc, char *argv[]){ size_type nlocBulk = (size_type)(factorConf * factorVelBulk * totalP); size_type nlocBeam = (size_type)(factorConf * factorVelBeam * totalP); size_type nloc = nlocBulk + nlocBeam; - size_type nloc = (size_type)(factor * totalP); size_type Total_particles = 0; MPI_Allreduce(&nloc, &Total_particles, 1, diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 55eb7127d..235df8b14 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -357,6 +357,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static 
IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); IpplTimings::startTimer(mainTimer); @@ -455,6 +456,7 @@ int main(int argc, char *argv[]){ Pend = std::make_unique(PL); Pcoarse->nr_m = nrPIC; + Pcoarse->nm_m = nmPIF; Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index a0c88178e..52c19f614 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -596,9 +596,11 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void initializeShapeFunctionPIF() { + Inform m("initializeShape"); using mdrange_type = Kokkos::MDRangePolicy>; auto Skview = Sk_m.getView(); auto N = nm_m; + const int nghost = Sk_m.getNghost(); const Mesh_t& mesh = rhoPIF_m.get_mesh(); const Vector_t& dx = mesh.getMeshSpacing(); const Vector_t& Len = rmax_m - rmin_m; @@ -635,14 +637,17 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //Fourier transform of CIC Sk *= std::pow(arg, order); } - Skview(i, j, k) = Sk; + Skview(i+nghost, j+nghost, k+nghost) = Sk; }); + } else { throw IpplException("initializeShapeFunctionPIF", "Unrecognized shape function type"); } + double Sknorm = norm(Sk_m); + m << "Sknorm in initialize: " << Sknorm << endl; } @@ -836,8 +841,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0)) { IpplTimings::startTimer(dumpData); - dumpLandau(iter); - //dumpBumponTail(iter); + //dumpLandau(iter); + dumpBumponTail(iter); dumpEnergy(this->getLocalNum(), iter, Ptemp); IpplTimings::stopTimer(dumpData); } @@ -870,8 +875,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; IpplTimings::startTimer(dumpData); - dumpLandau(iter); - //dumpBumponTail(iter); + //dumpLandau(iter); + dumpBumponTail(iter); dumpEnergy(this->getLocalNum(), iter, Ptemp); IpplTimings::stopTimer(dumpData); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 4c456eaa5..b23198bd8 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -502,7 +502,7 @@ int main(int argc, char *argv[]){ #endif - Pcoarse->q = Pcoarse->Q_m/totalP; + Pcoarse->q = Pcoarse->Q_m/nloc; IpplTimings::stopTimer(particleCreation); msg << "particles created and initial conditions assigned " << endl; diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 3a59dd439..9629dd0ab 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -316,7 +316,7 @@ double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { } -const char* TestName = "PenningTrap"; +const char* TestName = "PenningTrapPinT"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); @@ -344,6 +344,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); IpplTimings::startTimer(mainTimer); @@ -357,11 +358,6 @@ int main(int argc, char *argv[]){ const double tol = std::atof(argv[11]); const unsigned int maxIter = std::atoi(argv[12]); - msg << "dtSlice: " << dtSlice - << 
"dtSlice/dtFine: " << dtSlice / dtFine - << "(int)dtSlice/dtFine: " << (unsigned int)(dtSlice / dtFine) - << endl; - const double tStartMySlice = Ippl::Comm->rank() * dtSlice; //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; @@ -426,6 +422,7 @@ int main(int argc, char *argv[]){ Pend = std::make_unique(PL); Pcoarse->nr_m = nrPIC; + Pcoarse->nm_m = nmPIF; Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); @@ -598,7 +595,7 @@ int main(int argc, char *argv[]){ tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if(Ippl::Comm->rank() > 0) { + if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 687505bc1..156368dad 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -207,7 +207,6 @@ namespace ippl { const ParticleAttrib< Vector, Properties... >& pp) const { - //Inform msg("scatterPIF"); static IpplTimings::TimerRef scatterPIFTimer = IpplTimings::getTimer("ScatterPIF"); IpplTimings::startTimer(scatterPIFTimer); @@ -225,6 +224,7 @@ namespace ippl { vector_type Len; Vector N; + for (unsigned d=0; d < Dim; ++d) { N[d] = domain[d].length(); Len[d] = dx[d] * N[d]; @@ -270,7 +270,7 @@ namespace ippl { bool shift = (iVec[d] > (N[d]/2)); kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); } - auto Sk = Skview(i, j, k); + auto Sk = Skview(i+nghost, j+nghost, k+nghost); Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), [=](const size_t idx, FT& innerReduce) { @@ -280,7 +280,8 @@ namespace ippl { } const value_type& val = dview_m(idx); - innerReduce += Sk*(Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; + innerReduce += Sk * (Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; + //innerReduce += (Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { @@ -367,7 +368,6 @@ namespace ippl { const ParticleAttrib< Vector, Properties... 
>& pp) const { - //Inform msg("gatherPIF"); static IpplTimings::TimerRef gatherPIFTimer = IpplTimings::getTimer("GatherPIF"); IpplTimings::startTimer(gatherPIFTimer); @@ -389,6 +389,8 @@ namespace ippl { Len[d] = dx[d] * N[d]; } + + typedef Kokkos::TeamPolicy<> team_policy; typedef Kokkos::TeamPolicy<>::member_type member_type; @@ -437,7 +439,7 @@ namespace ippl { FT Ek = 0.0; value_type Ex = 0.0; auto rho = fview(i+nghost,j+nghost,k+nghost); - auto Sk = Skview(i,j,k); + auto Sk = Skview(i+nghost,j+nghost,k+nghost); for(size_t d = 0; d < Dim; ++d) { bool isNotZero = (Dr != 0.0); From a3151c653f4f5c093a69a2f386ae13b412e1b901 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 16 Jan 2023 10:17:11 +0100 Subject: [PATCH 046/117] few tweaks --- alpine/PinT/ChargedParticlesPinT.hpp | 3 --- src/Particle/ParticleAttrib.hpp | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 52c19f614..31976918f 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -596,7 +596,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void initializeShapeFunctionPIF() { - Inform m("initializeShape"); using mdrange_type = Kokkos::MDRangePolicy>; auto Skview = Sk_m.getView(); auto N = nm_m; @@ -646,8 +645,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { throw IpplException("initializeShapeFunctionPIF", "Unrecognized shape function type"); } - double Sknorm = norm(Sk_m); - m << "Sknorm in initialize: " << Sknorm << endl; } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 156368dad..452d0f923 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -280,8 +280,8 @@ namespace ippl { } const value_type& val = dview_m(idx); - innerReduce += Sk * (Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; - //innerReduce += (Kokkos::Experimental::cos(arg) - imag*Kokkos::Experimental::sin(arg))*val; + innerReduce += Sk * (Kokkos::Experimental::cos(arg) + - imag * Kokkos::Experimental::sin(arg)) * val; }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { From 62ca48ce4836c07c1446290fba7cabdbc7204746 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 20 Jan 2023 23:09:08 +0100 Subject: [PATCH 047/117] Position error calculation changed --- alpine/PinT/BumponTailInstabilityPinT.cpp | 125 ++++++++++++++++++---- alpine/PinT/ChargedParticlesPinT.hpp | 29 +++++ alpine/PinT/PenningTrapPinT.cpp | 3 + 3 files changed, 139 insertions(+), 18 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 235df8b14..af00d033c 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -180,10 +180,10 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. 
Error: " << lError << std::endl; - double globaltemp = 0.0; - MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //double globaltemp = 0.0; + //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - double absError = std::sqrt(globaltemp); + //double absError = std::sqrt(globaltemp); //temp = 0.0; //Kokkos::parallel_reduce("Q norm", Q.size(), @@ -193,10 +193,10 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr // }, Kokkos::Sum(temp)); - globaltemp = 0.0; - MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //globaltemp = 0.0; + //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - double relError = absError / std::sqrt(globaltemp); + double relError = lError;//absError / std::sqrt(globaltemp); return relError; @@ -244,6 +244,74 @@ double computeLinfError(ParticleAttrib& Q, ParticleAttrib& Q } +double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + + myValError = std::sqrt(myValError); + + bool isIncluded = (myValError < 10.0); + + myValError *= isIncluded; + + if(myValError > valLError) valLError = myValError; + + double myValnorm = dot(Qview(i), Qview(i)).apply(); + myValnorm = std::sqrt(myValnorm); + + myValnorm *= isIncluded; + + if(myValnorm > valLnorm) valLnorm = myValnorm; + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); + + Kokkos::fence(); + lError = localError/localNorm; + + double relError = lError; + + return relError; + +} + +double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + if(myValError > valLError) valLError = myValError; + + double myValnorm = dot(Qview(i), Qview(i)).apply(); + myValnorm = std::sqrt(myValnorm); + + if(myValnorm > valLnorm) valLnorm = myValnorm; + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); + + Kokkos::fence(); + lError = localError/localNorm; + + double relError = lError; + + return relError; + +} double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { @@ -429,15 +497,16 @@ int main(int argc, char *argv[]){ delta = 0.01; } Vector_t rmin(0.0); - Vector_t rmax = 2 * pi / kw ; - double dxPIC = rmax[0] / nrPIC[0]; - double dyPIC = rmax[1] / nrPIC[1]; - double dzPIC = rmax[2] / nrPIC[2]; + Vector_t rmax = (2 * pi / kw); + Vector_t length = rmax - rmin; + double dxPIC = length[0] / nrPIC[0]; + double dyPIC = length[1] / nrPIC[1]; + double dzPIC = length[2] / nrPIC[2]; - double dxPIF = rmax[0] / nmPIF[0]; - double dyPIF = rmax[1] / nmPIF[1]; - double dzPIF = rmax[2] / nmPIF[2]; + double dxPIF = length[0] / nmPIF[0]; + double dyPIF = length[1] / nmPIF[1]; + double dzPIF = length[2] / nmPIF[2]; Vector_t hrPIC = {dxPIC, dyPIC, dzPIC}; Vector_t hrPIF = {dxPIF, dyPIF, dzPIF}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; @@ -450,7 +519,7 @@ int main(int argc, char *argv[]){ PLayout_t PL(FLPIC, meshPIC); //Q = -\int\int f dx dv - double Q = -rmax[0] * rmax[1] * rmax[2]; + double Q = -length[0] * length[1] * length[2]; Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -476,6 +545,8 @@ int main(int argc, char *argv[]){ maxU[d] = CDF(rmax[d], delta, kw[d], d); } + minU = minU; + maxU = maxU; double factorVelBulk = 1.0 - epsilon; double factorVelBeam = 1.0 - factorVelBulk; size_type nlocBulk = (size_type)(factorVelBulk * totalP); @@ -536,6 +607,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); #endif + Pcoarse->dumpParticleData(0, Pcoarse->R, Pcoarse->P, "Parareal"); msg << "Parareal " << TestName @@ -614,7 +686,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeShapeFunctionPIF); Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - + for (unsigned int it=0; itR = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); + //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); @@ -664,17 +739,28 @@ int main(int argc, char *argv[]){ Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; - IpplTimings::startTimer(computeErrors); + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); + + + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); double localRerror, localPerror; - double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + //if(it > 0) { + IpplTimings::startTimer(computeErrors); + double Rerror = computeRLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = 
computePLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + //Rerror = computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + //Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); IpplTimings::stopTimer(computeErrors); + //} if((Rerror <= tol) && (Perror <= tol)) { isConverged = true; } + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pend->R.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pend->P.getView()); IpplTimings::startTimer(timeCommunication); if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { @@ -697,6 +783,9 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(localRerror, localPerror, it+1); + //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { + Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); + //} IpplTimings::stopTimer(dumpData); if(isConverged && isPreviousDomainConverged) { diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 31976918f..f9474643d 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -504,6 +504,35 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } + + void dumpParticleData(const unsigned int& iter, ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const char* fname) { + + typename ParticleAttrib::HostMirror R_host = Rtemp.getHostMirror(); + typename ParticleAttrib::HostMirror P_host = Ptemp.getHostMirror(); + Kokkos::deep_copy(R_host, Rtemp.getView()); + Kokkos::deep_copy(P_host, Ptemp.getView()); + std::stringstream pname; + pname << "data/"; + pname << fname; + pname << "_rank_"; + pname << Ippl::Comm->rank(); + pname << "_iter_"; + pname << iter; + pname << ".csv"; + Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout.precision(10); + pcsvout.setf(std::ios::scientific, std::ios::floatfield); + pcsvout << "R_x, R_y, R_z, V_x, V_y, V_z" << endl; + for (size_type i = 0; i< this->getLocalNum(); i++) { + pcsvout << R_host(i)[0] << " " + << R_host(i)[1] << " " + << R_host(i)[2] << " " + << P_host(i)[0] << " " + << P_host(i)[1] << " " + << P_host(i)[2] << endl; + } + } + void writelocalError(double Rerror, double Perror, unsigned int iter) { std::stringstream fname; diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 9629dd0ab..3e6d0a879 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -656,6 +656,9 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(localRerror, localPerror, it+1); + if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { + Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P); + } IpplTimings::stopTimer(dumpData); if(isConverged && isPreviousDomainConverged) { From 3f1e8703b28070ef1d8732302cf72e1aa6b371e6 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 20 Jan 2023 23:12:58 +0100 Subject: [PATCH 048/117] unnecesary thing removed --- alpine/PinT/BumponTailInstabilityPinT.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index af00d033c..5b086c3f8 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -545,8 +545,6 @@ int main(int argc, char *argv[]){ maxU[d] = CDF(rmax[d], delta, 
kw[d], d); } - minU = minU; - maxU = maxU; double factorVelBulk = 1.0 - epsilon; double factorVelBeam = 1.0 - factorVelBulk; size_type nlocBulk = (size_type)(factorVelBulk * totalP); From a838a0caefecd8536c5c153c5bd4432fd5465c29 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 21 Jan 2023 09:08:46 +0100 Subject: [PATCH 049/117] modified stopping added to LandauDamping and BumponTail --- alpine/PinT/BumponTailInstabilityPinT.cpp | 63 +++++---------------- alpine/PinT/ChargedParticlesPinT.hpp | 8 +-- alpine/PinT/LandauDampingPinT.cpp | 67 +++++++++++++++++------ alpine/PinT/PenningTrapPinT.cpp | 1 + 4 files changed, 69 insertions(+), 70 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 5b086c3f8..63d978b92 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -202,58 +202,20 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr } -double computeLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { - - auto Qview = Q.getView(); - auto QprevIterView = QprevIter.getView(); - double localError = 0.0; - double localNorm = 0.0; - - Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ - Vector_t diff = Qview(i) - QprevIterView(i); - double myValError = dot(diff, diff).apply(); - myValError = std::sqrt(myValError); - - if(myValError > valLError) valLError = myValError; - - double myValnorm = dot(Qview(i), Qview(i)).apply(); - myValnorm = std::sqrt(myValnorm); - - if(myValnorm > valLnorm) valLnorm = myValnorm; - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - - Kokkos::fence(); - lError = localError/localNorm; - //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; - - - //double globaltemp = 0.0; - //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - - //double absError = globaltemp; - - //globaltemp = 0.0; - //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); - - //double relError = absError / globaltemp; - double relError = lError; - - return relError; - -} double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, + unsigned int& notIncluded) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); double localError = 0.0; double localNorm = 0.0; + notIncluded = 0; Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm, + unsigned int& excluded){ Vector_t diff = Qview(i) - QprevIterView(i); double myValError = dot(diff, diff).apply(); @@ -261,6 +223,7 @@ double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& bool isIncluded = (myValError < 10.0); + myValError *= isIncluded; if(myValError > valLError) valLError = myValError; @@ -271,7 +234,10 @@ double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& myValnorm *= isIncluded; if(myValnorm > valLnorm) valLnorm = myValnorm; - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); + + excluded += (!isIncluded); + }, Kokkos::Max(localError), Kokkos::Max(localNorm), + Kokkos::Sum(notIncluded)); Kokkos::fence(); lError = localError/localNorm; @@ -742,13 +708,11 @@ int main(int argc, char *argv[]){ PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); double localRerror, localPerror; + unsigned int excludedNp; - //if(it > 0) { IpplTimings::startTimer(computeErrors); - double Rerror = computeRLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Rerror = computeRLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, excludedNp); double Perror = computePLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - //Rerror = computeLinfError(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - //Perror = computeLinfError(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); IpplTimings::stopTimer(computeErrors); //} @@ -757,8 +721,6 @@ int main(int argc, char *argv[]){ isConverged = true; } - //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pend->R.getView()); - //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pend->P.getView()); IpplTimings::startTimer(timeCommunication); if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { @@ -776,6 +738,7 @@ int main(int argc, char *argv[]){ msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror + << " # Excluded: " << excludedNp << endl; IpplTimings::startTimer(dumpData); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index f9474643d..c827669a5 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -683,7 +683,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); PLayout& PL = this->getLayout(); - PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIC_m = 0.0; scatter(q, rhoPIC_m, Rtemp); @@ -744,7 +744,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); PLayout& PL = this->getLayout(); - PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIC_m = 0.0; scatter(q, rhoPIC_m, Rtemp); @@ -853,7 +853,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); - PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; scatterPIF(q, rhoPIF_m, Sk_m, Rtemp); @@ -917,7 +917,7 @@ class 
ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); - PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; scatterPIF(q, rhoPIF_m, Sk_m, Rtemp); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index b23198bd8..a22e325e8 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -187,42 +187,76 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr } -double computeLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, + unsigned int& notIncluded) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); double localError = 0.0; double localNorm = 0.0; + notIncluded = 0; Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm, + unsigned int& excluded){ Vector_t diff = Qview(i) - QprevIterView(i); double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + bool isIncluded = (myValError < 10.0); + + + myValError *= isIncluded; if(myValError > valLError) valLError = myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); myValnorm = std::sqrt(myValnorm); + + myValnorm *= isIncluded; if(myValnorm > valLnorm) valLnorm = myValnorm; - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); + + excluded += (!isIncluded); + }, Kokkos::Max(localError), Kokkos::Max(localNorm), + Kokkos::Sum(notIncluded)); Kokkos::fence(); lError = localError/localNorm; - //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; - + + double relError = lError; + + return relError; - //double globaltemp = 0.0; - //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); +} - //double absError = globaltemp; +double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; - //globaltemp = 0.0; - //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + if(myValError > valLError) valLError = myValError; + + double myValnorm = dot(Qview(i), Qview(i)).apply(); + myValnorm = std::sqrt(myValnorm); + + if(myValnorm > valLnorm) valLnorm = myValnorm; + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - //double relError = absError / globaltemp; + Kokkos::fence(); + lError = localError/localNorm; + double relError = lError; return relError; @@ -617,10 +651,12 @@ int main(int argc, char *argv[]){ Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); double localRerror, localPerror; - double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + unsigned int excludedNp; + double Rerror = computeRLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, excludedNp); + double Perror = computePLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); //double EfieldError = 0; //if(it > 0) { @@ -649,8 +685,7 @@ int main(int argc, char *argv[]){ msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror - //<< " Efield error: " << EfieldError - //<< " Rhofield error: " << EfieldError + << " # Excluded: " << excludedNp << endl; IpplTimings::startTimer(dumpData); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 3e6d0a879..c7b2f13ba 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -623,6 +623,7 @@ int main(int argc, char *argv[]){ Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); double localRerror, localPerror; double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); From b8cd2d9b4c567e2c3f0e094777afa929956d5005 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 21 Jan 2023 12:22:57 +0100 Subject: [PATCH 050/117] bug in PenningTrapPinT fixed --- alpine/PinT/BumponTailInstabilityPinT.cpp | 2 +- alpine/PinT/PenningTrapPinT.cpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 63d978b92..0ee2ee66e 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -745,7 +745,7 @@ int main(int argc, char *argv[]){ //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(localRerror, localPerror, it+1); //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { - Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); + //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); //} IpplTimings::stopTimer(dumpData); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index c7b2f13ba..7bf180a3d 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -657,9 +657,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(localRerror, localPerror, 
it+1); - if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { - Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P); - } + //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); IpplTimings::stopTimer(dumpData); if(isConverged && isPreviousDomainConverged) { From 77f7b679007c8766a866875e18b147f3561a4fcb Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 21 Jan 2023 19:40:54 +0100 Subject: [PATCH 051/117] Linf errors changed to L2 --- alpine/PinT/BumponTailInstabilityPinT.cpp | 85 ++++++++++++-------- alpine/PinT/LandauDampingPinT.cpp | 98 ++++++++++++++--------- alpine/PinT/PenningTrapPinT.cpp | 24 ++---- 3 files changed, 121 insertions(+), 86 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 0ee2ee66e..389f619d9 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -158,8 +158,9 @@ double CDF(const double& x, const double& delta, const double& k, return cdf; } -double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, + Vector_t& length) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -169,6 +170,15 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr Kokkos::parallel_reduce("Abs. error and norm", Q.size(), KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -10.0); + bool isRight = (diff[d] >= 10.0); + bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + double myValError = dot(diff, diff).apply(); valLError += myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); @@ -177,24 +187,32 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr Kokkos::fence(); lError = std::sqrt(localError)/std::sqrt(localNorm); - //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; - - //double globaltemp = 0.0; - //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double relError = lError;//absError / std::sqrt(globaltemp); + + return relError; - //double absError = std::sqrt(globaltemp); +} - //temp = 0.0; - //Kokkos::parallel_reduce("Q norm", Q.size(), - // KOKKOS_LAMBDA(const int i, double& valL){ - // double myVal = dot(Qview(i), Qview(i)).apply(); - // valL += myVal; - // }, Kokkos::Sum(temp)); +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + Kokkos::parallel_reduce("Abs. 
error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); - //globaltemp = 0.0; - //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + Kokkos::fence(); + lError = std::sqrt(localError)/std::sqrt(localNorm); double relError = lError;//absError / std::sqrt(globaltemp); @@ -202,42 +220,46 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr } - double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - unsigned int& notIncluded) { + Vector_t& length) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); double localError = 0.0; double localNorm = 0.0; - notIncluded = 0; Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm, - unsigned int& excluded){ + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -10.0); + bool isRight = (diff[d] >= 10.0); + bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + double myValError = dot(diff, diff).apply(); myValError = std::sqrt(myValError); - bool isIncluded = (myValError < 10.0); + //bool isIncluded = (myValError < 10.0); - - myValError *= isIncluded; + //myValError *= isIncluded; if(myValError > valLError) valLError = myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); myValnorm = std::sqrt(myValnorm); - myValnorm *= isIncluded; + //myValnorm *= isIncluded; if(myValnorm > valLnorm) valLnorm = myValnorm; - excluded += (!isIncluded); - }, Kokkos::Max(localError), Kokkos::Max(localNorm), - Kokkos::Sum(notIncluded)); + //excluded += (!isIncluded); + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); Kokkos::fence(); lError = localError/localNorm; @@ -248,6 +270,7 @@ double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& } + double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { @@ -571,7 +594,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); #endif - Pcoarse->dumpParticleData(0, Pcoarse->R, Pcoarse->P, "Parareal"); + //Pcoarse->dumpParticleData(0, Pcoarse->R, Pcoarse->P, "Parareal"); msg << "Parareal " << TestName @@ -708,11 +731,10 @@ int main(int argc, char *argv[]){ PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); double localRerror, localPerror; - unsigned int excludedNp; IpplTimings::startTimer(computeErrors); - double Rerror = computeRLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, excludedNp); - double Perror = computePLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); IpplTimings::stopTimer(computeErrors); //} @@ -738,7 +760,6 @@ int main(int argc, char *argv[]){ msg << "Finished iteration: " << it+1 << " Rerror: " 
<< Rerror << " Perror: " << Perror - << " # Excluded: " << excludedNp << endl; IpplTimings::startTimer(dumpData); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index a22e325e8..ea02ff985 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -143,8 +143,9 @@ double CDF(const double& x, const double& alpha, const double& k) { return cdf; } -double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, + Vector_t& length) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -154,6 +155,15 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr Kokkos::parallel_reduce("Abs. error and norm", Q.size(), KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -10.0); + bool isRight = (diff[d] >= 10.0); + bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + double myValError = dot(diff, diff).apply(); valLError += myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); @@ -162,26 +172,34 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr Kokkos::fence(); lError = std::sqrt(localError)/std::sqrt(localNorm); - //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; - - double globaltemp = 0.0; - MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double relError = lError;//absError / std::sqrt(globaltemp); + + return relError; - double absError = std::sqrt(globaltemp); +} - //temp = 0.0; - //Kokkos::parallel_reduce("Q norm", Q.size(), - // KOKKOS_LAMBDA(const int i, double& valL){ - // double myVal = dot(Qview(i), Qview(i)).apply(); - // valL += myVal; - // }, Kokkos::Sum(temp)); +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); - globaltemp = 0.0; - MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + Kokkos::fence(); + lError = std::sqrt(localError)/std::sqrt(localNorm); - double relError = absError / std::sqrt(globaltemp); + double relError = lError;//absError / std::sqrt(globaltemp); return relError; @@ -189,39 +207,44 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - unsigned int& notIncluded) { + Vector_t& length) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); double localError = 0.0; double localNorm = 0.0; - notIncluded = 0; Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm, - unsigned int& excluded){ + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -10.0); + bool isRight = (diff[d] >= 10.0); + bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + double myValError = dot(diff, diff).apply(); myValError = std::sqrt(myValError); - bool isIncluded = (myValError < 10.0); - + //bool isIncluded = (myValError < 10.0); - myValError *= isIncluded; + //myValError *= isIncluded; if(myValError > valLError) valLError = myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); myValnorm = std::sqrt(myValnorm); - myValnorm *= isIncluded; + //myValnorm *= isIncluded; if(myValnorm > valLnorm) valLnorm = myValnorm; - excluded += (!isIncluded); - }, Kokkos::Max(localError), Kokkos::Max(localNorm), - Kokkos::Sum(notIncluded)); + //excluded += (!isIncluded); + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); Kokkos::fence(); lError = localError/localNorm; @@ -435,14 +458,15 @@ int main(int argc, char *argv[]){ Vector_t alpha = {0.05, 0.05, 0.05}; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; - double dxPIC = rmax[0] / nrPIC[0]; - double dyPIC = rmax[1] / nrPIC[1]; - double dzPIC = rmax[2] / nrPIC[2]; + Vector_t length = rmax - rmin; + double dxPIC = length[0] / nrPIC[0]; + double dyPIC = length[1] / nrPIC[1]; + double dzPIC = length[2] / nrPIC[2]; - double dxPIF = rmax[0] / nmPIF[0]; - double dyPIF = rmax[1] / nmPIF[1]; - double dzPIF = rmax[2] / nmPIF[2]; + double dxPIF = length[0] / nmPIF[0]; + double dyPIF = length[1] / nmPIF[1]; + double dzPIF = length[2] / nmPIF[2]; Vector_t hrPIC = {dxPIC, dyPIC, dzPIC}; Vector_t hrPIF = {dxPIF, dyPIF, dzPIF}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; @@ -455,7 +479,7 @@ int main(int argc, char *argv[]){ PLayout_t PL(FLPIC, meshPIC); //Q = -\int\int f dx dv - double Q = -rmax[0] * rmax[1] * rmax[2]; + double Q = -length[0] * length[1] * length[2]; Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -654,9 +678,8 @@ int main(int argc, char *argv[]){ PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); double localRerror, localPerror; - unsigned int excludedNp; - double Rerror = computeRLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, excludedNp); - double Perror = computePLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); //double EfieldError = 0; //if(it > 0) { @@ -685,7 +708,6 @@ int main(int argc, char *argv[]){ msg << "Finished iteration: " << it+1 << " Rerror: " << Rerror << " Perror: " << Perror - << " # Excluded: " << excludedNp << endl; IpplTimings::startTimer(dumpData); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 7bf180a3d..e4b25794e 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -168,23 +168,15 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr //std::cout << "Rank: " << myrank << " Iter: " << iter << 
" Local. Error: " << lError << std::endl; - double globaltemp = 0.0; - MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - - double absError = std::sqrt(globaltemp); - - //temp = 0.0; - //Kokkos::parallel_reduce("Q norm", Q.size(), - // KOKKOS_LAMBDA(const int i, double& valL){ - // double myVal = dot(Qview(i), Qview(i)).apply(); - // valL += myVal; - // }, Kokkos::Sum(temp)); + //double globaltemp = 0.0; + //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //double absError = std::sqrt(globaltemp); - globaltemp = 0.0; - MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //globaltemp = 0.0; + //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - double relError = absError / std::sqrt(globaltemp); + double relError = lError;//absError / std::sqrt(globaltemp); return relError; @@ -626,8 +618,8 @@ int main(int argc, char *argv[]){ PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); double localRerror, localPerror; - double Rerror = computeLinfError(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - double Perror = computeLinfError(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); + double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); IpplTimings::stopTimer(computeErrors); From 9249627574799edae19bb77ac15104ccb447247e Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 31 Jan 2023 11:19:53 +0100 Subject: [PATCH 052/117] Grid, Pc change test done for all examples --- alpine/ElectrostaticPIC/ChargedParticles.hpp | 2 +- alpine/ElectrostaticPIC/PenningTrap.cpp | 4 +- alpine/PinT/LandauDampingPinT.cpp | 15 ++- alpine/PinT/PenningTrapPinT.cpp | 134 ++++++++++++++----- 4 files changed, 115 insertions(+), 40 deletions(-) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 61730648d..67b8f738f 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -341,7 +341,7 @@ class ChargedParticles : public ippl::ParticleBase { rhoNorm_m = norm(rho_m); IpplTimings::stopTimer(sumTimer); - //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); //rho = rho_e - rho_i rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); diff --git a/alpine/ElectrostaticPIC/PenningTrap.cpp b/alpine/ElectrostaticPIC/PenningTrap.cpp index 9ea440176..669634089 100644 --- a/alpine/ElectrostaticPIC/PenningTrap.cpp +++ b/alpine/ElectrostaticPIC/PenningTrap.cpp @@ -205,8 +205,8 @@ int main(int argc, char *argv[]){ } // create mesh and layout objects for this problem domain - Vector_t rmin(0.0); - Vector_t rmax(20.0); + Vector_t rmin = {0.0, 0.0, 0.0}; + Vector_t rmax = {20.0, 20.0, 20.0}; double dx = rmax[0] / nr[0]; double dy = rmax[1] / nr[1]; double dz = rmax[2] / nr[2]; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index ea02ff985..e80bed086 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -606,11 +606,6 @@ int main(int argc, char *argv[]){ msg << "Starting parareal iterations ..." 
<< endl; - //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); - //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); - //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, - // dtFine, isConverged, tStartMySlice, 0); - //Ippl::Comm->barrier(); bool isConverged = false; bool isPreviousDomainConverged; if(Ippl::Comm->rank() == 0) { @@ -625,6 +620,14 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeShapeFunctionPIF); Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); + + + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); + //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); + //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, + // dtFine, isConverged, tStartMySlice, 0); + //Ippl::Comm->barrier(); + //unsigned int maxIterRank; for (unsigned int it=0; itR, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + //double Rerror = computeRL2Error(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + //double Perror = computePL2Error(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); //double EfieldError = 0; //if(it > 0) { // EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index e4b25794e..016351a19 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -145,9 +145,9 @@ double CDF(const double& x, const double& mu, const double& sigma) { return cdf; } - -double computeL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, + Vector_t& length) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -157,6 +157,15 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr Kokkos::parallel_reduce("Abs. error and norm", Q.size(), KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -22.0); + bool isRight = (diff[d] >= 22.0); + bool isInside = ((diff[d] > -22.0) && (diff[d] < 22.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + double myValError = dot(diff, diff).apply(); valLError += myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); @@ -165,16 +174,32 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr Kokkos::fence(); lError = std::sqrt(localError)/std::sqrt(localNorm); - //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. 
Error: " << lError << std::endl; + double relError = lError;//absError / std::sqrt(globaltemp); + + return relError; - //double globaltemp = 0.0; - //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); +} - //double absError = std::sqrt(globaltemp); +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; - //globaltemp = 0.0; - //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + lError = std::sqrt(localError)/std::sqrt(localNorm); double relError = lError;//absError / std::sqrt(globaltemp); @@ -182,8 +207,9 @@ double computeL2Error(ParticleAttrib& Q, ParticleAttrib& Qpr } -double computeLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, + Vector_t& length) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -193,31 +219,69 @@ double computeLinfError(ParticleAttrib& Q, ParticleAttrib& Q Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -22.0); + bool isRight = (diff[d] >= 22.0); + bool isInside = ((diff[d] > -22.0) && (diff[d] < 22.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + //bool isIncluded = (myValError < 10.0); + + //myValError *= isIncluded; if(myValError > valLError) valLError = myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); myValnorm = std::sqrt(myValnorm); + + //myValnorm *= isIncluded; if(myValnorm > valLnorm) valLnorm = myValnorm; + + //excluded += (!isIncluded); }, Kokkos::Max(localError), Kokkos::Max(localNorm)); Kokkos::fence(); lError = localError/localNorm; - //std::cout << "Rank: " << myrank << " Iter: " << iter << " Local. Error: " << lError << std::endl; - + + double relError = lError; + + return relError; - //double globaltemp = 0.0; - //MPI_Allreduce(&localError, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); +} - //double absError = globaltemp; +double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, + const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; - //globaltemp = 0.0; - //MPI_Allreduce(&localNorm, &globaltemp, 1, MPI_DOUBLE, MPI_MAX, Ippl::getComm()); + Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + myValError = std::sqrt(myValError); + + if(myValError > valLError) valLError = myValError; + + double myValnorm = dot(Qview(i), Qview(i)).apply(); + myValnorm = std::sqrt(myValnorm); + + if(myValnorm > valLnorm) valLnorm = myValnorm; + }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - //double relError = absError / globaltemp; + Kokkos::fence(); + lError = localError/localNorm; + double relError = lError; return relError; @@ -376,26 +440,26 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t rmin(0.0); - Vector_t rmax(20.0); - double dxPIC = rmax[0] / nrPIC[0]; - double dyPIC = rmax[1] / nrPIC[1]; - double dzPIC = rmax[2] / nrPIC[2]; - + Vector_t rmax(25.0); Vector_t length = rmax - rmin; + double dxPIC = length[0] / nrPIC[0]; + double dyPIC = length[1] / nrPIC[1]; + double dzPIC = length[2] / nrPIC[2]; + Vector_t mu, sd; for (unsigned d = 0; dR = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); + //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); + + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); @@ -615,11 +683,13 @@ int main(int argc, char *argv[]){ Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); double localRerror, localPerror; - double Rerror = computeL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror); - double Perror = computeL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); IpplTimings::stopTimer(computeErrors); From 55bc603edb5c163f903bb71b1518032d83ae50cf Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 31 Jan 2023 18:56:02 +0100 Subject: [PATCH 053/117] dumpVTK commented --- alpine/ElectrostaticPIC/ChargedParticles.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 67b8f738f..61730648d 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -341,7 +341,7 @@ class ChargedParticles : public ippl::ParticleBase { rhoNorm_m = norm(rho_m); IpplTimings::stopTimer(sumTimer); - dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); //rho = rho_e - rho_i rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); From 31720f9115b24098fc01b0ba7cf52437db8b1061 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 10 Feb 2023 14:07:07 +0100 Subject: [PATCH 054/117] cuFINUFFT interface made. 
Need to create a test and see if it works --- src/FFT/FFT.h | 77 ++++ src/FFT/FFT.hpp | 1036 ++++++++++++++++++++++++++++------------------- 2 files changed, 687 insertions(+), 426 deletions(-) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 8a13e3b45..703fc9373 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -64,6 +65,10 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; + /** + Tag classes for Non-uniform type of Fourier transforms + */ + class NUFFTransform {}; enum FFTComm { a2av = 0, @@ -110,6 +115,29 @@ namespace ippl { using backendCos = heffte::backend::stock_cos; }; #endif +#endif + +#ifdef KOKKOS_ENABLE_CUDA + template + struct CufinufftType {}; + + template <> + struct Cufinufft { + using makeplan = cufinufftf_makeplan; + using setpts = cufinufftf_setpts; + using transform = cufinufftf_execute; + using destroy = cufinufftf_destroy; + using plan_t = cufinufftf_plan; + }; + + template <> + struct Cufinufft { + using makeplan = cufinufft_makeplan; + using setpts = cufinufft_setpts; + using transform = cufinufft_execute; + using destroy = cufinufft_destroy; + using plan_t = cufinufft_plan; + }; #endif } @@ -296,6 +324,55 @@ namespace ippl { }; + /** + Non-uniform FFT class + */ + template + class FFT { + + public: + + typedef FieldLayout Layout_t; + typedef std::complex Complex_t; + typedef Field ComplexField_t; + + using makeplan = detail::Cufinufft::makeplan; + using setpts = detail::Cufinufft::setpts; + using transform = detail::Cufinufft::transform; + using destroy = detail::Cufinufft::destroy; + using plan_t = detail::Cufinufft::plan_t; + + /** Create a new FFT object with the layout for the input Field, type + * (1 or 2) for the NUFFT and parameters for cuFINUFFT. + */ + FFT(const Layout_t& layout, int type, const ParameterList& params); + + // Destructor + ~FFT(); + + /** Do the NUFFT. + */ + template + void transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, ComplexField_t& f); + + + private: + + /** + setup performs the initialization necessary. + */ + void setup(const std::array& nmodes, + const ParameterList& params); + + plan_t plan_m; + int ier_m; + T tol_m; + int type_m; + + }; + + } #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 853651858..f804413b5 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -57,8 +57,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. 
*/ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -88,45 +88,45 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -138,74 +138,74 @@ namespace ippl { int direction, typename FFT::ComplexField_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost).real( - fview(i, j, k).real()); - tempField(i-nghost, j-nghost, k-nghost).imag( - fview(i, j, k).imag()); - }); - - - - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), 
tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k).real() = - tempField(i-nghost, j-nghost, k-nghost).real(); - fview(i, j, k).imag() = - tempField(i-nghost, j-nghost, k-nghost).imag(); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).real( + fview(i, j, k).real()); + tempField(i-nghost, j-nghost, k-nghost).imag( + fview(i, j, k).imag()); + }); + + + + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).real(); + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).imag(); + }); } @@ -275,46 +275,46 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {lowInput, highInput}; - heffte::box3d outbox = {lowOutput, highOutput}; + heffte::box3d inbox = {lowInput, highInput}; + heffte::box3d outbox = {lowOutput, highOutput}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm 
= heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), - heffteOptions); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), + heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -325,104 +325,104 @@ namespace ippl { typename FFT::RealField_t& f, typename FFT::ComplexField_t& g) { - auto fview = f.getView(); - auto gview = g.getView(); - const int nghostf = f.getNghost(); - const int nghostg = g.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, - fview.extent(1) - 2*nghostf, - fview.extent(2) - 2*nghostf); - - Kokkos::View - tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, - gview.extent(1) - 2*nghostg, - gview.extent(2) - 2*nghostg); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos f field in FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); - }); - Kokkos::parallel_for("copy from Kokkos g field in FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( - gview(i, j, k).real()); - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( - gview(i, j, k).imag()); - }); + auto fview = f.getView(); + auto gview = g.getView(); + const int nghostf = f.getNghost(); + const int nghostg = g.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, + fview.extent(1) - 2*nghostf, + fview.extent(2) - 2*nghostf); + + Kokkos::View + tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, + 
gview.extent(1) - 2*nghostg, + gview.extent(2) - 2*nghostg); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos f field in FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); + }); + Kokkos::parallel_for("copy from Kokkos g field in FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( + gview(i, j, k).real()); + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( + gview(i, j, k).imag()); + }); - if ( direction == 1 ) - { - heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), - heffte::scale::full ); - } - else if ( direction == -1 ) - { - heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), - heffte::scale::none ); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos f field FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); - }); - - Kokkos::parallel_for("copy to Kokkos g field FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - gview(i, j, k).real() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); - gview(i, j, k).imag() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); - }); + if ( direction == 1 ) + { + heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), + heffte::scale::full ); + } + else if ( direction == -1 ) + { + heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), + heffte::scale::none ); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos f field FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); + }); + + Kokkos::parallel_for("copy to Kokkos g field FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + gview(i, j, k).real() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); + gview(i, j, k).imag() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); + }); } @@ -446,8 +446,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. 
*/ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -477,44 +477,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -524,66 +524,66 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { 
- heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); } @@ -607,8 +607,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. 
*/ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -638,44 +638,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -686,66 +686,250 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { 
- heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); + + } + + + //========================================================================= + // FFT NUFFTransform Constructors + //========================================================================= + + /** + Create a new FFT object of type NUFFTransform, with a + given layout and cuFINUFFT parameters. + */ + + template + FFT::FFT(const Layout_t& layout, + int type, + const ParameterList& params) + { + + + /** + * cuFINUFFT requires to pass a 3D array even for 2D and + * 1D FFTs we just have to fill in other + * dimensions to be 1. Note this is different from Heffte + * where we fill 0. + */ + + std::array nmodes; + + const NDIndex& lDom = layout.getLocalNDIndex(); + + nmodes.fill(1); + + for(size_t d = 0; d < Dim; ++d) { + nmodes[d] = lDom[d].length();; + } + + type_m = type; + setup(nmodes, params); + } + + + /** + setup performs the initialization necessary. + */ + template + void + FFT::setup(const std::array& nmodes, + const ParameterList& params) + { + + cufinufft_opts opts; + ier = cufinufft_default_opts(type, Dim, &opts); + + if(!params.get("use_cufinufft_defaults")) { + tol = params.get("tolerance"); + opts.gpu_method = params.get("gpu_method"); + opts.gpu_sort = params.get("gpu_sort"); + opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); + } + + int maxbatchsize = 0; //default option. 
ignored for ntransf = 1 which + // is our case + + int iflag; + + if(type_m == 1) { + iflag = -1; + } + else if(type_m == 2) { + iflag = 1; + } + else { + throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); + } + + ier = makeplan(type_m, Dim, nmodes, iflag, 1, tol, + maxbatchsize, &plan, &opts); + + } + + + + template + template + void + FFT::transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, + typename FFT::ComplexField_t& f) + { + auto fview = f.getView(); + auto Rview = R.getView(); + auto Qview = Q.getView(); + const int nghost = f.getNghost(); + + auto localNp = R.getParticleCount(); + + /** + * cuFINUFFT's layout is left, hence we allocate the temporary + * Kokkos views with the same layout + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + + std::array, 3> tempR; + + tempR.fill(NULL); + + for(size_t d = 0; d < Dim; ++d) { + Kokkos::realloc(tempR[d], localNp); + } + + Kokkos::View*,Kokkos::LayoutLeft> tempQ("tempQ", localNp); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).real( + fview(i, j, k).real()); + tempField(i-nghost, j-nghost, k-nghost).imag( + fview(i, j, k).imag()); + }); + + + Kokkos::parallel_for("copy from particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + for(size_t d = 0; d < Dim; ++d) { + temp[R](i) = Rview(i)[d]; + } + tempQ(i).real(Qview(i)); + tempQ(i).imag(0.0); + }); + + ier = setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + NULL, NULL, NULL, plan); + + ier = transform(tempQ.data(), tempField.data(), plan); + + + if(type_m == 1) { + Kokkos::parallel_for("copy to field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).real(); + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).imag(); + }); + } + else if(type_m == 2) { + Kokkos::parallel_for("copy to particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + Qview(i) = tempQ(i).real(); + }); + } + } + + template + FFT::~FFT() { + + ier = destroy(plan); } } From 64d3df9980d1476d3fc1903260194e84366cf365 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 13 Feb 2023 16:54:07 +0100 Subject: [PATCH 055/117] Interface made and test done but have some compilation issues --- CMakeLists.txt | 7 +++++ src/CMakeLists.txt | 8 +++++- src/FFT/FFT.h | 60 ++++++++++++++++++++++++++--------------- src/FFT/FFT.hpp | 52 ++++++++++++++++++----------------- test/FFT/CMakeLists.txt | 6 +++++ 5 files changed, 87 insertions(+), 46 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 91e072bf8..8f15ec370 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,13 @@ if (ENABLE_FFT) message (STATUS "Found Heffte_DIR: ${Heffte_DIR}") endif () +option (ENABLE_NUFFT "Enable NUFFT transform" OFF) +if (ENABLE_NUFFT) + add_definitions (-DENABLE_NUFFT) + find_package(CUFINUFFT REQUIRED) + message (STATUS "Found CUFINUFFT_DIR: ${CUFINUFFT_DIR}") +endif () + option (ENABLE_SOLVERS 
"Enable IPPL solvers" OFF) add_subdirectory (src) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ad4b1f186..b4c04d6c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,7 +94,13 @@ include_directories ( add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) -target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) + +if (ENABLE_NUFFT) + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}/libcufinufft.a) +else() + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) +endif() + install (TARGETS ippl DESTINATION lib) install (FILES ${IPPL_BASEDIR_HDRS} DESTINATION include) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 703fc9373..16fab61f3 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -37,6 +37,7 @@ #include "FieldLayout/FieldLayout.h" #include "Field/Field.h" +#include "Particle/ParticleAttrib.h" #include "Utility/ParameterList.h" #include "Utility/IpplException.h" @@ -122,21 +123,33 @@ namespace ippl { struct CufinufftType {}; template <> - struct Cufinufft { - using makeplan = cufinufftf_makeplan; - using setpts = cufinufftf_setpts; - using transform = cufinufftf_execute; - using destroy = cufinufftf_destroy; - using plan_t = cufinufftf_plan; + struct CufinufftType { + //using makeplan = typename cufinufftf_makeplan; + //using setpts = typename cufinufftf_setpts; + //using execute = typename cufinufftf_execute; + //using destroy = typename cufinufftf_destroy; + //using plan_t = typename cufinufftf_plan; + + + //typedef typename cufinufftf_makeplan makeplan; + //typedef typename cufinufftf_setpts setpts; + //typedef typename cufinufftf_execute execute; + //typedef typename cufinufftf_destroy destroy; + //typedef typename cufinufftf_plan plan_t; }; template <> - struct Cufinufft { - using makeplan = cufinufft_makeplan; - using setpts = cufinufft_setpts; - using transform = cufinufft_execute; - using destroy = cufinufft_destroy; - using plan_t = cufinufft_plan; + struct CufinufftType { + //using makeplan = typename cufinufft_makeplan; + //using setpts = typename cufinufft_setpts; + //using execute = typename cufinufft_execute; + //using destroy = typename cufinufft_destroy; + //using plan_t = typename cufinufft_plan; + //typedef typename cufinufft_makeplan makeplan; + //typedef typename cufinufft_setpts setpts; + //typedef typename cufinufft_execute execute; + //typedef typename cufinufft_destroy destroy; + //typedef typename cufinufft_plan plan_t; }; #endif } @@ -333,14 +346,15 @@ namespace ippl { public: typedef FieldLayout Layout_t; - typedef std::complex Complex_t; - typedef Field ComplexField_t; + typedef std::complex StdComplex_t; + typedef Kokkos::complex KokkosComplex_t; + typedef Field ComplexField_t; - using makeplan = detail::Cufinufft::makeplan; - using setpts = detail::Cufinufft::setpts; - using transform = detail::Cufinufft::transform; - using destroy = detail::Cufinufft::destroy; - using plan_t = detail::Cufinufft::plan_t; + //using makeplan = typename detail::CufinufftType::makeplan; + //using setpts = typename detail::CufinufftType::setpts; + //using execute = typename detail::CufinufftType::execute; + //using destroy = typename detail::CufinufftType::destroy; + //using plan_t = typename detail::CufinufftType::plan_t; /** Create a new FFT object with the layout for the input Field, type * (1 or 2) for the NUFFT and parameters for cuFINUFFT. @@ -355,6 +369,9 @@ namespace ippl { template void transform(const ParticleAttrib< Vector, Properties... 
>& R, ParticleAttrib& Q, ComplexField_t& f); + //template + //void transform(const ParticleAttrib< Vector>& R, + // ParticleAttrib>& Q, ComplexField_t& f); private: @@ -362,10 +379,11 @@ namespace ippl { /** setup performs the initialization necessary. */ - void setup(const std::array& nmodes, + void setup(std::array& nmodes, const ParameterList& params); - plan_t plan_m; + //plan_t plan_m; + cufinufft_plan plan_m; int ier_m; T tol_m; int type_m; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index f804413b5..59abf7184 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -793,15 +793,15 @@ namespace ippl { */ template void - FFT::setup(const std::array& nmodes, + FFT::setup(std::array& nmodes, const ParameterList& params) { cufinufft_opts opts; - ier = cufinufft_default_opts(type, Dim, &opts); + ier_m = cufinufft_default_opts(type_m, Dim, &opts); if(!params.get("use_cufinufft_defaults")) { - tol = params.get("tolerance"); + tol_m = params.get("tolerance"); opts.gpu_method = params.get("gpu_method"); opts.gpu_sort = params.get("gpu_sort"); opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); @@ -821,9 +821,10 @@ namespace ippl { else { throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); } - - ier = makeplan(type_m, Dim, nmodes, iflag, 1, tol, - maxbatchsize, &plan, &opts); + + int dim = static_cast(Dim); + ier_m = cufinufft_makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, + maxbatchsize, &plan_m, &opts); } @@ -835,6 +836,9 @@ namespace ippl { FFT::transform(const ParticleAttrib< Vector, Properties... >& R, ParticleAttrib& Q, typename FFT::ComplexField_t& f) + //FFT::transform(const ParticleAttrib< Vector>& R, + // ParticleAttrib>& Q, + // typename FFT::ComplexField_t& f) { auto fview = f.getView(); auto Rview = R.getView(); @@ -847,21 +851,21 @@ namespace ippl { * cuFINUFFT's layout is left, hence we allocate the temporary * Kokkos views with the same layout */ - Kokkos::View + Kokkos::View tempField("tempField", fview.extent(0) - 2*nghost, fview.extent(1) - 2*nghost, fview.extent(2) - 2*nghost); - std::array, 3> tempR; + Vector, 3> tempR; - tempR.fill(NULL); for(size_t d = 0; d < Dim; ++d) { Kokkos::realloc(tempR[d], localNp); } + - Kokkos::View*,Kokkos::LayoutLeft> tempQ("tempQ", localNp); + Kokkos::View tempQ("tempQ", localNp); using mdrange_type = Kokkos::MDRangePolicy>; @@ -875,10 +879,10 @@ namespace ippl { const size_t j, const size_t k) { - tempField(i-nghost, j-nghost, k-nghost).real( - fview(i, j, k).real()); - tempField(i-nghost, j-nghost, k-nghost).imag( - fview(i, j, k).imag()); + tempField(i-nghost, j-nghost, k-nghost).x = + fview(i, j, k).real(); + tempField(i-nghost, j-nghost, k-nghost).y = + fview(i, j, k).imag(); }); @@ -887,16 +891,16 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i) { for(size_t d = 0; d < Dim; ++d) { - temp[R](i) = Rview(i)[d]; + tempR[d](i) = Rview(i)[d]; } - tempQ(i).real(Qview(i)); - tempQ(i).imag(0.0); + tempQ(i).x = Qview(i).real(); + tempQ(i).y = Qview(i).imag(); }); - ier = setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, - NULL, NULL, NULL, plan); + ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + NULL, NULL, NULL, plan_m); - ier = transform(tempQ.data(), tempField.data(), plan); + ier_m = cufinufft_execute(tempQ.data(), tempField.data(), plan_m); if(type_m == 1) { @@ -911,9 +915,9 @@ namespace ippl { const size_t k) { fview(i, j, k).real() = - tempField(i-nghost, j-nghost, k-nghost).real(); + tempField(i-nghost, j-nghost, k-nghost).x; fview(i, j, k).imag() 
= - tempField(i-nghost, j-nghost, k-nghost).imag(); + tempField(i-nghost, j-nghost, k-nghost).y; }); } else if(type_m == 2) { @@ -921,7 +925,7 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - Qview(i) = tempQ(i).real(); + Qview(i) = tempQ(i).x; }); } } @@ -929,7 +933,7 @@ namespace ippl { template FFT::~FFT() { - ier = destroy(plan); + ier_m = cufinufft_destroy(plan_m); } } diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 5d0332166..7b7ecfdde 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -39,6 +39,12 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestNUFFT1 TestNUFFT1.cpp) +target_link_libraries ( + TestNUFFT1 + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) # vi: set et ts=4 sw=4 sts=4: # Local Variables: From e1ab9d118a0b2bdaaabba8b24831e56ac496fa96 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 13 Feb 2023 16:55:04 +0100 Subject: [PATCH 056/117] test file added --- test/FFT/TestNUFFT1.cpp | 199 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 test/FFT/TestNUFFT1.cpp diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp new file mode 100644 index 000000000..822236627 --- /dev/null +++ b/test/FFT/TestNUFFT1.cpp @@ -0,0 +1,199 @@ +#include "Ippl.h" +#include "Utility/ParameterList.h" + +#include +#include +#include +#include +#include + +template +struct Bunch : public ippl::ParticleBase +{ + + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) + { + this->addAttribute(Q); + } + + ~Bunch(){ } + + typedef ippl::ParticleAttrib> charge_container_type; + charge_container_type Q; + +}; + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + using view_type_complex = typename ippl::detail::ViewType, 1>::view_type; + // Output View for the random numbers + view_type x; + + view_type_complex Q; + + // The GeneratorPool + GeneratorPool rand_pool; + + T minU, maxU; + + // Initialize all members + generate_random(view_type x_,view_type_complex Q_, GeneratorPool rand_pool_, + T& minU_, T& maxU_) + : x(x_), Q(Q_), rand_pool(rand_pool_), + minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + for (unsigned d = 0; d < Dim; ++d) { + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + } + Q(i).real() = rand_gen.drand(0.0, 1.0); + Q(i).imag() = rand_gen.drand(0.0, 1.0); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + + +int main(int argc, char *argv[]) { + + Ippl ippl(argc,argv); + + constexpr unsigned int dim = 3; + const double pi = std::acos(-1.0); + + typedef ippl::ParticleSpatialLayout playout_type; + typedef Bunch bunch_type; + + + std::array pt = {32, 32, 32}; + ippl::Index I(pt[0]); + ippl::Index J(pt[1]); + ippl::Index K(pt[2]); + ippl::NDIndex owned(I, J, K); + + ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims + for (unsigned int d=0; d layout(owned, decomp); + + std::array dx = { + 2.0 * pi / double(pt[0]), + 2.0 * pi / double(pt[1]), + 2.0 * pi / double(pt[2]), + }; + + typedef ippl::Vector Vector_t; + + Vector_t hx = {dx[0], dx[1], dx[2]}; + Vector_t origin = {-pi, -pi, -pi}; + ippl::UniformCartesian mesh(owned, hx, origin); + + playout_type pl(layout, mesh); + + bunch_type 
bunch(pl); + bunch.setParticleBC(ippl::BC::PERIODIC); + + using size_type = ippl::detail::size_type; + + + size_type Np = std::pow(32,3) * 10; + + typedef ippl::Field, dim> field_type; + + field_type field(mesh, layout); + + ippl::ParameterList fftParams; + + fftParams.add("use_cufinufft_defaults", true); + + typedef ippl::FFT FFT_type; + + std::unique_ptr fft; + + int type = 1; + + fft = std::make_unique(layout, type, fftParams); + + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + + + size_type nloc = Np/Ippl::Comm->size(); + + bunch.create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42)); + Kokkos::parallel_for(nloc, + generate_random, dim>( + bunch.R.getView(), bunch.Q.getView(), rand_pool64, minU, maxU)); + + + fft->transform(bunch.R, bunch.Q, field); + + auto field_result = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), field.getView()); + + Kokkos::complex max_error_abs(0.0, 0.0); + Kokkos::complex max_error_rel(0.0, 0.0); + + //Pick some mode to check. We choose it same as cuFINUFFT testcase example2d1many.cpp in + //the first 2 dimensions + ippl::Vector kVec; + kVec[0] = (int)(0.37 * pt[0]); + kVec[1] = (int)(0.26 * pt[1]); + kVec[2] = (int)(0.20 * pt[2]); + + //Linearize based on LayoutLeft and the results from cuFINUFFT are already fftshifted + //int it = (pt[0]/2 + kVec[0]) + (pt[0] * (pt[1]/2 + kVec[1])) + + // (pt[0] * pt[1] * (pt[2]/2 + kVec[2])); + + int iInd = (pt[0]/2 + kVec[0]); + int jInd = (pt[1]/2 + kVec[1]); + int kInd = (pt[2]/2 + kVec[2]); + + + Kokkos::complex reducedValue(0.0, 0.0); + + auto Rview = bunch.R.getView(); + auto Qview = bunch.Q.getView(); + + Kokkos::complex imag = {0.0, 1.0}; + + Kokkos::parallel_reduce("NUDFT type1", nloc, + KOKKOS_LAMBDA(const size_t idx, Kokkos::complex& valL) { + + double arg = 0.0; + for(size_t d = 0; d < dim; ++d) { + arg += kVec[d]*Rview(idx)[d]; + } + + valL += (Kokkos::Experimental::cos(arg) + - imag * Kokkos::Experimental::sin(arg)) * Qview(idx); + }, Kokkos::Sum>(reducedValue)); + + double abs_error_real = std::fabs(reducedValue.real() - field_result(iInd, jInd, kInd).real()); + double rel_error_real = std::fabs(reducedValue.real() - field_result(iInd, jInd, kInd).real()) /std::fabs(reducedValue.real()); + double abs_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()); + double rel_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()) /std::fabs(reducedValue.imag()); + + std::cout << "Abs Error in real part: " << std::setprecision(16) + << abs_error_real << "Rel. error: " << std::setprecision(16) << rel_error_real << std::endl; + std::cout << "Abs Error in imag part: " << std::setprecision(16) + << abs_error_imag << "Rel. 
error: " << std::setprecision(16) << rel_error_imag << std::endl; + + + //Kokkos::complex max_error(0.0, 0.0); + //MPI_Reduce(&max_error_local, &max_error, 1, + // MPI_C_DOUBLE_COMPLEX, MPI_MAX, 0, Ippl::getComm()); + + return 0; +} From 9f7534f129b3e31118d898686605043bb2a33bdb Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 08:56:56 +0100 Subject: [PATCH 057/117] Find cmake file added for cufinufft --- CMakeModules/FindCUFINUFFT.cmake | 31 +++++++++++++++++++++++++++++++ src/CMakeLists.txt | 2 +- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 CMakeModules/FindCUFINUFFT.cmake diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake new file mode 100644 index 000000000..755062d33 --- /dev/null +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -0,0 +1,31 @@ +# +# Find CUFINUFFT includes and library +# +# CUFINUFFT_INCLUDE_DIR - where to find cufinufft.h +# CUFINUFFT_LIBRARY - libcufinufft.a path +# CUFINUFFT_FOUND - do not attempt to use if "no" or undefined. + +FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h + HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include + PATHS ENV C_INCLUDE_PATH +) + +FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.a + HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib-static $ENV{CUFINUFFT_DIR}/lib-static $ENV{CUFINUFFT}/lib-static ${PROJECT_SOURCE_DIR}/lib-static + PATHS ENV LIBRARY_PATH +) + +IF(CUFINUFFT_INCLUDE_DIR AND CUFINUFFT_LIBRARY_DIR) + SET( CUFINUFFT_FOUND "YES" ) +ENDIF() + +IF (CUFINUFFT_FOUND) + IF (NOT CUFINUFFT_FIND_QUIETLY) + MESSAGE(STATUS "Found cufinufft library dir: ${CUFINUFFT_LIBRARY_DIR}") + MESSAGE(STATUS "Found cufinufft include dir: ${CUFINUFFT_INCLUDE_DIR}") + ENDIF (NOT CUFINUFFT_FIND_QUIETLY) +ELSE (CUFINUFFT_FOUND) + IF (CUFINUFFT_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "Could not find CUFINUFFT!") + ENDIF (CUFINUFFT_FIND_REQUIRED) +ENDIF (CUFINUFFT_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b4c04d6c5..8c96a6bc7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,7 +96,7 @@ add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) if (ENABLE_NUFFT) - target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}/libcufinufft.a) + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) else() target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) endif() From dccf8cac5b049465c2ec25486b1b4e7a4e94d594 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 13:27:34 +0100 Subject: [PATCH 058/117] Test for type 2 NUFFT also added --- CMakeModules/FindCUFINUFFT.cmake | 6 +- src/FFT/FFT.hpp | 31 +++-- test/FFT/CMakeLists.txt | 6 + test/FFT/TestNUFFT1.cpp | 30 ++-- test/FFT/TestNUFFT2.cpp | 229 +++++++++++++++++++++++++++++++ 5 files changed, 273 insertions(+), 29 deletions(-) create mode 100644 test/FFT/TestNUFFT2.cpp diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake index 755062d33..691eb510f 100644 --- a/CMakeModules/FindCUFINUFFT.cmake +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -7,11 +7,11 @@ FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include - PATHS ENV C_INCLUDE_PATH + PATHS ENV CPP_INCLUDE_PATH ) 
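(Illustrative consolidation, not part of any patch in this series.) The NUFFT build wiring introduced across these commits is spread over the top-level CMakeLists.txt (the ENABLE_NUFFT option and find_package call), this find module, and src/CMakeLists.txt (include paths and linking). Assuming the cuFINUFFT install prefix is exported as CUFINUFFT_DIR so the HINTS above can resolve it, the pieces are expected to combine roughly as sketched below. Note that CUFINUFFT_LIBRARY_DIR, despite its name, holds the full library path returned by FIND_LIBRARY, which is why it is passed straight to target_link_libraries.

    option (ENABLE_NUFFT "Enable NUFFT transform" OFF)
    if (ENABLE_NUFFT)
        add_definitions (-DENABLE_NUFFT)
        find_package (CUFINUFFT REQUIRED)   # resolved by FindCUFINUFFT.cmake above
        target_include_directories (ippl PUBLIC ${CUFINUFFT_INCLUDE_DIR})
        target_link_libraries (ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}
                                           ${CUFINUFFT_LIBRARY_DIR})
    else ()
        target_link_libraries (ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY})
    endif ()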
-FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.a - HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib-static $ENV{CUFINUFFT_DIR}/lib-static $ENV{CUFINUFFT}/lib-static ${PROJECT_SOURCE_DIR}/lib-static +FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so + HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib $ENV{CUFINUFFT_DIR}/lib $ENV{CUFINUFFT}/lib ${PROJECT_SOURCE_DIR}/lib PATHS ENV LIBRARY_PATH ) diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 59abf7184..4ef372730 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -799,6 +799,7 @@ namespace ippl { cufinufft_opts opts; ier_m = cufinufft_default_opts(type_m, Dim, &opts); + tol_m = 1e-6; if(!params.get("use_cufinufft_defaults")) { tol_m = params.get("tolerance"); @@ -836,9 +837,6 @@ namespace ippl { FFT::transform(const ParticleAttrib< Vector, Properties... >& R, ParticleAttrib& Q, typename FFT::ComplexField_t& f) - //FFT::transform(const ParticleAttrib< Vector>& R, - // ParticleAttrib>& Q, - // typename FFT::ComplexField_t& f) { auto fview = f.getView(); auto Rview = R.getView(); @@ -857,12 +855,15 @@ namespace ippl { fview.extent(2) - 2*nghost); - Vector, 3> tempR; + //Vector, 3> tempR; + Kokkos::View tempRx("tempRx", localNp); + Kokkos::View tempRy("tempRy", localNp); + Kokkos::View tempRz("tempRz", localNp); - for(size_t d = 0; d < Dim; ++d) { - Kokkos::realloc(tempR[d], localNp); - } + //for(size_t d = 0; d < Dim; ++d) { + // Kokkos::realloc(tempR[d], localNp); + //} Kokkos::View tempQ("tempQ", localNp); @@ -890,14 +891,19 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - for(size_t d = 0; d < Dim; ++d) { - tempR[d](i) = Rview(i)[d]; - } + //for(size_t d = 0; d < Dim; ++d) { + // tempR[d](i) = Rview(i)[d]; + //} + tempRx(i) = Rview(i)[0]; + tempRy(i) = Rview(i)[1]; + tempRz(i) = Rview(i)[2]; tempQ(i).x = Qview(i).real(); tempQ(i).y = Qview(i).imag(); }); - ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + //ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + // NULL, NULL, NULL, plan_m); + ier_m = cufinufft_setpts(localNp, tempRx.data(), tempRy.data(), tempRz.data(), 0, NULL, NULL, NULL, plan_m); ier_m = cufinufft_execute(tempQ.data(), tempField.data(), plan_m); @@ -925,7 +931,8 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - Qview(i) = tempQ(i).x; + Qview(i).real() = tempQ(i).x; + Qview(i).imag() = tempQ(i).y; }); } } diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 7b7ecfdde..4d3e5fe90 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -45,6 +45,12 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestNUFFT2 TestNUFFT2.cpp) +target_link_libraries ( + TestNUFFT2 + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) # vi: set et ts=4 sw=4 sts=4: # Local Variables: diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index 822236627..06ac71234 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { typedef Bunch bunch_type; - std::array pt = {32, 32, 32}; + std::array pt = {256, 256, 256}; ippl::Index I(pt[0]); ippl::Index J(pt[1]); ippl::Index K(pt[2]); @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) { using size_type = ippl::detail::size_type; - size_type Np = std::pow(32,3) * 10; + size_type Np = std::pow(256,3) * 8; typedef ippl::Field, dim> field_type; @@ -114,7 +114,12 @@ int main(int argc, char *argv[]) { 
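// Illustrative note, not part of the patch: with "use_cufinufft_defaults" set
// to false, FFT<NUFFTransform>::setup() (see the FFT.hpp hunks earlier in this
// series) forwards the keys added below into the cuFINUFFT plan options,
// roughly as:
//
//     cufinufft_opts opts;
//     cufinufft_default_opts(type_m, Dim, &opts);  // start from library defaults
//     tol_m                = params.get<double>("tolerance");
//     opts.gpu_method      = params.get<int>("gpu_method");
//     opts.gpu_sort        = params.get<int>("gpu_sort");
//     opts.gpu_kerevalmeth = params.get<int>("gpu_kerevalmeth");
//
// When the defaults are kept, the tolerance falls back to 1e-6. The templated
// ParameterList::get<T>() spelling is an assumption here; the setup() code
// shown above is authoritative.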
ippl::ParameterList fftParams; - fftParams.add("use_cufinufft_defaults", true); + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-6); + + fftParams.add("use_cufinufft_defaults", false); typedef ippl::FFT FFT_type; @@ -145,20 +150,17 @@ int main(int argc, char *argv[]) { Kokkos::complex max_error_abs(0.0, 0.0); Kokkos::complex max_error_rel(0.0, 0.0); - //Pick some mode to check. We choose it same as cuFINUFFT testcase example2d1many.cpp in - //the first 2 dimensions + //Pick some mode to check. We choose it same as cuFINUFFT testcase cufinufft3d1_test.cu ippl::Vector kVec; kVec[0] = (int)(0.37 * pt[0]); kVec[1] = (int)(0.26 * pt[1]); - kVec[2] = (int)(0.20 * pt[2]); + kVec[2] = (int)(0.13 * pt[2]); - //Linearize based on LayoutLeft and the results from cuFINUFFT are already fftshifted - //int it = (pt[0]/2 + kVec[0]) + (pt[0] * (pt[1]/2 + kVec[1])) + - // (pt[0] * pt[1] * (pt[2]/2 + kVec[2])); + const int nghost = field.getNghost(); - int iInd = (pt[0]/2 + kVec[0]); - int jInd = (pt[1]/2 + kVec[1]); - int kInd = (pt[2]/2 + kVec[2]); + int iInd = (pt[0]/2 + kVec[0] + nghost); + int jInd = (pt[1]/2 + kVec[1] + nghost); + int kInd = (pt[2]/2 + kVec[2] + nghost); Kokkos::complex reducedValue(0.0, 0.0); @@ -186,9 +188,9 @@ int main(int argc, char *argv[]) { double rel_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()) /std::fabs(reducedValue.imag()); std::cout << "Abs Error in real part: " << std::setprecision(16) - << abs_error_real << "Rel. error: " << std::setprecision(16) << rel_error_real << std::endl; + << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; std::cout << "Abs Error in imag part: " << std::setprecision(16) - << abs_error_imag << "Rel. error: " << std::setprecision(16) << rel_error_imag << std::endl; + << abs_error_imag << " Rel. 
error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; //Kokkos::complex max_error(0.0, 0.0); diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp new file mode 100644 index 000000000..147c2ba74 --- /dev/null +++ b/test/FFT/TestNUFFT2.cpp @@ -0,0 +1,229 @@ +#include "Ippl.h" +#include "Utility/ParameterList.h" + +#include +#include +#include +#include +#include + +template +struct Bunch : public ippl::ParticleBase +{ + + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) + { + this->addAttribute(Q); + } + + ~Bunch(){ } + + typedef ippl::ParticleAttrib> charge_container_type; + charge_container_type Q; + +}; + +template +struct generate_random_particles { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x; + + // The GeneratorPool + GeneratorPool rand_pool; + + T minU, maxU; + + // Initialize all members + generate_random_particles(view_type x_, GeneratorPool rand_pool_, + T& minU_, T& maxU_) + : x(x_), rand_pool(rand_pool_), + minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + for (unsigned d = 0; d < Dim; ++d) { + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +template +struct generate_random_field { + + using view_type = typename ippl::detail::ViewType::view_type; + view_type f; + + // The GeneratorPool + GeneratorPool rand_pool; + + // Initialize all members + generate_random_field(view_type f_, GeneratorPool rand_pool_) + : f(f_), rand_pool(rand_pool_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i, const size_t j, const size_t k) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + f(i, j, k).real() = rand_gen.drand(0.0, 1.0); + f(i, j, k).imag() = rand_gen.drand(0.0, 1.0); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +int main(int argc, char *argv[]) { + + Ippl ippl(argc,argv); + + constexpr unsigned int dim = 3; + const double pi = std::acos(-1.0); + + typedef ippl::ParticleSpatialLayout playout_type; + typedef Bunch bunch_type; + + + ippl::Vector pt = {32, 32, 32}; + ippl::Index I(pt[0]); + ippl::Index J(pt[1]); + ippl::Index K(pt[2]); + ippl::NDIndex owned(I, J, K); + + ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims + for (unsigned int d=0; d layout(owned, decomp); + + std::array dx = { + 2.0 * pi / double(pt[0]), + 2.0 * pi / double(pt[1]), + 2.0 * pi / double(pt[2]), + }; + + typedef ippl::Vector Vector_t; + //typedef ippl::Vector, 3> CxVector_t; + + Vector_t hx = {dx[0], dx[1], dx[2]}; + Vector_t origin = {-pi, -pi, -pi}; + ippl::UniformCartesian mesh(owned, hx, origin); + + playout_type pl(layout, mesh); + + bunch_type bunch(pl); + bunch.setParticleBC(ippl::BC::PERIODIC); + + using size_type = ippl::detail::size_type; + + + size_type Np = std::pow(32,3) * 10; + + typedef ippl::Field, dim> field_type; + + field_type field(mesh, layout); + + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-12); + + 
fftParams.add("use_cufinufft_defaults", false); + + typedef ippl::FFT FFT_type; + + std::unique_ptr fft; + + int type = 2; + + fft = std::make_unique(layout, type, fftParams); + + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + + + size_type nloc = Np/Ippl::Comm->size(); + + const int nghost = field.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + auto fview = field.getView(); + bunch.create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42)); + Kokkos::parallel_for(nloc, + generate_random_particles, dim>( + bunch.R.getView(), rand_pool64, minU, maxU)); + + Kokkos::parallel_for(mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost}), + generate_random_field, Kokkos::Random_XorShift64_Pool<>, dim>( + field.getView(), rand_pool64)); + + fft->transform(bunch.R, bunch.Q, field); + + auto Q_result = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), bunch.Q.getView()); + + Kokkos::complex max_error_abs(0.0, 0.0); + Kokkos::complex max_error_rel(0.0, 0.0); + + //Pick some target point to check. We choose it same as cuFINUFFT testcase cufinufft3d2_test.cu + + int idx = nloc/2; + + Kokkos::complex reducedValue(0.0, 0.0); + + auto Rview = bunch.R.getView(); + + Kokkos::complex imag = {0.0, 1.0}; + + Kokkos::parallel_reduce("NUDFT type2", + mdrange_type({0, 0, 0}, + {fview.extent(0) - 2 * nghost, + fview.extent(1) - 2 * nghost, + fview.extent(2) - 2 * nghost}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + Kokkos::complex& valL) + { + ippl::Vector iVec = {i, j, k}; + double arg = 0.0; + for(size_t d = 0; d < dim; ++d) { + arg += (iVec[d] - (pt[d]/2)) * Rview(idx)[d]; + } + + valL += (Kokkos::Experimental::cos(arg) + + imag * Kokkos::Experimental::sin(arg)) * fview(i + nghost, j + nghost, k + nghost); + }, Kokkos::Sum>(reducedValue)); + + double abs_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()); + double rel_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()) /std::fabs(reducedValue.real()); + double abs_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()); + double rel_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()) /std::fabs(reducedValue.imag()); + + std::cout << "Abs Error in real part: " << std::setprecision(16) + << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; + std::cout << "Abs Error in imag part: " << std::setprecision(16) + << abs_error_imag << " Rel. 
error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; + + + //Kokkos::complex max_error(0.0, 0.0); + //MPI_Reduce(&max_error_local, &max_error, 1, + // MPI_C_DOUBLE_COMPLEX, MPI_MAX, 0, Ippl::getComm()); + + return 0; +} From e526befeb7066ee98c5058029d3b711778149d60 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 14:04:55 +0100 Subject: [PATCH 059/117] few tweaks and cleanups but still lot of things need to be generalized --- CMakeModules/FindCUFINUFFT.cmake | 4 ++-- src/FFT/FFT.h | 8 ++++---- src/FFT/FFT.hpp | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake index 691eb510f..9098a6e7a 100644 --- a/CMakeModules/FindCUFINUFFT.cmake +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -2,14 +2,14 @@ # Find CUFINUFFT includes and library # # CUFINUFFT_INCLUDE_DIR - where to find cufinufft.h -# CUFINUFFT_LIBRARY - libcufinufft.a path +# CUFINUFFT_LIBRARY - libcufinufft.so path # CUFINUFFT_FOUND - do not attempt to use if "no" or undefined. FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include PATHS ENV CPP_INCLUDE_PATH ) - +#Static library has some issues and gives a cuda error at the end of compilation FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib $ENV{CUFINUFFT_DIR}/lib $ENV{CUFINUFFT}/lib ${PROJECT_SOURCE_DIR}/lib PATHS ENV LIBRARY_PATH diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 16fab61f3..cec240b8f 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -66,10 +66,12 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; +#ifdef KOKKOS_ENABLE_CUDA /** Tag classes for Non-uniform type of Fourier transforms */ class NUFFTransform {}; +#endif enum FFTComm { a2av = 0, @@ -337,6 +339,7 @@ namespace ippl { }; +#ifdef KOKKOS_ENABLE_CUDA /** Non-uniform FFT class */ @@ -346,7 +349,6 @@ namespace ippl { public: typedef FieldLayout Layout_t; - typedef std::complex StdComplex_t; typedef Kokkos::complex KokkosComplex_t; typedef Field ComplexField_t; @@ -369,9 +371,6 @@ namespace ippl { template void transform(const ParticleAttrib< Vector, Properties... 
>& R, ParticleAttrib& Q, ComplexField_t& f); - //template - //void transform(const ParticleAttrib< Vector>& R, - // ParticleAttrib>& Q, ComplexField_t& f); private: @@ -392,6 +391,7 @@ namespace ippl { } +#endif #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 4ef372730..c79b247a7 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -750,6 +750,7 @@ namespace ippl { } +#ifdef KOKKOS_ENABLE_CUDA //========================================================================= // FFT NUFFTransform Constructors //========================================================================= @@ -764,8 +765,6 @@ namespace ippl { int type, const ParameterList& params) { - - /** * cuFINUFFT requires to pass a 3D array even for 2D and * 1D FFTs we just have to fill in other @@ -823,6 +822,7 @@ namespace ippl { throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); } + //dim in cufinufft is int int dim = static_cast(Dim); ier_m = cufinufft_makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, maxbatchsize, &plan_m, &opts); @@ -943,6 +943,7 @@ namespace ippl { ier_m = cufinufft_destroy(plan_m); } +#endif } // vi: set et ts=4 sw=4 sts=4: From 9d7204ed3cc919f552cad5df6a6dc15b430eef4b Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 16:11:30 +0100 Subject: [PATCH 060/117] In the middle of changes --- CMakeModules/FindCUFINUFFT.cmake | 1 + src/CMakeLists.txt | 3 + src/Particle/ParticleAttrib.hpp | 271 +++++++++++++++++++++++++++++-- 3 files changed, 261 insertions(+), 14 deletions(-) diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake index 9098a6e7a..ce40536f1 100644 --- a/CMakeModules/FindCUFINUFFT.cmake +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -17,6 +17,7 @@ FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so IF(CUFINUFFT_INCLUDE_DIR AND CUFINUFFT_LIBRARY_DIR) SET( CUFINUFFT_FOUND "YES" ) + SET( CUFINUFFT_DIR $ENV{CUFINUFFT_DIR} ) ENDIF() IF (CUFINUFFT_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c96a6bc7..8b4330823 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,6 +96,9 @@ add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) if (ENABLE_NUFFT) + include_directories ( + BEFORE ${CUFINUFFT_INCLUDE_DIR} + ) target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) else() target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 4d3919df1..56030879f 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -204,13 +204,13 @@ namespace ippl { template template - void ParticleAttrib::scatterPIF(Field& f, Field& Sk, + void ParticleAttrib::scatterPIFNUDFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... 
>& pp) const { - static IpplTimings::TimerRef scatterPIFTimer = IpplTimings::getTimer("ScatterPIF"); - IpplTimings::startTimer(scatterPIFTimer); + static IpplTimings::TimerRef scatterPIFNUDFTTimer = IpplTimings::getTimer("ScatterPIFNUDFT"); + IpplTimings::startTimer(scatterPIFNUDFTTimer); using view_type = typename Field::view_type; using vector_type = typename M::vector_type; @@ -246,7 +246,7 @@ namespace ippl { size_t flatN = N[0]*N[1]*N[2]; - Kokkos::parallel_for("ParticleAttrib::scatterPIF compute", + Kokkos::parallel_for("ParticleAttrib::scatterPIFNUDFT compute", team_policy(flatN, Kokkos::AUTO), KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { const size_t flatIndex = teamMember.league_rank(); @@ -293,7 +293,7 @@ namespace ippl { } ); - IpplTimings::stopTimer(scatterPIFTimer); + IpplTimings::stopTimer(scatterPIFNUDFTTimer); //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); //IpplTimings::startTimer(scatterAllReduceTimer); @@ -366,12 +366,12 @@ namespace ippl { template template - void ParticleAttrib::gatherPIF(Field& f, Field& Sk, + void ParticleAttrib::gatherPIFNUDFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp) const { - static IpplTimings::TimerRef gatherPIFTimer = IpplTimings::getTimer("GatherPIF"); - IpplTimings::startTimer(gatherPIFTimer); + static IpplTimings::TimerRef gatherPIFNUDFTTimer = IpplTimings::getTimer("GatherPIFNUDFT"); + IpplTimings::startTimer(gatherPIFNUDFTTimer); using view_type = typename Field::view_type; using vector_type = typename M::vector_type; @@ -403,7 +403,7 @@ namespace ippl { size_t flatN = N[0]*N[1]*N[2]; - Kokkos::parallel_for("ParticleAttrib::gatherPIF", + Kokkos::parallel_for("ParticleAttrib::gatherPIFNUDFT", team_policy(Np, Kokkos::AUTO), KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { const size_t idx = teamMember.league_rank(); @@ -470,10 +470,253 @@ namespace ippl { ); - IpplTimings::stopTimer(gatherPIFTimer); + IpplTimings::stopTimer(gatherPIFNUDFTTimer); } +#ifdef KOKKOS_ENABLE_CUDA + + template + template + void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib< Vector, Properties... 
>& pp) + const + { + + static IpplTimings::TimerRef scatterPIFNUFFTTimer = IpplTimings::getTimer("ScatterPIFNUFFT"); + IpplTimings::startTimer(scatterPIFNUFFTTimer); + + using view_type = typename Field::view_type; + using vector_type = typename M::vector_type; + using value_type = typename ParticleAttrib::value_type; + view_type fview = f.getView(); + typename Field::view_type Skview = Sk.getView(); + const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const M& mesh = f.get_mesh(); + const vector_type& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + vector_type Len; + Vector N; + + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + + //using view_type_temp = typename detail::ViewType::view_type; + + //view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); + + double pi = std::acos(-1.0); + Kokkos::complex imag = {0.0, 1.0}; + + size_t Np = *(this->localNum_mp); + + size_t flatN = N[0]*N[1]*N[2]; + + Kokkos::parallel_for("ParticleAttrib::scatterPIFNUFFT compute", + team_policy(flatN, Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { + const size_t flatIndex = teamMember.league_rank(); + +#ifdef KOKKOS_ENABLE_CUDA + const int k = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (k * N[0] * N[1]); + const int i = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#else + + const int i = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + const int k = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#endif + + FT reducedValue = 0.0; + Vector iVec = {i, j, k}; + vector_type kVec; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + } + auto Sk = Skview(i+nghost, j+nghost, k+nghost); + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), + [=](const size_t idx, FT& innerReduce) + { + double arg = 0.0; + for(size_t d = 0; d < Dim; ++d) { + arg += kVec[d]*pp(idx)[d]; + } + const value_type& val = dview_m(idx); + + innerReduce += Sk * (Kokkos::Experimental::cos(arg) + - imag * Kokkos::Experimental::sin(arg)) * val; + }, Kokkos::Sum(reducedValue)); + + if(teamMember.team_rank() == 0) { + //viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; + fview(i+nghost,j+nghost,k+nghost) = reducedValue; + } + + } + ); + + IpplTimings::stopTimer(scatterPIFNUFFTTimer); + + //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); + //IpplTimings::startTimer(scatterAllReduceTimer); + //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + //MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + // MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); + //IpplTimings::stopTimer(scatterAllReduceTimer); + + } + + + template + template + void ParticleAttrib::gatherPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib< Vector, Properties... 
>& pp) + const + { + static IpplTimings::TimerRef gatherPIFNUFFTTimer = IpplTimings::getTimer("GatherPIFNUFFT"); + IpplTimings::startTimer(gatherPIFNUFFTTimer); + + using view_type = typename Field::view_type; + using vector_type = typename M::vector_type; + using value_type = typename ParticleAttrib::value_type; + view_type fview = f.getView(); + typename Field::view_type Skview = Sk.getView(); + const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const M& mesh = f.get_mesh(); + const vector_type& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + vector_type Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + double pi = std::acos(-1.0); + Kokkos::complex imag = {0.0, 1.0}; + + size_t Np = *(this->localNum_mp); + + size_t flatN = N[0]*N[1]*N[2]; + + Kokkos::parallel_for("ParticleAttrib::gatherPIFNUFFT", + team_policy(Np, Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { + const size_t idx = teamMember.league_rank(); + + value_type reducedValue = 0.0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), + [=](const size_t flatIndex, value_type& innerReduce) + { + +#ifdef KOKKOS_ENABLE_CUDA + const int k = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (k * N[0] * N[1]); + const int i = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#else + const int i = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + const int k = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#endif + + Vector iVec = {i, j, k}; + vector_type kVec; + double Dr = 0.0, arg = 0.0; + for(size_t d = 0; d < Dim; ++d) { + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * iVec[d]; + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); + Dr += kVec[d] * kVec[d]; + arg += kVec[d]*pp(idx)[d]; + } + + + FT Ek = 0.0; + value_type Ex = 0.0; + auto rho = fview(i+nghost,j+nghost,k+nghost); + auto Sk = Skview(i+nghost,j+nghost,k+nghost); + for(size_t d = 0; d < Dim; ++d) { + + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[d] * rho * factor); + + //Inverse Fourier transform when the lhs is real. Use when + //we choose k \in [0 K) instead of from [-K/2+1 K/2] + //Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) + // - Ek.imag() * Kokkos::Experimental::sin(arg)); + Ek *= Sk * (Kokkos::Experimental::cos(arg) + + imag * Kokkos::Experimental::sin(arg)); + Ex[d] = Ek.real(); + } + + innerReduce += Ex; + }, Kokkos::Sum(reducedValue)); + + teamMember.team_barrier(); + + if(teamMember.team_rank() == 0) { + dview_m(idx) = reducedValue; + } + + } + ); + + + IpplTimings::stopTimer(gatherPIFNUFFTTimer); + + } +#endif + + template + inline + void scatterPIFNUFFT(const ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp) + { +#ifdef KOKKOS_ENABLE_CUDA + attrib.scatterPIFNUFFT(f, Sk, pp); +#else + throw IpplException("scatterPIFNUFFT", + "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to + be compiled with CUDA. 
Otherwise use scatterPIFNUDFT."); +#endif + } + + template + inline + void gatherPIFNUFFT(const ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp) + { +#ifdef KOKKOS_ENABLE_CUDA + attrib.gatherPIFNUFFT(f, Sk, pp); +#else + throw IpplException("gatherPIFNUFFT", + "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to + be compiled with CUDA. Otherwise use gatherPIFNUDFT."); +#endif + } /* * Non-class function @@ -491,10 +734,10 @@ namespace ippl { template inline - void scatterPIF(const ParticleAttrib& attrib, Field& f, + void scatterPIFNUDFT(const ParticleAttrib& attrib, Field& f, Field& Sk, const ParticleAttrib, Properties...>& pp) { - attrib.scatterPIF(f, Sk, pp); + attrib.scatterPIFNUDFT(f, Sk, pp); } @@ -509,10 +752,10 @@ namespace ippl { template inline - void gatherPIF(const ParticleAttrib& attrib, Field& f, + void gatherPIFNUDFT(const ParticleAttrib& attrib, Field& f, Field& Sk, const ParticleAttrib, Properties...>& pp) { - attrib.gatherPIF(f, Sk, pp); + attrib.gatherPIFNUDFT(f, Sk, pp); } From 613a80a0da18c0da71a734ae877dcf7ce4b29495 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 16:25:18 +0100 Subject: [PATCH 061/117] include directories added --- CMakeModules/FindCUFINUFFT.cmake | 1 + src/CMakeLists.txt | 3 +++ test/FFT/CMakeLists.txt | 1 + 3 files changed, 5 insertions(+) diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake index 9098a6e7a..202a044a3 100644 --- a/CMakeModules/FindCUFINUFFT.cmake +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -17,6 +17,7 @@ FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so IF(CUFINUFFT_INCLUDE_DIR AND CUFINUFFT_LIBRARY_DIR) SET( CUFINUFFT_FOUND "YES" ) + SET( CUFINUFFT_DIR $ENV{CUFINUFFT_DIR} ) ENDIF() IF (CUFINUFFT_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c96a6bc7..8b4330823 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,6 +96,9 @@ add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) if (ENABLE_NUFFT) + include_directories ( + BEFORE ${CUFINUFFT_INCLUDE_DIR} + ) target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) else() target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 4d3e5fe90..834e9c762 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -3,6 +3,7 @@ message (STATUS "Adding test FFT found in ${_relPath}") include_directories ( ${CMAKE_SOURCE_DIR}/src + ${CUFINUFFT_INCLUDE_DIR} ) link_directories ( From 7198e2f7c13e536fa55acbbd24fae4e56f3c3005 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 16 Feb 2023 08:48:40 +0100 Subject: [PATCH 062/117] target_include_directories seems to work --- src/CMakeLists.txt | 4 +--- test/FFT/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8b4330823..bd6a2205b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,9 +96,7 @@ add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) if (ENABLE_NUFFT) - include_directories ( - BEFORE ${CUFINUFFT_INCLUDE_DIR} - ) + target_include_directories(ippl PUBLIC ${CUFINUFFT_INCLUDE_DIR}) target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) else() target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 834e9c762..4d3e5fe90 100644 --- 
a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -3,7 +3,6 @@ message (STATUS "Adding test FFT found in ${_relPath}") include_directories ( ${CMAKE_SOURCE_DIR}/src - ${CUFINUFFT_INCLUDE_DIR} ) link_directories ( From 47442818f95c9fea04193639b98f7dd69819ed76 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 16 Feb 2023 15:36:55 +0100 Subject: [PATCH 063/117] Almost done but have some compilation errors --- alpine/PinT/ChargedParticlesPinT.hpp | 4 +- alpine/PinT/LandauDampingPinT.cpp | 6 +- src/FFT/FFT.hpp | 28 +-- src/Particle/ParticleAttrib.h | 38 +++- src/Particle/ParticleAttrib.hpp | 260 ++++++++++----------------- 5 files changed, 143 insertions(+), 193 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index c827669a5..492df0c68 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -861,7 +861,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp); + gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); time_m = tStartMySlice; @@ -893,7 +893,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp); + gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); //kick Ptemp = Ptemp - 0.5 * dt * E; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index e80bed086..5662b2619 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -456,8 +456,10 @@ int main(int argc, char *argv[]){ Vector_t kw = {0.5, 0.5, 0.5}; //double alpha = 0.05; Vector_t alpha = {0.05, 0.05, 0.05}; - Vector_t rmin(0.0); - Vector_t rmax = 2 * pi / kw ; + //Vector_t rmin(0.0); + //Vector_t rmax = 2 * pi / kw ; + Vector_t rmin(-2.0 * pi); + Vector_t rmax = 2 * pi; Vector_t length = rmax - rmin; double dxPIC = length[0] / nrPIC[0]; double dyPIC = length[1] / nrPIC[1]; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index c87b10f0e..6a04da61b 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -855,15 +855,16 @@ namespace ippl { fview.extent(2) - 2*nghost); - //Vector, 3> tempR; + Kokkos::View tempR[3]; + //tempR = {NULL, NULL, NULL}; Kokkos::View tempRx("tempRx", localNp); Kokkos::View tempRy("tempRy", localNp); Kokkos::View tempRz("tempRz", localNp); - //for(size_t d = 0; d < Dim; ++d) { - // Kokkos::realloc(tempR[d], localNp); - //} + for(size_t d = 0; d < Dim; ++d) { + Kokkos::realloc(tempR[d], localNp); + } Kokkos::View tempQ("tempQ", localNp); @@ -891,20 +892,21 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - //for(size_t d = 0; d < Dim; ++d) { - // tempR[d](i) = Rview(i)[d]; - //} - tempRx(i) = Rview(i)[0]; - tempRy(i) = Rview(i)[1]; - tempRz(i) = Rview(i)[2]; + for(size_t d = 0; d < Dim; ++d) { + tempR[d](i) = Rview(i)[d]; + } + //tempRx(i) = Rview(i)[0]; + //tempRy(i) = Rview(i)[1]; + //tempRz(i) = Rview(i)[2]; tempQ(i).x = Qview(i).real(); + //tempQ(i).y = 0.0; tempQ(i).y = Qview(i).imag(); }); - //ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, - // NULL, NULL, NULL, plan_m); - ier_m = cufinufft_setpts(localNp, tempRx.data(), tempRy.data(), tempRz.data(), 0, + ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, NULL, 
NULL, NULL, plan_m); + //ier_m = cufinufft_setpts(localNp, tempRx.data(), tempRy.data(), tempRz.data(), 0, + // NULL, NULL, NULL, plan_m); ier_m = cufinufft_execute(tempQ.data(), tempField.data(), plan_m); diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 9b66e18c3..b33761d61 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -59,6 +59,12 @@ namespace ippl { using size_type = detail::size_type; +#ifdef KOKKOS_ENABLE_CUDA + //TODO: Remove hard-coded dimension by having Dim as template + //parameter. Does this need to be in CUDA ifdefs? + using FFT_t = FFT; +#endif + // Create storage for M particle attributes. The storage is uninitialized. // New items are appended to the end of the array. void create(size_type) override; @@ -156,20 +162,36 @@ namespace ippl { scatter(Field& f, const ParticleAttrib, Properties... >& pp) const; - template + template void - scatterPIF(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp) const; + scatterPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp) const; template void gather(Field& f, const ParticleAttrib, Properties...>& pp); - template + template + void + gatherPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp) const; + +#ifdef KOKKOS_ENABLE_CUDA + template + void initializeNUFFT(FieldLayout& layout, ParameterList& fftParams); + + template + void + scatterPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp) const; + + template void - gatherPIF(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp) const; + gatherPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp, + ParticleAttrib& q) const; +#endif T sum(); T max(); @@ -178,6 +200,10 @@ namespace ippl { private: view_type dview_m; +#ifdef KOKKOS_ENABLE_CUDA + std::shared_ptr fftType1_mp; + std::shared_ptr fftType2_mp; +#endif }; } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 56030879f..48d1a4f53 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -203,8 +203,8 @@ namespace ippl { template - template - void ParticleAttrib::scatterPIFNUDFT(Field& f, Field& Sk, + template + void ParticleAttrib::scatterPIFNUDFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp) const { @@ -365,8 +365,8 @@ namespace ippl { } template - template - void ParticleAttrib::gatherPIFNUDFT(Field& f, Field& Sk, + template + void ParticleAttrib::gatherPIFNUDFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp) const { @@ -477,95 +477,44 @@ namespace ippl { #ifdef KOKKOS_ENABLE_CUDA template - template - void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, + template + void initializeNUFFT(FieldLayout& layout, ParameterList& fftParams) { + + fftType1_mp = std::make_shared>(layout, 1, fftParams); + fftType2_mp = std::make_shared>(layout, 2, fftParams); + } + + + + template + template + void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... 
>& pp) const { static IpplTimings::TimerRef scatterPIFNUFFTTimer = IpplTimings::getTimer("ScatterPIFNUFFT"); IpplTimings::startTimer(scatterPIFNUFFTTimer); + + fftType1_mp->transform(pp, *this, f); using view_type = typename Field::view_type; - using vector_type = typename M::vector_type; - using value_type = typename ParticleAttrib::value_type; view_type fview = f.getView(); typename Field::view_type Skview = Sk.getView(); const int nghost = f.getNghost(); - const FieldLayout& layout = f.getLayout(); - const M& mesh = f.get_mesh(); - const vector_type& dx = mesh.getMeshSpacing(); - const auto& domain = layout.getDomain(); - vector_type Len; - Vector N; - - - for (unsigned d=0; d < Dim; ++d) { - N[d] = domain[d].length(); - Len[d] = dx[d] * N[d]; - } - typedef Kokkos::TeamPolicy<> team_policy; - typedef Kokkos::TeamPolicy<>::member_type member_type; - - - //using view_type_temp = typename detail::ViewType::view_type; - - //view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); - - double pi = std::acos(-1.0); - Kokkos::complex imag = {0.0, 1.0}; - - size_t Np = *(this->localNum_mp); - - size_t flatN = N[0]*N[1]*N[2]; - - Kokkos::parallel_for("ParticleAttrib::scatterPIFNUFFT compute", - team_policy(flatN, Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { - const size_t flatIndex = teamMember.league_rank(); - -#ifdef KOKKOS_ENABLE_CUDA - const int k = (int)(flatIndex / (N[0] * N[1])); - const int flatIndex2D = flatIndex - (k * N[0] * N[1]); - const int i = flatIndex2D % N[0]; - const int j = (int)(flatIndex2D / N[0]); -#else - - const int i = (int)(flatIndex / (N[0] * N[1])); - const int flatIndex2D = flatIndex - (i * N[0] * N[1]); - const int k = flatIndex2D % N[0]; - const int j = (int)(flatIndex2D / N[0]); -#endif - - FT reducedValue = 0.0; - Vector iVec = {i, j, k}; - vector_type kVec; - for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - } - auto Sk = Skview(i+nghost, j+nghost, k+nghost); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), - [=](const size_t idx, FT& innerReduce) - { - double arg = 0.0; - for(size_t d = 0; d < Dim; ++d) { - arg += kVec[d]*pp(idx)[d]; - } - const value_type& val = dview_m(idx); - - innerReduce += Sk * (Kokkos::Experimental::cos(arg) - - imag * Kokkos::Experimental::sin(arg)) * val; - }, Kokkos::Sum(reducedValue)); - - if(teamMember.team_rank() == 0) { - //viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; - fview(i+nghost,j+nghost,k+nghost) = reducedValue; - } - - } - ); + using mdrange_type = Kokkos::MDRangePolicy>; + Kokkos::parallel_for("Multiply with shape functions", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) *= Skview(i, j, k); + }); IpplTimings::stopTimer(scatterPIFNUFFTTimer); @@ -580,22 +529,29 @@ namespace ippl { template - template - void ParticleAttrib::gatherPIFNUFFT(Field& f, Field& Sk, - const ParticleAttrib< Vector, Properties... >& pp) + template + void ParticleAttrib::gatherPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib< Vector, Properties... 
>& pp, + ParticleAttrib& q) const { static IpplTimings::TimerRef gatherPIFNUFFTTimer = IpplTimings::getTimer("GatherPIFNUFFT"); IpplTimings::startTimer(gatherPIFNUFFTTimer); + + Field tempField; + + const FieldLayout& layout = f.getLayout(); + const M& mesh = f.get_mesh(); + + tempField.initialize(mesh, layout); using view_type = typename Field::view_type; using vector_type = typename M::vector_type; - using value_type = typename ParticleAttrib::value_type; view_type fview = f.getView(); + view_type tempview = tempField.getView(); + auto qview = q.getView(); typename Field::view_type Skview = Sk.getView(); const int nghost = f.getNghost(); - const FieldLayout& layout = f.getLayout(); - const M& mesh = f.get_mesh(); const vector_type& dx = mesh.getMeshSpacing(); const auto& domain = layout.getDomain(); vector_type Len; @@ -607,82 +563,46 @@ namespace ippl { } - - typedef Kokkos::TeamPolicy<> team_policy; - typedef Kokkos::TeamPolicy<>::member_type member_type; - double pi = std::acos(-1.0); Kokkos::complex imag = {0.0, 1.0}; - size_t Np = *(this->localNum_mp); - size_t flatN = N[0]*N[1]*N[2]; - - Kokkos::parallel_for("ParticleAttrib::gatherPIFNUFFT", - team_policy(Np, Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { - const size_t idx = teamMember.league_rank(); - - value_type reducedValue = 0.0; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), - [=](const size_t flatIndex, value_type& innerReduce) - { - -#ifdef KOKKOS_ENABLE_CUDA - const int k = (int)(flatIndex / (N[0] * N[1])); - const int flatIndex2D = flatIndex - (k * N[0] * N[1]); - const int i = flatIndex2D % N[0]; - const int j = (int)(flatIndex2D / N[0]); -#else - const int i = (int)(flatIndex / (N[0] * N[1])); - const int flatIndex2D = flatIndex - (i * N[0] * N[1]); - const int k = flatIndex2D % N[0]; - const int j = (int)(flatIndex2D / N[0]); -#endif - - Vector iVec = {i, j, k}; - vector_type kVec; - double Dr = 0.0, arg = 0.0; - for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - //kVec[d] = 2 * pi / Len[d] * iVec[d]; - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); - Dr += kVec[d] * kVec[d]; - arg += kVec[d]*pp(idx)[d]; - } - + for(size_t gd = 0; gd < Dim; ++gd) { + Kokkos::parallel_for("Gather NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + Vector iVec = {i, j, k}; + Vector_t kVec; - FT Ek = 0.0; - value_type Ex = 0.0; - auto rho = fview(i+nghost,j+nghost,k+nghost); - auto Sk = Skview(i+nghost,j+nghost,k+nghost); - for(size_t d = 0; d < Dim; ++d) { - - bool isNotZero = (Dr != 0.0); - double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); - Ek = -(imag * kVec[d] * rho * factor); - - //Inverse Fourier transform when the lhs is real. 
Use when - //we choose k \in [0 K) instead of from [-K/2+1 K/2] - //Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) - // - Ek.imag() * Kokkos::Experimental::sin(arg)); - Ek *= Sk * (Kokkos::Experimental::cos(arg) - + imag * Kokkos::Experimental::sin(arg)); - Ex[d] = Ek.real(); - } - - innerReduce += Ex; - }, Kokkos::Sum(reducedValue)); + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + Dr += kVec[d] * kVec[d]; + } - teamMember.team_barrier(); + tempview(i, j, k) = fview(i, j, k); + + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + + tempview(i, j, k) *= -Skview(i, j, k) * (imag * kVec[gd] * factor); + }); - if(teamMember.team_rank() == 0) { - dview_m(idx) = reducedValue; - } + fftType2_mp->transform(pp, q, tempField); - } - ); + Kokkos::parallel_for("Assign E gather NUFFT", + Np, + KOKKOS_CLASS_LAMBDA(const size_t i) + { + dview_m(i)[gd] = qview(i); + }); + } IpplTimings::stopTimer(gatherPIFNUFFTTimer); @@ -690,31 +610,31 @@ namespace ippl { } #endif - template + template inline void scatterPIFNUFFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp) { #ifdef KOKKOS_ENABLE_CUDA attrib.scatterPIFNUFFT(f, Sk, pp); #else - throw IpplException("scatterPIFNUFFT", - "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to - be compiled with CUDA. Otherwise use scatterPIFNUDFT."); + //throw IpplException("scatterPIFNUFFT", "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to + // be compiled with CUDA. Otherwise use scatterPIFNUDFT."); #endif } - template + template inline void gatherPIFNUFFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp, + ParticleAttrib& q) { #ifdef KOKKOS_ENABLE_CUDA - attrib.gatherPIFNUFFT(f, Sk, pp); + attrib.gatherPIFNUFFT(f, Sk, pp, q); #else - throw IpplException("gatherPIFNUFFT", - "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to - be compiled with CUDA. Otherwise use gatherPIFNUDFT."); + //throw IpplException("gatherPIFNUFFT", + // "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to + // be compiled with CUDA. 
Otherwise use gatherPIFNUDFT."); #endif } @@ -732,10 +652,10 @@ namespace ippl { attrib.scatter(f, pp); } - template + template inline void scatterPIFNUDFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp) { attrib.scatterPIFNUDFT(f, Sk, pp); } @@ -750,10 +670,10 @@ namespace ippl { attrib.gather(f, pp); } - template + template inline void gatherPIFNUDFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp) { attrib.gatherPIFNUDFT(f, Sk, pp); } From c4f9d714e168679c6f337510b448a44dad6ce949 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 17 Feb 2023 09:34:29 +0100 Subject: [PATCH 064/117] Function pointers and C-style arrays introduced to solve the type and dimension issues --- src/FFT/FFT.h | 52 ++++++++++++++++++++++--------------------------- src/FFT/FFT.hpp | 36 ++++++++++++++-------------------- 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index cec240b8f..6544b54f2 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "FieldLayout/FieldLayout.h" @@ -126,32 +127,28 @@ namespace ippl { template <> struct CufinufftType { - //using makeplan = typename cufinufftf_makeplan; - //using setpts = typename cufinufftf_setpts; - //using execute = typename cufinufftf_execute; - //using destroy = typename cufinufftf_destroy; - //using plan_t = typename cufinufftf_plan; - - - //typedef typename cufinufftf_makeplan makeplan; - //typedef typename cufinufftf_setpts setpts; - //typedef typename cufinufftf_execute execute; - //typedef typename cufinufftf_destroy destroy; - //typedef typename cufinufftf_plan plan_t; + std::function makeplan = cufinufftf_makeplan; + std::function setpts = cufinufftf_setpts; + std::function execute = cufinufftf_execute; + std::function destroy = cufinufftf_destroy; + + using complexType = cuFloatComplex; + using plan_t = cufinufftf_plan; }; template <> struct CufinufftType { - //using makeplan = typename cufinufft_makeplan; - //using setpts = typename cufinufft_setpts; - //using execute = typename cufinufft_execute; - //using destroy = typename cufinufft_destroy; - //using plan_t = typename cufinufft_plan; - //typedef typename cufinufft_makeplan makeplan; - //typedef typename cufinufft_setpts setpts; - //typedef typename cufinufft_execute execute; - //typedef typename cufinufft_destroy destroy; - //typedef typename cufinufft_plan plan_t; + std::function makeplan = cufinufft_makeplan; + std::function setpts = cufinufft_setpts; + std::function execute = cufinufft_execute; + std::function destroy = cufinufft_destroy; + + using complexType = cuDoubleComplex; + using plan_t = cufinufft_plan; }; #endif } @@ -352,11 +349,8 @@ namespace ippl { typedef Kokkos::complex KokkosComplex_t; typedef Field ComplexField_t; - //using makeplan = typename detail::CufinufftType::makeplan; - //using setpts = typename detail::CufinufftType::setpts; - //using execute = typename detail::CufinufftType::execute; - //using destroy = typename detail::CufinufftType::destroy; - //using plan_t = typename detail::CufinufftType::plan_t; + using complexType = typename detail::CufinufftType::complexType; + using plan_t = typename detail::CufinufftType::plan_t; /** Create a new FFT object with the layout for the input Field, type * (1 or 2) for the NUFFT and parameters for cuFINUFFT. 
@@ -381,8 +375,8 @@ namespace ippl { void setup(std::array& nmodes, const ParameterList& params); - //plan_t plan_m; - cufinufft_plan plan_m; + detail::CufinufftType nufft_m; + plan_t plan_m; int ier_m; T tol_m; int type_m; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index c79b247a7..985e50ab4 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -824,7 +824,7 @@ namespace ippl { //dim in cufinufft is int int dim = static_cast(Dim); - ier_m = cufinufft_makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, + ier_m = nufft_m.makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, maxbatchsize, &plan_m, &opts); } @@ -849,24 +849,23 @@ namespace ippl { * cuFINUFFT's layout is left, hence we allocate the temporary * Kokkos views with the same layout */ - Kokkos::View + Kokkos::View tempField("tempField", fview.extent(0) - 2*nghost, fview.extent(1) - 2*nghost, fview.extent(2) - 2*nghost); - //Vector, 3> tempR; - Kokkos::View tempRx("tempRx", localNp); - Kokkos::View tempRy("tempRy", localNp); - Kokkos::View tempRz("tempRz", localNp); + //Initialize the pointers to NULL and fill only relevant dimensions + //CUFINUFFT requires the input like this. + Kokkos::View tempR[3] = {}; - //for(size_t d = 0; d < Dim; ++d) { - // Kokkos::realloc(tempR[d], localNp); - //} + for(size_t d = 0; d < Dim; ++d) { + Kokkos::realloc(tempR[d], localNp); + } - Kokkos::View tempQ("tempQ", localNp); + Kokkos::View tempQ("tempQ", localNp); using mdrange_type = Kokkos::MDRangePolicy>; @@ -891,22 +890,17 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - //for(size_t d = 0; d < Dim; ++d) { - // tempR[d](i) = Rview(i)[d]; - //} - tempRx(i) = Rview(i)[0]; - tempRy(i) = Rview(i)[1]; - tempRz(i) = Rview(i)[2]; + for(size_t d = 0; d < Dim; ++d) { + tempR[d](i) = Rview(i)[d]; + } tempQ(i).x = Qview(i).real(); tempQ(i).y = Qview(i).imag(); }); - //ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, - // NULL, NULL, NULL, plan_m); - ier_m = cufinufft_setpts(localNp, tempRx.data(), tempRy.data(), tempRz.data(), 0, + ier_m = nufft_m.setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, NULL, NULL, NULL, plan_m); - ier_m = cufinufft_execute(tempQ.data(), tempField.data(), plan_m); + ier_m = nufft_m.execute(tempQ.data(), tempField.data(), plan_m); if(type_m == 1) { @@ -940,7 +934,7 @@ namespace ippl { template FFT::~FFT() { - ier_m = cufinufft_destroy(plan_m); + ier_m = nufft_m.destroy(plan_m); } #endif From 70cd2c4a9cf8da3508f4481e46ccc40b498555a8 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 17 Feb 2023 17:18:59 +0100 Subject: [PATCH 065/117] Code compiles and runs but the results are wrong. Need to see. 
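
For the record, the driver sequence this commit is trying to set up on the application
side is sketched below (P is the ChargedParticlesPIF bunch and FL the FieldLayout from
main() in LandauDampingPIF.cpp; the same calls appear in the hunks that follow). This is
a usage sketch only, not a verified interface: the split into a type-1 plan owned by the
charge attribute and a type-2 plan owned by the E attribute, and the reuse of q as a
scratch buffer during gather, are exactly the parts still being debugged here.

    // C++ usage sketch (IPPL application code, mirrors the hunks below)
    ippl::ParameterList fftParams;
    fftParams.add("gpu_method", 1);              // cuFINUFFT options as set in this patch
    fftParams.add("gpu_sort", 1);
    fftParams.add("gpu_kerevalmeth", 1);
    fftParams.add("tolerance", 1e-10);
    fftParams.add("use_cufinufft_defaults", false);

    P->q.initializeNUFFT(FL, 1, fftParams);      // type-1 plan: particles -> Fourier modes
    P->E.initializeNUFFT(FL, 2, fftParams);      // type-2 plan: Fourier modes -> particles

    P->scatter();   // scatterPIFNUFFT: type-1 NUFFT of q, then multiply modes by Sk
    P->gather();    // gatherPIFNUFFT: per component -i k_d Sk rho_k / |k|^2, then type-2 NUFFT;
                    // q serves as the output buffer and is reset to Q_m / Np_m afterwards
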
--- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 25 ++++--- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 28 ++++++-- alpine/PinT/ChargedParticlesPinT.hpp | 4 +- src/FFT/FFT.h | 4 +- src/FFT/FFT.hpp | 8 +-- src/Particle/ParticleAttrib.h | 40 ++++++----- src/Particle/ParticleAttrib.hpp | 66 ++++++++++--------- test/FFT/TestNUFFT1.cpp | 13 ++-- test/FFT/TestNUFFT2.cpp | 16 ++--- 9 files changed, 110 insertions(+), 94 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index b76152f3b..465180ef6 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -63,6 +63,8 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double Q_m; + size_type Np_m; + double time_m; double rhoNorm_m; @@ -95,12 +97,14 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector_t rmin, Vector_t rmax, ippl::e_dim_tag decomp[Dim], - double Q) + double Q, + size_type Np) : ippl::ParticleBase(pl) , hr_m(hr) , rmin_m(rmin) , rmax_m(rmax) , Q_m(Q) + , Np_m(Np) { // register the particle attributes this->addAttribute(q); @@ -119,7 +123,11 @@ class ChargedParticlesPIF : public ippl::ParticleBase { void gather() { - gatherPIF(this->E, rho_m, Sk_m, this->R); + gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, this->q); + + //Set the charge back to original as we used this view as a + //temporary buffer during gather + this->q = Q_m / Np_m; } @@ -127,7 +135,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Inform m("scatter "); rho_m = {0.0, 0.0}; - scatterPIF(q, rho_m, Sk_m, this->R); + scatterPIFNUFFT(q, rho_m, Sk_m, this->R); rho_m = rho_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -390,18 +398,17 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - //kVec[d] = 2 * pi / Len[d] * iVec[d]; + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } Kokkos::complex Ek = {0.0, 0.0}; double myVal = 0.0; + auto rho = rhoview(i+nghost,j+nghost,k+nghost); for(size_t d = 0; d < Dim; ++d) { - if(Dr != 0.0) { - Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - } + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[d] * rho * factor); myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); } diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 5bbcfd57b..2baa8eef4 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -193,11 +193,12 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; double alpha = 0.05; - Vector_t rmin(0.0); - Vector_t rmax = 2 * pi / kw ; - double dx = rmax[0] / nr[0]; - double dy = rmax[1] / nr[1]; - double dz = rmax[2] / nr[2]; + Vector_t rmin(-2.0 * pi); + Vector_t rmax = 2 * pi; + Vector_t length = rmax - rmin; + double dx = length[0] / nr[0]; + double dy = length[1] / nr[1]; + double dz = length[2] / nr[2]; Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; @@ -208,8 +209,8 @@ int main(int argc, char *argv[]){ PLayout_t PL(FL, mesh); //Q = -\int\int f dx dv - double Q = -rmax[0] * rmax[1] * rmax[2]; - P = 
std::make_unique(PL,hr,rmin,rmax,decomp,Q); + double Q = -length[0] * length[1] * length[2]; + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,totalP); P->nr_m = nr; @@ -263,6 +264,19 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(initializeShapeFunctionPIF); + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-10); + + fftParams.add("use_cufinufft_defaults", false); + + P->q.initializeNUFFT(FL, 1, fftParams); + P->E.initializeNUFFT(FL, 2, fftParams); + + P->scatter(); P->gather(); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 492df0c68..6bd2360a4 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -925,7 +925,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp); + gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); time_m = tStartMySlice; @@ -982,7 +982,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp); + gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); //kick auto R2view = Rtemp.getView(); diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 6544b54f2..2b41a9495 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -38,7 +38,7 @@ #include "FieldLayout/FieldLayout.h" #include "Field/Field.h" -#include "Particle/ParticleAttrib.h" +//#include "Particle/ParticleAttrib.h" #include "Utility/ParameterList.h" #include "Utility/IpplException.h" @@ -51,6 +51,8 @@ namespace heffte { namespace ippl { + template class ParticleAttrib; + /** Tag classes for CC type of Fourier transforms */ diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 4214d4b13..59f8d3ca6 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -893,9 +893,8 @@ namespace ippl { for(size_t d = 0; d < Dim; ++d) { tempR[d](i) = Rview(i)[d]; } - tempQ(i).x = Qview(i).real(); - //tempQ(i).y = 0.0; - tempQ(i).y = Qview(i).imag(); + tempQ(i).x = Qview(i); + tempQ(i).y = 0.0; }); ier_m = nufft_m.setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, @@ -926,8 +925,7 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - Qview(i).real() = tempQ(i).x; - Qview(i).imag() = tempQ(i).y; + Qview(i) = tempQ(i).x; }); } } diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index b33761d61..a50bb9007 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -31,6 +31,8 @@ #include "Expression/IpplExpressions.h" #include "Particle/ParticleAttribBase.h" +#include "FFT/FFT.h" +#include "Utility/ParameterList.h" namespace Kokkos { //reduction identity must be defined in Kokkos namespace template<> @@ -59,11 +61,6 @@ namespace ippl { using size_type = detail::size_type; -#ifdef KOKKOS_ENABLE_CUDA - //TODO: Remove hard-coded dimension by having Dim as template - //parameter. Does this need to be in CUDA ifdefs? - using FFT_t = FFT; -#endif // Create storage for M particle attributes. The storage is uninitialized. // New items are appended to the end of the array. @@ -162,35 +159,35 @@ namespace ippl { scatter(Field& f, const ParticleAttrib, Properties... 
>& pp) const; - template + template void - scatterPIFNUDFT(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp) const; + scatterPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp) const; template void gather(Field& f, const ParticleAttrib, Properties...>& pp); - template + template void - gatherPIFNUDFT(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp) const; + gatherPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp); #ifdef KOKKOS_ENABLE_CUDA template - void initializeNUFFT(FieldLayout& layout, ParameterList& fftParams); + void initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams); - template + template void - scatterPIFNUFFT(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp) const; + scatterPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp) const; - template + template void - gatherPIFNUFFT(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp, - ParticleAttrib& q) const; + gatherPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp, + ParticleAttrib& q); #endif T sum(); @@ -201,8 +198,9 @@ namespace ippl { private: view_type dview_m; #ifdef KOKKOS_ENABLE_CUDA - std::shared_ptr fftType1_mp; - std::shared_ptr fftType2_mp; + //TODO: Remove hard-coded dimension by having Dim as template + //parameter. Does this need to be in CUDA ifdefs? + std::shared_ptr> fftType_mp; #endif }; } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 48d1a4f53..fc6fe1430 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -203,8 +203,8 @@ namespace ippl { template - template - void ParticleAttrib::scatterPIFNUDFT(Field& f, Field& Sk, + template + void ParticleAttrib::scatterPIFNUDFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp) const { @@ -365,10 +365,9 @@ namespace ippl { } template - template - void ParticleAttrib::gatherPIFNUDFT(Field& f, Field& Sk, + template + void ParticleAttrib::gatherPIFNUDFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp) - const { static IpplTimings::TimerRef gatherPIFNUDFTTimer = IpplTimings::getTimer("GatherPIFNUDFT"); IpplTimings::startTimer(gatherPIFNUDFTTimer); @@ -478,17 +477,16 @@ namespace ippl { template template - void initializeNUFFT(FieldLayout& layout, ParameterList& fftParams) { + void ParticleAttrib::initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams) { - fftType1_mp = std::make_shared>(layout, 1, fftParams); - fftType2_mp = std::make_shared>(layout, 2, fftParams); + fftType_mp = std::make_shared>(layout, type, fftParams); } template - template - void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, + template + void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp) const { @@ -496,7 +494,9 @@ namespace ippl { static IpplTimings::TimerRef scatterPIFNUFFTTimer = IpplTimings::getTimer("ScatterPIFNUFFT"); IpplTimings::startTimer(scatterPIFNUFFTTimer); - fftType1_mp->transform(pp, *this, f); + auto q = *this; + + fftType_mp->transform(pp, q, f); using view_type = typename Field::view_type; view_type fview = f.getView(); @@ -529,19 +529,18 @@ namespace ippl { template - template - void ParticleAttrib::gatherPIFNUFFT(Field& f, Field& Sk, + template + void ParticleAttrib::gatherPIFNUFFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... 
>& pp, ParticleAttrib& q) - const { static IpplTimings::TimerRef gatherPIFNUFFTTimer = IpplTimings::getTimer("GatherPIFNUFFT"); IpplTimings::startTimer(gatherPIFNUFFTTimer); Field tempField; - const FieldLayout& layout = f.getLayout(); - const M& mesh = f.get_mesh(); + FieldLayout& layout = f.getLayout(); + M& mesh = f.get_mesh(); tempField.initialize(mesh, layout); @@ -567,18 +566,21 @@ namespace ippl { Kokkos::complex imag = {0.0, 1.0}; size_t Np = *(this->localNum_mp); + + using mdrange_type = Kokkos::MDRangePolicy>; + for(size_t gd = 0; gd < Dim; ++gd) { Kokkos::parallel_for("Gather NUFFT", mdrange_type({nghost, nghost, nghost}, {fview.extent(0) - nghost, fview.extent(1) - nghost, fview.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) + KOKKOS_LAMBDA(const int i, + const int j, + const int k) { Vector iVec = {i, j, k}; - Vector_t kVec; + Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { @@ -594,7 +596,7 @@ namespace ippl { tempview(i, j, k) *= -Skview(i, j, k) * (imag * kVec[gd] * factor); }); - fftType2_mp->transform(pp, q, tempField); + fftType_mp->transform(pp, q, tempField); Kokkos::parallel_for("Assign E gather NUFFT", Np, @@ -610,10 +612,10 @@ namespace ippl { } #endif - template + template inline void scatterPIFNUFFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp) { #ifdef KOKKOS_ENABLE_CUDA attrib.scatterPIFNUFFT(f, Sk, pp); @@ -623,11 +625,11 @@ namespace ippl { #endif } - template + template inline - void gatherPIFNUFFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp, - ParticleAttrib& q) + void gatherPIFNUFFT(ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp, + ParticleAttrib& q) { #ifdef KOKKOS_ENABLE_CUDA attrib.gatherPIFNUFFT(f, Sk, pp, q); @@ -652,10 +654,10 @@ namespace ippl { attrib.scatter(f, pp); } - template + template inline void scatterPIFNUDFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp) { attrib.scatterPIFNUDFT(f, Sk, pp); } @@ -670,10 +672,10 @@ namespace ippl { attrib.gather(f, pp); } - template + template inline - void gatherPIFNUDFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + void gatherPIFNUDFT(ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp) { attrib.gatherPIFNUDFT(f, Sk, pp); } diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index 06ac71234..73629a0e0 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -19,7 +19,7 @@ struct Bunch : public ippl::ParticleBase ~Bunch(){ } - typedef ippl::ParticleAttrib> charge_container_type; + typedef ippl::ParticleAttrib charge_container_type; charge_container_type Q; }; @@ -29,11 +29,11 @@ struct generate_random { using view_type = typename ippl::detail::ViewType::view_type; using value_type = typename T::value_type; - using view_type_complex = typename ippl::detail::ViewType, 1>::view_type; + using view_type_scalar = typename ippl::detail::ViewType::view_type; // Output View for the random numbers view_type x; - view_type_complex Q; + view_type_scalar Q; // The GeneratorPool GeneratorPool rand_pool; @@ -41,7 +41,7 @@ struct generate_random { T minU, maxU; // Initialize all members - generate_random(view_type x_,view_type_complex Q_, GeneratorPool rand_pool_, 
+ generate_random(view_type x_,view_type_scalar Q_, GeneratorPool rand_pool_, T& minU_, T& maxU_) : x(x_), Q(Q_), rand_pool(rand_pool_), minU(minU_), maxU(maxU_) {} @@ -54,8 +54,7 @@ struct generate_random { for (unsigned d = 0; d < Dim; ++d) { x(i)[d] = rand_gen.drand(minU[d], maxU[d]); } - Q(i).real() = rand_gen.drand(0.0, 1.0); - Q(i).imag() = rand_gen.drand(0.0, 1.0); + Q(i) = rand_gen.drand(0.0, 1.0); // Give the state back, which will allow another thread to acquire it rand_pool.free_state(rand_gen); @@ -147,8 +146,6 @@ int main(int argc, char *argv[]) { auto field_result = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), field.getView()); - Kokkos::complex max_error_abs(0.0, 0.0); - Kokkos::complex max_error_rel(0.0, 0.0); //Pick some mode to check. We choose it same as cuFINUFFT testcase cufinufft3d1_test.cu ippl::Vector kVec; diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp index 147c2ba74..f5063b84c 100644 --- a/test/FFT/TestNUFFT2.cpp +++ b/test/FFT/TestNUFFT2.cpp @@ -19,7 +19,7 @@ struct Bunch : public ippl::ParticleBase ~Bunch(){ } - typedef ippl::ParticleAttrib> charge_container_type; + typedef ippl::ParticleAttrib charge_container_type; charge_container_type Q; }; @@ -177,8 +177,6 @@ int main(int argc, char *argv[]) { auto Q_result = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), bunch.Q.getView()); - Kokkos::complex max_error_abs(0.0, 0.0); - Kokkos::complex max_error_rel(0.0, 0.0); //Pick some target point to check. We choose it same as cuFINUFFT testcase cufinufft3d2_test.cu @@ -210,15 +208,15 @@ int main(int argc, char *argv[]) { + imag * Kokkos::Experimental::sin(arg)) * fview(i + nghost, j + nghost, k + nghost); }, Kokkos::Sum>(reducedValue)); - double abs_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()); - double rel_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()) /std::fabs(reducedValue.real()); - double abs_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()); - double rel_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()) /std::fabs(reducedValue.imag()); + double abs_error_real = std::fabs(reducedValue.real() - Q_result(idx)); + double rel_error_real = std::fabs(reducedValue.real() - Q_result(idx)) /std::fabs(reducedValue.real()); + //double abs_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()); + //double rel_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()) /std::fabs(reducedValue.imag()); std::cout << "Abs Error in real part: " << std::setprecision(16) << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; - std::cout << "Abs Error in imag part: " << std::setprecision(16) - << abs_error_imag << " Rel. error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; + //std::cout << "Abs Error in imag part: " << std::setprecision(16) + // << abs_error_imag << " Rel. 
error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; //Kokkos::complex max_error(0.0, 0.0); From f06bb87e22e5db0a07020436db3ac0e51c0a32d8 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 18 Feb 2023 12:41:55 +0100 Subject: [PATCH 066/117] some more modifications --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 18 ++++++++++++------ alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 6 ++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 465180ef6..27d0b4133 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -123,7 +123,8 @@ class ChargedParticlesPIF : public ippl::ParticleBase { void gather() { - gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, this->q); + //gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, this->q); + gatherPIFNUDFT(this->E, rho_m, Sk_m, this->R); //Set the charge back to original as we used this view as a //temporary buffer during gather @@ -135,7 +136,8 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Inform m("scatter "); rho_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rho_m, Sk_m, this->R); + //scatterPIFNUFFT(q, rho_m, Sk_m, this->R); + scatterPIFNUDFT(q, rho_m, Sk_m, this->R); rho_m = rho_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -185,13 +187,15 @@ class ChargedParticlesPIF : public ippl::ParticleBase { for(size_t d = 0; d < Dim; ++d) { bool shift = (iVec[d] > (N[d]/2)); kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } Kokkos::complex Ek = {0.0, 0.0}; - if(Dr != 0.0) { - Ek = -(imag * kVec[0] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - } + auto rho = rhoview(i+nghost,j+nghost,k+nghost); + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[0] * rho * factor); double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); tlSum += myVal; @@ -398,7 +402,9 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + bool shift = (iVec[d] > (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 2baa8eef4..7c3c5714e 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -193,8 +193,10 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; double alpha = 0.05; - Vector_t rmin(-2.0 * pi); - Vector_t rmax = 2 * pi; + //Vector_t rmin(-2.0 * pi); + //Vector_t rmax = 2 * pi; + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw; Vector_t length = rmax - rmin; double dx = length[0] / nr[0]; double dy = length[1] / nr[1]; From f1934f150dd52461e6a0911168b733c140260341 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 21 Feb 2023 15:58:19 +0100 Subject: [PATCH 067/117] PIF with NUFFT now seems to be working on 1 GPU. 
Need to test more --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 79 +++++++++++++--- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 16 +++- src/FFT/FFT.hpp | 16 +++- src/Particle/ParticleAttrib.hpp | 16 ++-- test/FFT/TestNUFFT1.cpp | 92 ++++++++++++++++++- test/FFT/TestNUFFT2.cpp | 8 +- 6 files changed, 194 insertions(+), 33 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 27d0b4133..a3a797823 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -42,6 +42,8 @@ typedef Field Field_t; typedef Field, Dim> CxField_t; typedef Field VField_t; +typedef ippl::FFT FFT_type; + const double pi = std::acos(-1.0); // Test programs have to define this variable for VTK dump purposes @@ -51,6 +53,7 @@ template class ChargedParticlesPIF : public ippl::ParticleBase { public: CxField_t rho_m; + CxField_t rhoDFT_m; Field_t Sk_m; Vector nr_m; @@ -73,6 +76,8 @@ class ChargedParticlesPIF : public ippl::ParticleBase { int shapedegree_m; + std::shared_ptr fft; + public: ParticleAttrib q; // charge typename ippl::ParticleBase::particle_position_type P; // particle velocity @@ -123,8 +128,8 @@ class ChargedParticlesPIF : public ippl::ParticleBase { void gather() { - //gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, this->q); - gatherPIFNUDFT(this->E, rho_m, Sk_m, this->R); + gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, this->q); + //gatherPIFNUDFT(this->E, rho_m, Sk_m, this->R); //Set the charge back to original as we used this view as a //temporary buffer during gather @@ -136,10 +141,15 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Inform m("scatter "); rho_m = {0.0, 0.0}; - //scatterPIFNUFFT(q, rho_m, Sk_m, this->R); - scatterPIFNUDFT(q, rho_m, Sk_m, this->R); + scatterPIFNUFFT(q, rho_m, Sk_m, this->R); + //fft->transform(this->R, q, rho_m); + //rhoDFT_m = {0.0, 0.0}; + //scatterPIFNUDFT(q, rho_m, Sk_m, this->R); + + //dumpFieldData(); rho_m = rho_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + //rhoDFT_m = rhoDFT_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); } @@ -185,9 +195,9 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -402,9 +412,9 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -483,6 +493,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { using mdrange_type = Kokkos::MDRangePolicy>; auto Skview = Sk_m.getView(); auto N = nr_m; + const int nghost = Sk_m.getNghost(); const Mesh_t& mesh = rho_m.get_mesh(); const Vector_t& dx = mesh.getMeshSpacing(); const Vector_t& Len = rmax_m - rmin_m; @@ -508,8 +519,9 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector 
kVec; double Sk = 1.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); double kh = kVec[d] * dx[d]; bool isNotZero = (kh != 0.0); double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); @@ -518,7 +530,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { //Fourier transform of CIC Sk *= std::pow(arg, order); } - Skview(i, j, k) = Sk; + Skview(i+nghost, j+nghost, k+nghost) = Sk; }); } @@ -592,6 +604,47 @@ class ChargedParticlesPIF : public ippl::ParticleBase { // Ippl::Comm->barrier(); //} + void dumpFieldData() { + + typename CxField_t::HostMirror rhoNUFFT_host = rho_m.getHostMirror(); + typename CxField_t::HostMirror rhoNUDFT_host = rhoDFT_m.getHostMirror(); + Kokkos::deep_copy(rhoNUFFT_host, rho_m.getView()); + Kokkos::deep_copy(rhoNUDFT_host, rhoDFT_m.getView()); + const int nghost = rho_m.getNghost(); + std::stringstream pname; + pname << "data/FieldFFT_"; + pname << Ippl::Comm->rank(); + pname << ".csv"; + Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout.precision(10); + pcsvout.setf(std::ios::scientific, std::ios::floatfield); + pcsvout << "rho" << endl; + for (int i = 0; i< nr_m[0]; i++) { + for (int j = 0; j< nr_m[1]; j++) { + for (int k = 0; k< nr_m[2]; k++) { + pcsvout << rhoNUFFT_host(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + std::stringstream pname2; + pname2 << "data/FieldDFT_"; + pname2 << Ippl::Comm->rank(); + pname2 << ".csv"; + Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout2.precision(10); + pcsvout2.setf(std::ios::scientific, std::ios::floatfield); + pcsvout2 << "rho" << endl; + for (int i = 0; i< nr_m[0]; i++) { + for (int j = 0; j< nr_m[1]; j++) { + for (int k = 0; k< nr_m[2]; k++) { + pcsvout2 << rhoNUDFT_host(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + Ippl::Comm->barrier(); + } + + //void dumpParticleData() { // typename ParticleAttrib::HostMirror R_host = this->R.getHostMirror(); diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 7c3c5714e..08e187ec8 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -192,9 +192,10 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; + //Vector_t kw = {1.0, 1.0, 1.0}; double alpha = 0.05; - //Vector_t rmin(-2.0 * pi); - //Vector_t rmax = 2 * pi; + //Vector_t rmin(-pi); + //Vector_t rmax(pi); Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw; Vector_t length = rmax - rmin; @@ -212,11 +213,13 @@ int main(int argc, char *argv[]){ //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; + //double Q = -64.0 * pi * pi * pi; P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,totalP); P->nr_m = nr; P->rho_m.initialize(mesh, FL); + P->rhoDFT_m.initialize(mesh, FL); P->Sk_m.initialize(mesh, FL); P->time_m = 0.0; @@ -232,8 +235,10 @@ int main(int argc, char *argv[]){ Vector_t minU, maxU; //int myRank = Ippl::Comm->rank(); for (unsigned d = 0; d size(); @@ -275,6 +280,9 @@ int main(int argc, char *argv[]){ fftParams.add("use_cufinufft_defaults", false); + + P->fft = std::make_shared(FL, 1, fftParams); + P->q.initializeNUFFT(FL, 1, fftParams); P->E.initializeNUFFT(FL, 2, fftParams); diff --git a/src/FFT/FFT.hpp 
b/src/FFT/FFT.hpp index 59f8d3ca6..b28196de7 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -845,6 +845,20 @@ namespace ippl { auto localNp = R.getParticleCount(); + const Layout_t& layout = f.getLayout(); + const UniformCartesian& mesh = f.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + const double pi = std::acos(-1.0); + /** * cuFINUFFT's layout is left, hence we allocate the temporary * Kokkos views with the same layout @@ -891,7 +905,7 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i) { for(size_t d = 0; d < Dim; ++d) { - tempR[d](i) = Rview(i)[d]; + tempR[d](i) = Rview(i)[d] * (2.0 * pi / Len[d]); } tempQ(i).x = Qview(i); tempQ(i).y = 0.0; diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index fc6fe1430..429758b36 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -268,8 +268,9 @@ namespace ippl { Vector iVec = {i, j, k}; vector_type kVec; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); } auto Sk = Skview(i+nghost, j+nghost, k+nghost); Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), @@ -428,10 +429,10 @@ namespace ippl { vector_type kVec; double Dr = 0.0, arg = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); //kVec[d] = 2 * pi / Len[d] * iVec[d]; - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); Dr += kVec[d] * kVec[d]; arg += kVec[d]*pp(idx)[d]; } @@ -497,6 +498,8 @@ namespace ippl { auto q = *this; fftType_mp->transform(pp, q, f); + + //std::cout << "NUFFT transform done" << std::endl; using view_type = typename Field::view_type; view_type fview = f.getView(); @@ -579,12 +582,13 @@ namespace ippl { const int j, const int k) { - Vector iVec = {i, j, k}; + Vector iVec = {i-nghost, j-nghost, k-nghost}; Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + //kVec[d] = (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index 73629a0e0..c2cf66d09 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -73,7 +73,7 @@ int main(int argc, char *argv[]) { typedef Bunch bunch_type; - std::array pt = {256, 256, 256}; + ippl::Vector pt = {32, 32, 32}; ippl::Index I(pt[0]); ippl::Index J(pt[1]); ippl::Index K(pt[2]); @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) { typedef ippl::Vector Vector_t; Vector_t hx = {dx[0], dx[1], dx[2]}; - Vector_t origin = {-pi, -pi, -pi}; + Vector_t origin = {-2.0 * pi, -2.0 * pi, -2.0 * pi}; ippl::UniformCartesian mesh(owned, hx, origin); playout_type pl(layout, mesh); @@ -105,18 +105,19 @@ int main(int argc, char *argv[]) { using size_type = ippl::detail::size_type; - size_type Np = std::pow(256,3) * 8; + size_type Np = std::pow(32,3) * 20; typedef ippl::Field, dim> field_type; field_type field(mesh, layout); + field_type field_dft(mesh, layout); ippl::ParameterList fftParams; 
fftParams.add("gpu_method", 1); fftParams.add("gpu_sort", 1); fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-6); + fftParams.add("tolerance", 1e-10); fftParams.add("use_cufinufft_defaults", false); @@ -166,7 +167,85 @@ int main(int argc, char *argv[]) { auto Qview = bunch.Q.getView(); Kokkos::complex imag = {0.0, 1.0}; - + size_t flatN = pt[0] * pt[1] * pt[2]; + auto fview = field_dft.getView(); + + + + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + Kokkos::parallel_for("NUDFT type 1", + team_policy(flatN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type& teamMember) { + const size_t flatIndex = teamMember.league_rank(); + + const int k = (int)(flatIndex / (pt[0] * pt[1])); + const int flatIndex2D = flatIndex - (k * pt[0] * pt[1]); + const int i = flatIndex2D % pt[0]; + const int j = (int)(flatIndex2D / pt[0]); + + Kokkos::complex reducedValue = 0.0; + ippl::Vector iVec = {i, j, k}; + ippl::VectorkVec; + for(size_t d = 0; d < 3; ++d) { + kVec[d] = (2.0 * pi / (maxU[d] - minU[d])) * (iVec[d] - (pt[d] / 2)); + } + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, nloc), + [=](const size_t idx, Kokkos::complex& innerReduce) + { + double arg = 0.0; + for(size_t d = 0; d < 3; ++d) { + arg += kVec[d]*Rview(idx)[d]; + } + const double& val = Qview(idx); + + innerReduce += (Kokkos::Experimental::cos(arg) + - imag * Kokkos::Experimental::sin(arg)) * val; + }, Kokkos::Sum>(reducedValue)); + + if(teamMember.team_rank() == 0) { + fview(i+nghost,j+nghost,k+nghost) = reducedValue; + } + + }); + + typename field_type::HostMirror rhoNUDFT_host = field_dft.getHostMirror(); + Kokkos::deep_copy(rhoNUDFT_host, field_dft.getView()); + std::stringstream pname; + pname << "data/FieldFFT_"; + pname << Ippl::Comm->rank(); + pname << ".csv"; + Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout.precision(10); + pcsvout.setf(std::ios::scientific, std::ios::floatfield); + pcsvout << "rho" << endl; + for (int i = 0; i< pt[0]; i++) { + for (int j = 0; j< pt[1]; j++) { + for (int k = 0; k< pt[2]; k++) { + pcsvout << field_result(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + std::stringstream pname2; + pname2 << "data/FieldDFT_"; + pname2 << Ippl::Comm->rank(); + pname2 << ".csv"; + Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout2.precision(10); + pcsvout2.setf(std::ios::scientific, std::ios::floatfield); + pcsvout2 << "rho" << endl; + for (int i = 0; i< pt[0]; i++) { + for (int j = 0; j< pt[1]; j++) { + for (int k = 0; k< pt[2]; k++) { + pcsvout2 << rhoNUDFT_host(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + Ippl::Comm->barrier(); + + + Kokkos::parallel_reduce("NUDFT type1", nloc, KOKKOS_LAMBDA(const size_t idx, Kokkos::complex& valL) { @@ -188,6 +267,9 @@ int main(int argc, char *argv[]) { << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; std::cout << "Abs Error in imag part: " << std::setprecision(16) << abs_error_imag << " Rel. 
error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; + std::cout << "Field result: " << std::setprecision(16) + << field_result(iInd,jInd,kInd).real() << " " << std::setprecision(16) << field_result(iInd,jInd,kInd).imag() + << "index: " << iInd << "," << jInd << "," << kInd << std::endl; //Kokkos::complex max_error(0.0, 0.0); diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp index f5063b84c..56ac68622 100644 --- a/test/FFT/TestNUFFT2.cpp +++ b/test/FFT/TestNUFFT2.cpp @@ -116,7 +116,7 @@ int main(int argc, char *argv[]) { //typedef ippl::Vector, 3> CxVector_t; Vector_t hx = {dx[0], dx[1], dx[2]}; - Vector_t origin = {-pi, -pi, -pi}; + Vector_t origin = {-2.0 * pi, -2.0 * pi, -2.0 * pi}; ippl::UniformCartesian mesh(owned, hx, origin); playout_type pl(layout, mesh); @@ -127,7 +127,7 @@ int main(int argc, char *argv[]) { using size_type = ippl::detail::size_type; - size_type Np = std::pow(32,3) * 10; + size_type Np = std::pow(32,3) * 20; typedef ippl::Field, dim> field_type; @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) { fftParams.add("gpu_method", 1); fftParams.add("gpu_sort", 1); fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-12); + fftParams.add("tolerance", 1e-10); fftParams.add("use_cufinufft_defaults", false); @@ -150,10 +150,10 @@ int main(int argc, char *argv[]) { fft = std::make_unique(layout, type, fftParams); + Vector_t minU = {-pi, -pi, -pi}; Vector_t maxU = {pi, pi, pi}; - size_type nloc = Np/Ippl::Comm->size(); const int nghost = field.getNghost(); From 8778100258e149ddd8013f39b0b1879575a3a8ce Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 21 Feb 2023 16:03:21 +0100 Subject: [PATCH 068/117] bugs in the origin corrected in TestNUFFT1 and TestNUFFT2 --- test/FFT/TestNUFFT1.cpp | 2 +- test/FFT/TestNUFFT2.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index c2cf66d09..a244e7816 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) { typedef ippl::Vector Vector_t; Vector_t hx = {dx[0], dx[1], dx[2]}; - Vector_t origin = {-2.0 * pi, -2.0 * pi, -2.0 * pi}; + Vector_t origin = {-pi, -pi, -pi}; ippl::UniformCartesian mesh(owned, hx, origin); playout_type pl(layout, mesh); diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp index 56ac68622..d48abe9fd 100644 --- a/test/FFT/TestNUFFT2.cpp +++ b/test/FFT/TestNUFFT2.cpp @@ -116,7 +116,7 @@ int main(int argc, char *argv[]) { //typedef ippl::Vector, 3> CxVector_t; Vector_t hx = {dx[0], dx[1], dx[2]}; - Vector_t origin = {-2.0 * pi, -2.0 * pi, -2.0 * pi}; + Vector_t origin = {-pi, -pi, -pi}; ippl::UniformCartesian mesh(owned, hx, origin); playout_type pl(layout, mesh); From 65664ff5c2df9c4c3f1c7ded9b814094759b542f Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 22 Feb 2023 10:34:39 +0100 Subject: [PATCH 069/117] PinT also works with NUFFT. Need to do space-time parallel now. 
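This switches the PinT mini-apps from the direct scatterPIF/gatherPIF kernels to the cuFINUFFT-backed scatterPIFNUFFT/gatherPIFNUFFT, and resets the per-particle charge to Q_m / Np_m after each field gather. The NUFFT path uses the centered mode ordering, kVec[d] = 2*pi/Len[d] * (i - N[d]/2) for i = 0 .. N[d]-1, together with particle coordinates rescaled by 2*pi/Len[d] before the transform, as introduced in the FFT.hpp and ParticleAttrib.hpp changes and mirrored in the ChargedParticlesPinT.hpp hunks below.

The following is a minimal standalone sketch of that convention for a single dimension. It is illustration only, not code from this repository; N, Len and pos are assumed placeholder values.

    // Sketch only: centered NUFFT mode ordering and coordinate rescaling
    // for one dimension. N, Len and pos are hypothetical values.
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        const double pi  = std::acos(-1.0);
        const int    N   = 8;    // number of Fourier modes (assumed)
        const double Len = 4.0;  // physical domain length (assumed)

        // Modes run from -N/2 to N/2-1 in increasing order,
        // mirroring kVec[d] = 2*pi/Len[d] * (iVec[d] - N[d]/2).
        std::vector<double> k(N);
        for (int i = 0; i < N; ++i) {
            k[i] = 2.0 * pi / Len * (i - N / 2);
        }

        // Positions in [0, Len) are mapped to [0, 2*pi) before the NUFFT,
        // mirroring tempR[d](i) = Rview(i)[d] * (2.0 * pi / Len[d]).
        std::vector<double> pos = {0.5, 1.25, 3.9};
        for (double& x : pos) {
            x *= 2.0 * pi / Len;
        }

        for (int i = 0; i < N; ++i) {
            std::printf("mode %3d  ->  k = % .6f\n", i - N / 2, k[i]);
        }
        for (double x : pos) {
            std::printf("scaled position: %.6f\n", x);
        }
        return 0;
    }

The same mapping is what the direct NUDFT check in TestNUFFT1.cpp evaluates, via kVec[d] = (2.0 * pi / (maxU[d] - minU[d])) * (iVec[d] - pt[d]/2).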
--- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 2 +- alpine/PinT/ChargedParticlesPinT.hpp | 53 +++++++++++++------- alpine/PinT/LandauDampingPinT.cpp | 22 +++++--- src/Particle/ParticleAttrib.hpp | 29 +++++++---- 4 files changed, 71 insertions(+), 35 deletions(-) diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 08e187ec8..74d921320 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -276,7 +276,7 @@ int main(int argc, char *argv[]){ fftParams.add("gpu_method", 1); fftParams.add("gpu_sort", 1); fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-10); + fftParams.add("tolerance", 1e-2); fftParams.add("use_cufinufft_defaults", false); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 6bd2360a4..91d835be0 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -69,6 +69,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Q_m; + size_type Np_m; + std::shared_ptr solver_mp; double time_m; @@ -110,12 +112,14 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector_t rmin, Vector_t rmax, ippl::e_dim_tag decomp[Dim], - double Q) + double Q, + size_type Np) : ippl::ParticleBase(pl) , hr_m(hr) , rmin_m(rmin) , rmax_m(rmax) , Q_m(Q) + , Np_m(Np) { // register the particle attributes this->addAttribute(q); @@ -262,8 +266,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -346,8 +351,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -432,8 +438,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); //kVec[d] = 2 * pi / Len[d] * iVec[d]; Dr += kVec[d] * kVec[d]; } @@ -655,8 +662,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector kVec; double Sk = 1.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); double kh = kVec[d] * dx[d]; bool isNotZero = (kh != 0.0); double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); @@ -856,12 +864,14 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; - scatterPIF(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * 
(rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + + q = Q_m / Np_m; time_m = tStartMySlice; @@ -888,13 +898,15 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; - scatterPIF(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); - + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + + q = Q_m / Np_m; + //kick Ptemp = Ptemp - 0.5 * dt * E; @@ -920,13 +932,15 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; - scatterPIF(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); - + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + + q = Q_m / Np_m; + time_m = tStartMySlice; if((time_m == 0.0)) { @@ -977,13 +991,14 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; - scatterPIF(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIF(E, rhoPIF_m, Sk_m, Rtemp, q); + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + q = Q_m / Np_m; //kick auto R2view = Rtemp.getView(); auto P2view = Ptemp.getView(); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 5662b2619..985c2c3c3 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -456,10 +456,8 @@ int main(int argc, char *argv[]){ Vector_t kw = {0.5, 0.5, 0.5}; //double alpha = 0.05; Vector_t alpha = {0.05, 0.05, 0.05}; - //Vector_t rmin(0.0); - //Vector_t rmax = 2 * pi / kw ; - Vector_t rmin(-2.0 * pi); - Vector_t rmax = 2 * pi; + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw ; Vector_t length = rmax - rmin; double dxPIC = length[0] / nrPIC[0]; double dyPIC = length[1] / nrPIC[1]; @@ -482,7 +480,7 @@ int main(int argc, char *argv[]){ //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,totalP); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -622,7 +620,19 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeShapeFunctionPIF); Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - + + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-6); + + fftParams.add("use_cufinufft_defaults", false); + + Pcoarse->q.initializeNUFFT(FLPIF, 1, fftParams); + Pcoarse->E.initializeNUFFT(FLPIF, 2, fftParams); + //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 429758b36..b7b5b92fe 100644 
--- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -496,16 +496,35 @@ namespace ippl { IpplTimings::startTimer(scatterPIFNUFFTTimer); auto q = *this; + + //Field tempField; + + //FieldLayout& layout = f.getLayout(); + //M& mesh = f.get_mesh(); + //tempField.initialize(mesh, layout); + // + //fftType_mp->transform(pp, q, tempField); fftType_mp->transform(pp, q, f); - //std::cout << "NUFFT transform done" << std::endl; using view_type = typename Field::view_type; view_type fview = f.getView(); + //view_type viewLocal = tempField.getView(); typename Field::view_type Skview = Sk.getView(); const int nghost = f.getNghost(); + IpplTimings::stopTimer(scatterPIFNUFFTTimer); + + //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); + //IpplTimings::startTimer(scatterAllReduceTimer); + //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + //MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + // MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); + //IpplTimings::stopTimer(scatterAllReduceTimer); + + //IpplTimings::startTimer(scatterPIFNUFFTTimer); + using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for("Multiply with shape functions", mdrange_type({nghost, nghost, nghost}, @@ -520,14 +539,6 @@ namespace ippl { }); IpplTimings::stopTimer(scatterPIFNUFFTTimer); - - //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); - //IpplTimings::startTimer(scatterAllReduceTimer); - //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); - //MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, - // MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); - //IpplTimings::stopTimer(scatterAllReduceTimer); - } From 9474ff8d3e85676f802061b86f7923dcd7afec4a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 24 Feb 2023 08:24:33 +0100 Subject: [PATCH 070/117] Twostream instability and Penning trap ran with higher no. 
of modes and particles --- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 2 +- alpine/PinT/BumponTailInstabilityPinT.cpp | 19 ++++++++++++------- alpine/PinT/ChargedParticlesPinT.hpp | 15 +++++++++++++++ alpine/PinT/LandauDampingPinT.cpp | 19 +++++-------------- alpine/PinT/PenningTrapPinT.cpp | 11 +++++++++-- src/Particle/ParticleAttrib.hpp | 2 +- 6 files changed, 43 insertions(+), 25 deletions(-) diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 74d921320..d0e9d3b92 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -276,7 +276,7 @@ int main(int argc, char *argv[]){ fftParams.add("gpu_method", 1); fftParams.add("gpu_sort", 1); fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-2); + fftParams.add("tolerance", 1e-4); fftParams.add("use_cufinufft_defaults", false); diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 389f619d9..9abdc69bf 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -507,9 +507,17 @@ int main(int argc, char *argv[]){ FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); PLayout_t PL(FLPIC, meshPIC); + + double factorVelBulk = 1.0 - epsilon; + double factorVelBeam = 1.0 - factorVelBulk; + size_type nlocBulk = (size_type)(factorVelBulk * totalP); + size_type nlocBeam = (size_type)(factorVelBeam * totalP); + size_type nloc = nlocBulk + nlocBeam; + + //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,nloc); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -534,11 +542,6 @@ int main(int argc, char *argv[]){ maxU[d] = CDF(rmax[d], delta, kw[d], d); } - double factorVelBulk = 1.0 - epsilon; - double factorVelBeam = 1.0 - factorVelBulk; - size_type nlocBulk = (size_type)(factorVelBulk * totalP); - size_type nlocBeam = (size_type)(factorVelBeam * totalP); - size_type nloc = nlocBulk + nlocBeam; Pcoarse->create(nloc); Pbegin->create(nloc); @@ -673,7 +676,9 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeShapeFunctionPIF); Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - + + Pcoarse->initNUFFT(FLPIF); + for (unsigned int it=0; it { solver_mp->setLhs(EfieldPIC_m); } + + void initNUFFT(FieldLayout_t& FLPIF) { + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-6); + + fftParams.add("use_cufinufft_defaults", false); + + q.initializeNUFFT(FLPIF, 1, fftParams); + E.initializeNUFFT(FLPIF, 2, fftParams); + } + void dumpLandauPIC() { const int nghostE = EfieldPIC_m.getNghost(); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 985c2c3c3..f08a275b4 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -478,9 +478,12 @@ int main(int argc, char *argv[]){ FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); PLayout_t PL(FLPIC, meshPIC); + + size_type nloc = totalP; + //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,totalP); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,nloc); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -507,7 +510,6 
@@ int main(int argc, char *argv[]){ //maxU[d] = rmax[d]; } - size_type nloc = totalP; Pcoarse->create(nloc); Pbegin->create(nloc); @@ -621,18 +623,7 @@ int main(int argc, char *argv[]){ Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - ippl::ParameterList fftParams; - - fftParams.add("gpu_method", 1); - fftParams.add("gpu_sort", 1); - fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-6); - - fftParams.add("use_cufinufft_defaults", false); - - Pcoarse->q.initializeNUFFT(FLPIF, 1, fftParams); - Pcoarse->E.initializeNUFFT(FLPIF, 2, fftParams); - + Pcoarse->initNUFFT(FLPIF); //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 016351a19..f18fc3aa2 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -471,9 +471,12 @@ int main(int argc, char *argv[]){ FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); PLayout_t PL(FLPIC, meshPIC); + size_type nloc = totalP; + + double Q = -1562.5; double Bext = 5.0; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,nloc); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -498,7 +501,6 @@ int main(int argc, char *argv[]){ maxU[d] = CDF(rmax[d], mu[d], sd[d]); } - size_type nloc = totalP; Pcoarse->create(nloc); Pbegin->create(nloc); @@ -630,6 +632,11 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeShapeFunctionPIF); Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); + + + Pcoarse->initNUFFT(FLPIF); + + for (unsigned int it=0; ittransform(pp, q, tempField); fftType_mp->transform(pp, q, f); From 5f508e40e99de9bdb5ac79ac306cc0d4c08905e3 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 3 Mar 2023 15:34:01 +0100 Subject: [PATCH 071/117] Block parareal first version done --- alpine/PinT/ChargedParticlesPinT.hpp | 37 +++ alpine/PinT/PenningTrapPinT.cpp | 335 ++++++++++++++++----------- 2 files changed, 240 insertions(+), 132 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 7596d987d..963192044 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -175,6 +175,43 @@ class ChargedParticlesPinT : public ippl::ParticleBase { E.initializeNUFFT(FLPIF, 2, fftParams); } + void initializeParareal(ParticleAttrib& Rbegin, + ParticleAttrib& Pbegin, + bool& isConverged, + bool& isPreviousDomainConverged, + const unsigned int& ntCoarse, + const double& dtCoarse, + const double& tStartMySlice, + const double& Bext) { + + //Copy initial conditions as they are needed later + Kokkos::deep_copy(R0.getView(), R.getView()); + Kokkos::deep_copy(P0.getView(), P.getView()); + + //Get initial guess for ranks other than 0 by propagating the coarse solver + if (Ippl::Comm->rank() > 0) { + BorisPIC(R, P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); + } + + Ippl::Comm->barrier(); + + Kokkos::deep_copy(Rbegin.getView(), R.getView()); + Kokkos::deep_copy(Pbegin.getView(), P.getView()); + + + //Run the coarse integrator to get the values at the end of the time slice + Pcoarse->BorisPIC(R, P, ntCoarse, dtCoarse, tStartMySlice, Bext); + + isConverged = false; + if(Ippl::Comm->rank() == 0) { + isPreviousDomainConverged = true; + } + else { + 
isPreviousDomainConverged = false; + } + } + + void dumpLandauPIC() { const int nghostE = EfieldPIC_m.getNghost(); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index f18fc3aa2..d76885630 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -406,15 +406,16 @@ int main(int argc, char *argv[]){ const size_type totalP = std::atoll(argv[7]); const double tEnd = std::atof(argv[8]); - const double dtSlice = tEnd / Ippl::Comm->size(); + const unsigned int maxCycles = std::atoi(argv[12]); + double tEndCycle = tEnd / maxCycles; + const double dtSlice = tEndCycle / Ippl::Comm->size(); const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); const unsigned int ntFine = std::ceil(dtSlice / dtFine); const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); - const unsigned int maxIter = std::atoi(argv[12]); + //const unsigned int maxIter = std::atoi(argv[12]); - const double tStartMySlice = Ippl::Comm->rank() * dtSlice; //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; @@ -491,7 +492,6 @@ int main(int argc, char *argv[]){ //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); - Pcoarse->time_m = tStartMySlice; IpplTimings::startTimer(particleCreation); @@ -509,9 +509,41 @@ int main(int argc, char *argv[]){ using buffer_type = ippl::Communicate::buffer_type; int tag; #ifdef KOKKOS_ENABLE_CUDA + //If we don't do the following even with the same seed the initial + //condition is not the same on different GPUs + //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + //if(Ippl::Comm->rank() == 0) { + // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + // Kokkos::parallel_for(nloc, + // generate_random, Dim>( + // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, + // minU, maxU)); + + + // Kokkos::fence(); + // size_type bufSize = Pbegin->packedSize(nloc); + // std::vector requests(0); + // int sends = 0; + // for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); + // requests.resize(requests.size() + 1); + // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); + // buf->resetWritePos(); + // ++sends; + // } + // MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + //} + //else { + // size_type bufSize = Pbegin->packedSize(nloc); + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + // buf->resetReadPos(); + //} + //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(Ippl::Comm->rank() == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); Kokkos::parallel_for(nloc, @@ -521,24 +553,24 @@ int main(int argc, char *argv[]){ Kokkos::fence(); - size_type bufSize = Pbegin->packedSize(nloc); - std::vector requests(0); - int sends = 0; - for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); - requests.resize(requests.size() + 1); - Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); - buf->resetWritePos(); - ++sends; - } - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } else { size_type 
bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); } + + + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pbegin, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + Ippl::Comm->barrier(); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); @@ -565,7 +597,8 @@ int main(int argc, char *argv[]){ << "No. of coarse time steps: " << ntCoarse << endl << "Tolerance: " << tol - << " Max. iterations: " << maxIter + //<< " Max. iterations: " << maxIter + << " Max. cycles: " << maxCycles << endl << "Np= " << nloc << " Fourier modes = " << nmPIF @@ -578,55 +611,59 @@ int main(int argc, char *argv[]){ msg << "particles created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); - //Get initial guess for ranks other than 0 by propagating the coarse solver - IpplTimings::startTimer(coarsePropagator); - if (Ippl::Comm->rank() > 0) { - Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); - } - - Ippl::Comm->barrier(); - - IpplTimings::stopTimer(coarsePropagator); + ////Get initial guess for ranks other than 0 by propagating the coarse solver + //IpplTimings::startTimer(coarsePropagator); + //if (Ippl::Comm->rank() > 0) { + // Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); + //} + // + //Ippl::Comm->barrier(); + // + //IpplTimings::stopTimer(coarsePropagator); - msg << "First Boris PIC done " << endl; + //msg << "First Boris PIC done " << endl; - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); + // + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); - //Run the coarse integrator to get the values at the end of the time slice - IpplTimings::startTimer(coarsePropagator); - Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); - IpplTimings::stopTimer(coarsePropagator); - msg << "Second Boris PIC done " << endl; + ////Run the coarse integrator to get the values at the end of the time slice + //IpplTimings::startTimer(coarsePropagator); + //Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); + //IpplTimings::stopTimer(coarsePropagator); + //msg << "Second Boris PIC done " << endl; - //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + 
////Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + + ////The following might not be needed + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + + //msg << "Starting parareal iterations ..." << endl; + //bool isConverged = false; + //bool isPreviousDomainConverged; + //if(Ippl::Comm->rank() == 0) { + // isPreviousDomainConverged = true; + //} + //else { + // isPreviousDomainConverged = false; + //} + + bool isConverged, isPreviousDomainConverged; - //The following might not be needed - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); - msg << "Starting parareal iterations ..." << endl; - bool isConverged = false; - bool isPreviousDomainConverged; - if(Ippl::Comm->rank() == 0) { - isPreviousDomainConverged = true; - } - else { - isPreviousDomainConverged = false; - } - Pcoarse->shapetype_m = argv[13]; Pcoarse->shapedegree_m = std::atoi(argv[14]); IpplTimings::startTimer(initializeShapeFunctionPIF); @@ -636,105 +673,139 @@ int main(int argc, char *argv[]){ Pcoarse->initNUFFT(FLPIF); - - for (unsigned int it=0; itBorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1, Bext); - IpplTimings::stopTimer(finePropagator); - + + unsigned int it = 0; + for (unsigned int nc=0; nc < maxCycles; nc++) { + double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); + Pcoarse->time_m = tStartMySlice; + Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, + isPreviousDomainConverged, ntCoarse, + dtCoarse, tStartMySlice, Bext); + while ((!isPreviousDomainConverged) || (!isConverged)) { + //for (unsigned int it=0; it < maxIter; it++) { + + //Run fine integrator in parallel + IpplTimings::startTimer(finePropagator); + Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1, Bext); + IpplTimings::stopTimer(finePropagator); + - //Difference = Fine - Coarse - Pend->R = Pbegin->R - Pcoarse->R; - Pend->P = Pbegin->P - Pcoarse->P; + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); - //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); + //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); - - IpplTimings::startTimer(timeCommunication); - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - - if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { - size_type bufSize = Pbegin->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); - buf->resetReadPos(); - MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, - Ippl::getComm(), MPI_STATUS_IGNORE); IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); - 
Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); IpplTimings::stopTimer(deepCopy); - } - IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(timeCommunication); + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, + Ippl::getComm(), MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + } + IpplTimings::stopTimer(timeCommunication); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); - Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); - IpplTimings::startTimer(coarsePropagator); - Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); - IpplTimings::stopTimer(coarsePropagator); + IpplTimings::startTimer(coarsePropagator); + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); + IpplTimings::stopTimer(coarsePropagator); - Pend->R = Pend->R + Pcoarse->R; - Pend->P = Pend->P + Pcoarse->P; + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); - PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); - IpplTimings::startTimer(computeErrors); - double localRerror, localPerror; - double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - - IpplTimings::stopTimer(computeErrors); + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); + IpplTimings::startTimer(computeErrors); + double localRerror, localPerror; + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + IpplTimings::stopTimer(computeErrors); - if((Rerror <= tol) && (Perror <= tol)) { - isConverged = true; - } + if((Rerror <= tol) && (Perror <= tol)) { + isConverged = true; + } + IpplTimings::startTimer(timeCommunication); + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = 
Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); + } + IpplTimings::stopTimer(timeCommunication); + + + msg << "Finished iteration: " << it+1 + << " in cycle: " << nc+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << endl; + + IpplTimings::startTimer(dumpData); + //Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(localRerror, localPerror, it+1); + //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); + IpplTimings::stopTimer(dumpData); + + it += 1; + //if(isConverged && isPreviousDomainConverged) { + // break; + //} + } + + Ippl::Comm->barrier(); IpplTimings::startTimer(timeCommunication); + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); + buf->resetReadPos(); + } + if(Ippl::Comm->rank() > 0) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); } IpplTimings::stopTimer(timeCommunication); - - - msg << "Finished iteration: " << it+1 - << " Rerror: " << Rerror - << " Perror: " << Perror - << endl; - - IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(localRerror, localPerror, it+1); - //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - IpplTimings::stopTimer(dumpData); - - if(isConverged && isPreviousDomainConverged) { - break; - } + Ippl::Comm->barrier(); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); } - - Ippl::Comm->barrier(); msg << TestName << " Parareal: End." << endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); From 1331f37f619b81075da7b75edd876f0408f45ec0 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 4 Mar 2023 08:03:52 +0100 Subject: [PATCH 072/117] Output writing changed for block parareal. 
Need to compile and test --- alpine/PinT/ChargedParticlesPinT.hpp | 20 +++++++++++++------- alpine/PinT/PenningTrapPinT.cpp | 17 ++++++++--------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 963192044..07f823292 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -450,7 +450,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { - void dumpEnergy(size_type /*totalP*/, const unsigned int& iter, ParticleAttrib& Ptemp) { + void dumpEnergy(size_type /*totalP*/, const unsigned int& nc, + const unsigned int& iter, ParticleAttrib& Ptemp) { double potentialEnergy, kineticEnergy; @@ -543,8 +544,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { kineticEnergy = globaltemp; std::stringstream fname; - fname << "data/Energy_"; + fname << "data/Energy_rank_"; fname << Ippl::Comm->rank(); + fname << "_nc_"; + fname << nc; fname << "_iter_"; fname << iter; fname << ".csv"; @@ -592,11 +595,13 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } } - void writelocalError(double Rerror, double Perror, unsigned int iter) { + void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter) { std::stringstream fname; - fname << "data/localError_"; + fname << "data/localError_rank_"; fname << Ippl::Comm->rank(); + fname << "_nc_"; + fname << nc; fname << ".csv"; Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); @@ -977,7 +982,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, const double& dt, const bool& /*isConverged*/, - const double& tStartMySlice, const unsigned int& iter, const double& Bext) { + const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, const double& Bext) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); @@ -997,7 +1003,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0)) { IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), iter, Ptemp); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); IpplTimings::stopTimer(dumpData); } double alpha = -0.5 * dt; @@ -1074,7 +1080,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), iter, Ptemp); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); IpplTimings::stopTimer(dumpData); } diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index d76885630..1c03e69b9 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -6,7 +6,7 @@ // European Conference on Parallel Processing. Springer, Cham, 2017. // // Usage: -// srun ./PenningTrapPinT +// srun ./PenningTrapPinT // --info 5 // nmx = No. of Fourier modes in the x-direction // nmy = No. of Fourier modes in the y-direction @@ -15,6 +15,7 @@ // ny = No. of grid points in the y-direction // nz = No. of grid points in the z-direction // Np = Total no. of macro-particles in the simulation +// nCycles = No. 
of Parareal blocks/cycles // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) // Example: @@ -406,8 +407,8 @@ int main(int argc, char *argv[]){ const size_type totalP = std::atoll(argv[7]); const double tEnd = std::atof(argv[8]); - const unsigned int maxCycles = std::atoi(argv[12]); - double tEndCycle = tEnd / maxCycles; + const unsigned int nCycles = std::atoi(argv[12]); + double tEndCycle = tEnd / nCycles; const double dtSlice = tEndCycle / Ippl::Comm->size(); const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); @@ -598,7 +599,7 @@ int main(int argc, char *argv[]){ << endl << "Tolerance: " << tol //<< " Max. iterations: " << maxIter - << " Max. cycles: " << maxCycles + << " Max. cycles: " << nCycles << endl << "Np= " << nloc << " Fourier modes = " << nmPIF @@ -662,8 +663,6 @@ int main(int argc, char *argv[]){ bool isConverged, isPreviousDomainConverged; - - Pcoarse->shapetype_m = argv[13]; Pcoarse->shapedegree_m = std::atoi(argv[14]); IpplTimings::startTimer(initializeShapeFunctionPIF); @@ -675,7 +674,7 @@ int main(int argc, char *argv[]){ unsigned int it = 0; - for (unsigned int nc=0; nc < maxCycles; nc++) { + for (unsigned int nc=0; nc < nCycles; nc++) { double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); Pcoarse->time_m = tStartMySlice; Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, @@ -686,7 +685,7 @@ int main(int argc, char *argv[]){ //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1, Bext); + Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, nc+1, it+1, Bext); IpplTimings::stopTimer(finePropagator); @@ -771,7 +770,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(localRerror, localPerror, it+1); + Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); IpplTimings::stopTimer(dumpData); From 2b46726cf3e1ed6d953a843a6f95299fc4e447fb Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 6 Mar 2023 22:24:17 +0100 Subject: [PATCH 073/117] multiCycle Parareal implemented for PenningTrap and TSI. 
Need to postprocess and verify --- alpine/PinT/BumponTailInstabilityPinT.cpp | 364 +++++++++++++--------- alpine/PinT/ChargedParticlesPinT.hpp | 61 +++- alpine/PinT/PenningTrapPinT.cpp | 67 ++-- 3 files changed, 304 insertions(+), 188 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 9abdc69bf..188760492 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -420,16 +420,18 @@ int main(int argc, char *argv[]){ const size_type totalP = std::atoll(argv[7]); const double tEnd = std::atof(argv[8]); - const double dtSlice = tEnd / Ippl::Comm->size(); + const unsigned int nCycles = std::atoi(argv[12]); + double tEndCycle = tEnd / nCycles; + const double dtSlice = tEndCycle / Ippl::Comm->size(); const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); const unsigned int ntFine = std::ceil(dtSlice / dtFine); const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); - const unsigned int maxIter = std::atoi(argv[12]); + //const unsigned int maxIter = std::atoi(argv[12]); - const double tStartMySlice = Ippl::Comm->rank() * dtSlice; + //const double tStartMySlice = Ippl::Comm->rank() * dtSlice; //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; @@ -532,7 +534,6 @@ int main(int argc, char *argv[]){ //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); - Pcoarse->time_m = tStartMySlice; IpplTimings::startTimer(particleCreation); @@ -552,7 +553,42 @@ int main(int argc, char *argv[]){ #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs + //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + //if(Ippl::Comm->rank() == 0) { + // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + // Kokkos::parallel_for(nloc, + // generate_random, Dim>( + // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, delta, kw, + // sigma, muBulk, muBeam, nlocBulk, minU, maxU)); + + + // Kokkos::fence(); + // size_type bufSize = Pbegin->packedSize(nloc); + // std::vector requests(0); + // int sends = 0; + // for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); + // requests.resize(requests.size() + 1); + // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); + // buf->resetWritePos(); + // ++sends; + // } + // MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + //} + //else { + // size_type bufSize = Pbegin->packedSize(nloc); + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + // buf->resetReadPos(); + //} + //Ippl::Comm->barrier(); + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + //IpplTimings::stopTimer(deepCopy); + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(Ippl::Comm->rank() == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); Kokkos::parallel_for(nloc, @@ -562,25 +598,25 @@ int main(int argc, char *argv[]){ Kokkos::fence(); - size_type bufSize = Pbegin->packedSize(nloc); - std::vector requests(0); - int sends = 0; - for(int rank = 1; rank < Ippl::Comm->size(); 
++rank) { - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); - requests.resize(requests.size() + 1); - Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); - buf->resetWritePos(); - ++sends; - } - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } else { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); } - Ippl::Comm->barrier(); + + + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pbegin, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + + //Ippl::Comm->barrier(); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); @@ -609,7 +645,7 @@ int main(int argc, char *argv[]){ << "No. of coarse time steps: " << ntCoarse << endl << "Tolerance: " << tol - << " Max. iterations: " << maxIter + << " No. of cycles: " << nCycles << endl << "Np= " << nloc << " Fourier modes = " << nmPIF @@ -622,55 +658,57 @@ int main(int argc, char *argv[]){ msg << "particles created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); - - //Get initial guess for ranks other than 0 by propagating the coarse solver - IpplTimings::startTimer(coarsePropagator); - if (Ippl::Comm->rank() > 0) { - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); - } - - Ippl::Comm->barrier(); - - IpplTimings::stopTimer(coarsePropagator); - - msg << "First Leap frog PIC done " << endl; - - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); - - - //Run the coarse integrator to get the values at the end of the time slice - IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - IpplTimings::stopTimer(coarsePropagator); - msg << "Second Leap frog PIC done " << endl; - - //Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - - //The following might not be needed - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); - + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + ////Get initial guess for ranks other than 0 by propagating the coarse solver + //IpplTimings::startTimer(coarsePropagator); + //if (Ippl::Comm->rank() > 0) { + // Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); + //} + // + //Ippl::Comm->barrier(); + // + 
//IpplTimings::stopTimer(coarsePropagator); + + //msg << "First Leap frog PIC done " << endl; + + // + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + + ////Run the coarse integrator to get the values at the end of the time slice + //IpplTimings::startTimer(coarsePropagator); + //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + //IpplTimings::stopTimer(coarsePropagator); + //msg << "Second Leap frog PIC done " << endl; + + ////Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); + + ////The following might not be needed + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + + //msg << "Starting parareal iterations ..." << endl; + //bool isConverged = false; + //bool isPreviousDomainConverged; + //if(Ippl::Comm->rank() == 0) { + // isPreviousDomainConverged = true; + //} + //else { + // isPreviousDomainConverged = false; + //} + + bool isConverged, isPreviousDomainConverged; - msg << "Starting parareal iterations ..." << endl; - bool isConverged = false; - bool isPreviousDomainConverged; - if(Ippl::Comm->rank() == 0) { - isPreviousDomainConverged = true; - } - else { - isPreviousDomainConverged = false; - } - Pcoarse->shapetype_m = argv[13]; Pcoarse->shapedegree_m = std::atoi(argv[14]); IpplTimings::startTimer(initializeShapeFunctionPIF); @@ -679,108 +717,140 @@ int main(int argc, char *argv[]){ Pcoarse->initNUFFT(FLPIF); - for (unsigned int it=0; itLeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1); - IpplTimings::stopTimer(finePropagator); + for (unsigned int nc=0; nc < nCycles; nc++) { + double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); + Pcoarse->time_m = tStartMySlice; + Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, + isPreviousDomainConverged, ntCoarse, + dtCoarse, tStartMySlice); + unsigned int it = 0; + while (!isConverged) { + //Run fine integrator in parallel + IpplTimings::startTimer(finePropagator); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, nc+1, it+1); + IpplTimings::stopTimer(finePropagator); - //Difference = Fine - Coarse - Pend->R = Pbegin->R - Pcoarse->R; - Pend->P = Pbegin->P - Pcoarse->P; + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); - //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); + //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); - - IpplTimings::startTimer(timeCommunication); - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - - if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { - size_type bufSize = Pbegin->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, 
tag, *Pbegin, *buf, bufSize, nloc); - buf->resetReadPos(); - MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, - Ippl::getComm(), MPI_STATUS_IGNORE); IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); IpplTimings::stopTimer(deepCopy); - } - IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(timeCommunication); + tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, + Ippl::getComm(), MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + } + IpplTimings::stopTimer(timeCommunication); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); - Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); - IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - IpplTimings::stopTimer(coarsePropagator); + IpplTimings::startTimer(coarsePropagator); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + IpplTimings::stopTimer(coarsePropagator); - Pend->R = Pend->R + Pcoarse->R; - Pend->P = Pend->P + Pcoarse->P; + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); + //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); - PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); - double localRerror, localPerror; - - IpplTimings::startTimer(computeErrors); - double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); + double localRerror, localPerror; + + IpplTimings::startTimer(computeErrors); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - IpplTimings::stopTimer(computeErrors); - //} + IpplTimings::stopTimer(computeErrors); + //} - if((Rerror <= tol) && 
(Perror <= tol)) { - isConverged = true; - } + if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { + isConverged = true; + } - IpplTimings::startTimer(timeCommunication); - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { - size_type bufSize = Pend->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); - MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); - buf->resetWritePos(); - MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); + IpplTimings::startTimer(timeCommunication); + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); + } + IpplTimings::stopTimer(timeCommunication); + + + msg << "Finished iteration: " << it+1 + << " in cycle: " << nc+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << endl; + + IpplTimings::startTimer(dumpData); + //Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1); + //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { + //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); + //} + IpplTimings::stopTimer(dumpData); + + it += 1; } - IpplTimings::stopTimer(timeCommunication); - - msg << "Finished iteration: " << it+1 - << " Rerror: " << Rerror - << " Perror: " << Perror - << endl; - - IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(localRerror, localPerror, it+1); - //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { - //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - //} - IpplTimings::stopTimer(dumpData); - - if(isConverged && isPreviousDomainConverged) { - break; + Ippl::Comm->barrier(); + if((nCycles > 1) && (nc < (nCycles - 1))) { + IpplTimings::startTimer(timeCommunication); + tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); + buf->resetReadPos(); + } + if(Ippl::Comm->rank() > 0) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); } } - - Ippl::Comm->barrier(); msg << TestName << " Parareal: End." 
<< endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 07f823292..bb27e0201 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -185,22 +185,22 @@ class ChargedParticlesPinT : public ippl::ParticleBase { const double& Bext) { //Copy initial conditions as they are needed later - Kokkos::deep_copy(R0.getView(), R.getView()); + Kokkos::deep_copy(R0.getView(), this->R.getView()); Kokkos::deep_copy(P0.getView(), P.getView()); //Get initial guess for ranks other than 0 by propagating the coarse solver if (Ippl::Comm->rank() > 0) { - BorisPIC(R, P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); + BorisPIC(this->R, P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); } - Ippl::Comm->barrier(); + //Ippl::Comm->barrier(); - Kokkos::deep_copy(Rbegin.getView(), R.getView()); + Kokkos::deep_copy(Rbegin.getView(), this->R.getView()); Kokkos::deep_copy(Pbegin.getView(), P.getView()); //Run the coarse integrator to get the values at the end of the time slice - Pcoarse->BorisPIC(R, P, ntCoarse, dtCoarse, tStartMySlice, Bext); + BorisPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice, Bext); isConverged = false; if(Ippl::Comm->rank() == 0) { @@ -211,6 +211,40 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } } + void initializeParareal(ParticleAttrib& Rbegin, + ParticleAttrib& Pbegin, + bool& isConverged, + bool& isPreviousDomainConverged, + const unsigned int& ntCoarse, + const double& dtCoarse, + const double& tStartMySlice) { + + //Copy initial conditions as they are needed later + Kokkos::deep_copy(R0.getView(), this->R.getView()); + Kokkos::deep_copy(P0.getView(), P.getView()); + + //Get initial guess for ranks other than 0 by propagating the coarse solver + if (Ippl::Comm->rank() > 0) { + LeapFrogPIC(this->R, P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); + } + + //Ippl::Comm->barrier(); + + Kokkos::deep_copy(Rbegin.getView(), this->R.getView()); + Kokkos::deep_copy(Pbegin.getView(), P.getView()); + + + //Run the coarse integrator to get the values at the end of the time slice + LeapFrogPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice); + + isConverged = false; + if(Ippl::Comm->rank() == 0) { + isPreviousDomainConverged = true; + } + else { + isPreviousDomainConverged = false; + } + } void dumpLandauPIC() { @@ -362,7 +396,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { << ExAmp << endl; } - void dumpBumponTail(const unsigned int& iter) { + void dumpBumponTail(const unsigned int& nc, const unsigned int& iter) { double fieldEnergy = 0.0; @@ -430,8 +464,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { std::stringstream fname; - fname << "data/FieldBumponTail_"; + fname << "data/FieldBumponTail_rank_"; fname << Ippl::Comm->rank(); + fname << "_nc_"; + fname << nc; fname << "_iter_"; fname << iter; fname << ".csv"; @@ -914,7 +950,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, const double& dt, const bool& /*isConverged*/, - const double& tStartMySlice, const unsigned int& iter) { + const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); @@ -935,8 +972,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0)) { 
IpplTimings::startTimer(dumpData); //dumpLandau(iter); - dumpBumponTail(iter); - dumpEnergy(this->getLocalNum(), iter, Ptemp); + dumpBumponTail(nc, iter); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); IpplTimings::stopTimer(dumpData); } for (unsigned int it=0; it { IpplTimings::startTimer(dumpData); //dumpLandau(iter); - dumpBumponTail(iter); - dumpEnergy(this->getLocalNum(), iter, Ptemp); + dumpBumponTail(nc, iter); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); IpplTimings::stopTimer(dumpData); } diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 1c03e69b9..1f3f411c6 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -572,7 +572,7 @@ int main(int argc, char *argv[]){ MPI_Wait(&request, MPI_STATUS_IGNORE); } - Ippl::Comm->barrier(); + //Ippl::Comm->barrier(); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); @@ -599,7 +599,7 @@ int main(int argc, char *argv[]){ << endl << "Tolerance: " << tol //<< " Max. iterations: " << maxIter - << " Max. cycles: " << nCycles + << " No. of cycles: " << nCycles << endl << "Np= " << nloc << " Fourier modes = " << nmPIF @@ -673,14 +673,15 @@ int main(int argc, char *argv[]){ Pcoarse->initNUFFT(FLPIF); - unsigned int it = 0; for (unsigned int nc=0; nc < nCycles; nc++) { double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); Pcoarse->time_m = tStartMySlice; Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, isPreviousDomainConverged, ntCoarse, dtCoarse, tStartMySlice, Bext); - while ((!isPreviousDomainConverged) || (!isConverged)) { + unsigned int it = 0; + while (!isConverged) { + //while ((!isPreviousDomainConverged) || (!isConverged)) { //for (unsigned int it=0; it < maxIter; it++) { //Run fine integrator in parallel @@ -703,8 +704,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(timeCommunication); - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); @@ -744,7 +745,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(computeErrors); - if((Rerror <= tol) && (Perror <= tol)) { + if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { isConverged = true; } @@ -780,30 +781,38 @@ int main(int argc, char *argv[]){ //} } + //std::cout << "Before barrier in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; Ippl::Comm->barrier(); - IpplTimings::startTimer(timeCommunication); - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { - size_type bufSize = Pend->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); - buf->resetReadPos(); - } - if(Ippl::Comm->rank() > 0) { - size_type bufSize = Pend->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); - MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); - buf->resetWritePos(); - MPI_Wait(&request, MPI_STATUS_IGNORE); + //msg 
<< "Communication started in cycle: " << nc+1 << endl; + //std::cout << "Communication started in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; + if((nCycles > 1) && (nc < (nCycles - 1))) { + IpplTimings::startTimer(timeCommunication); + tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); + buf->resetReadPos(); + } + if(Ippl::Comm->rank() > 0) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + //std::cout << "Communication finished in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; + //Ippl::Comm->barrier(); + + //msg << "Communication finished in cycle: " << nc+1 << endl; + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); } - IpplTimings::stopTimer(timeCommunication); - Ippl::Comm->barrier(); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); - IpplTimings::stopTimer(deepCopy); } msg << TestName << " Parareal: End." << endl; IpplTimings::stopTimer(mainTimer); From d890e035291ad6712e2fb7cb6149c862d8c6faff Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 12 Apr 2023 14:30:26 +0200 Subject: [PATCH 074/117] Kokkos::Experimenta->numbers for v 4.0.0 --- src/Particle/ParticleAttrib.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index f453f7294..6de534ae1 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -282,8 +282,8 @@ namespace ippl { } const value_type& val = dview_m(idx); - innerReduce += Sk * (Kokkos::Experimental::cos(arg) - - imag * Kokkos::Experimental::sin(arg)) * val; + innerReduce += Sk * (Kokkos::numbers::cos(arg) + - imag * Kokkos::numbers::sin(arg)) * val; }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { @@ -450,10 +450,10 @@ namespace ippl { //Inverse Fourier transform when the lhs is real. 
Use when //we choose k \in [0 K) instead of from [-K/2+1 K/2] - //Ex[d] = 2.0 * (Ek.real() * Kokkos::Experimental::cos(arg) - // - Ek.imag() * Kokkos::Experimental::sin(arg)); - Ek *= Sk * (Kokkos::Experimental::cos(arg) - + imag * Kokkos::Experimental::sin(arg)); + //Ex[d] = 2.0 * (Ek.real() * Kokkos::numbers::cos(arg) + // - Ek.imag() * Kokkos::numbers::sin(arg)); + Ek *= Sk * (Kokkos::numbers::cos(arg) + + imag * Kokkos::numbers::sin(arg)); Ex[d] = Ek.real(); } From a06118ea90c91e0d2d4cc4585b4272579b768cd7 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 12 Apr 2023 14:55:45 +0200 Subject: [PATCH 075/117] Kokkos::numbers removed for v 4.0.0 --- alpine/PinT/ChargedParticlesPinT.hpp | 2 +- src/Particle/ParticleAttrib.hpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index bb27e0201..f324d2e26 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -761,7 +761,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double kh = kVec[d] * dx[d]; bool isNotZero = (kh != 0.0); double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); - double arg = isNotZero * (Kokkos::Experimental::sin(kh) * factor) + + double arg = isNotZero * (Kokkos::sin(kh) * factor) + (!isNotZero) * 1.0; //Fourier transform of CIC Sk *= std::pow(arg, order); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 6de534ae1..39ba19e3b 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -282,8 +282,8 @@ namespace ippl { } const value_type& val = dview_m(idx); - innerReduce += Sk * (Kokkos::numbers::cos(arg) - - imag * Kokkos::numbers::sin(arg)) * val; + innerReduce += Sk * (Kokkos::cos(arg) + - imag * Kokkos::sin(arg)) * val; }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { @@ -450,10 +450,10 @@ namespace ippl { //Inverse Fourier transform when the lhs is real. 
Use when //we choose k \in [0 K) instead of from [-K/2+1 K/2] - //Ex[d] = 2.0 * (Ek.real() * Kokkos::numbers::cos(arg) - // - Ek.imag() * Kokkos::numbers::sin(arg)); - Ek *= Sk * (Kokkos::numbers::cos(arg) - + imag * Kokkos::numbers::sin(arg)); + //Ex[d] = 2.0 * (Ek.real() * Kokkos::cos(arg) + // - Ek.imag() * Kokkos::sin(arg)); + Ek *= Sk * (Kokkos::cos(arg) + + imag * Kokkos::sin(arg)); Ex[d] = Ek.real(); } From b27bc5dc7244c06bfe226369d544687f197ecd05 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 12 Apr 2023 15:34:10 +0200 Subject: [PATCH 076/117] LandauDamping modified for multiBlock Parareal --- .../BumponTailInstabilityPIF.cpp | 2 +- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 2 +- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 2 +- alpine/PinT/BumponTailInstabilityPinT.cpp | 5 +- alpine/PinT/LandauDampingPinT.cpp | 401 ++++++++++-------- alpine/PinT/PenningTrapPinT.cpp | 2 +- test/FFT/TestNUFFT1.cpp | 8 +- test/FFT/TestNUFFT2.cpp | 4 +- 8 files changed, 243 insertions(+), 183 deletions(-) diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp index 2ac5b18f4..5ddbd6c46 100644 --- a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -242,7 +242,7 @@ int main(int argc, char *argv[]){ //Q = -\int\int f dx dv double Q = -rmax[0] * rmax[1] * rmax[2]; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,totalP); P->nr_m = nr; diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index a3a797823..e2688105f 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -525,7 +525,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double kh = kVec[d] * dx[d]; bool isNotZero = (kh != 0.0); double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); - double arg = isNotZero * (Kokkos::Experimental::sin(kh) * factor) + + double arg = isNotZero * (Kokkos::sin(kh) * factor) + (!isNotZero) * 1.0; //Fourier transform of CIC Sk *= std::pow(arg, order); diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index 8c5613b1b..1c81783b9 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -209,7 +209,7 @@ int main(int argc, char *argv[]){ double Q = -1562.5; double Bext = 5.0; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,totalP); P->nr_m = nr; diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 188760492..cf1a2c8e3 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -7,7 +7,7 @@ // // Usage: // srun ./BumponTailInstabilityPinT -// --info 5 +// --info 5 // nmx = No. of Fourier modes in the x-direction // nmy = No. of Fourier modes in the y-direction // nmz = No. of Fourier modes in the z-direction @@ -15,10 +15,11 @@ // ny = No. of grid points in the y-direction // nz = No. of grid points in the z-direction // Np = Total no. of macro-particles in the simulation +// nCycles = No. 
of Parareal blocks/cycles // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) // Example: -// srun ./BumponTailInstabilityPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 100 B-spline 1 --info 5 +// srun ./BumponTailInstabilityPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 4 B-spline 1 --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index f08a275b4..da2491d49 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -7,7 +7,7 @@ // // Usage: // srun ./LandauDampingPinT -// --info 5 +// --info 5 // nmx = No. of Fourier modes in the x-direction // nmy = No. of Fourier modes in the y-direction // nmz = No. of Fourier modes in the z-direction @@ -15,10 +15,11 @@ // ny = No. of grid points in the y-direction // nz = No. of grid points in the z-direction // Np = Total no. of macro-particles in the simulation +// nCycles = No. of Parareal blocks/cycles // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) // Example: -// srun ./LandauDampingPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 100 B-spline 1 --info 5 +// srun ./LandauDampingPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 4 B-spline 1 --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -404,33 +405,18 @@ int main(int argc, char *argv[]){ const size_type totalP = std::atoll(argv[7]); const double tEnd = std::atof(argv[8]); - const double dtSlice = tEnd / Ippl::Comm->size(); + const unsigned int nCycles = std::atoi(argv[12]); + double tEndCycle = tEnd / nCycles; + const double dtSlice = tEndCycle / Ippl::Comm->size(); const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); const unsigned int ntFine = std::ceil(dtSlice / dtFine); const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); - const unsigned int maxIter = std::atoi(argv[12]); - const double tStartMySlice = Ippl::Comm->rank() * dtSlice; + //const double tStartMySlice = Ippl::Comm->rank() * dtSlice; //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; - msg << "Parareal " - << TestName - << endl - << "Slice dT: " << dtSlice - << endl - << "No. of fine time steps: " << ntFine - << endl - << "No. of coarse time steps: " << ntCoarse - << endl - << "Tolerance: " << tol - << " Max. 
iterations: " << maxIter - << endl - << "Np= " << totalP - << " Fourier modes = " << nmPIF - << " Grid points = " << nrPIC - << endl; using bunch_type = ChargedParticlesPinT; using states_begin_type = StatesBeginSlice; @@ -498,7 +484,6 @@ int main(int argc, char *argv[]){ //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); - Pcoarse->time_m = tStartMySlice; IpplTimings::startTimer(particleCreation); @@ -506,8 +491,6 @@ int main(int argc, char *argv[]){ for (unsigned d = 0; d next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + //if(Ippl::Comm->rank() == 0) { + // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + // Kokkos::parallel_for(nloc, + // generate_random, Dim>( + // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + + // Kokkos::fence(); + // size_type bufSize = Pbegin->packedSize(nloc); + // std::vector requests(0); + // int sends = 0; + // for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); + // requests.resize(requests.size() + 1); + // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); + // buf->resetWritePos(); + // ++sends; + // } + // MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + //} + //else { + // size_type bufSize = Pbegin->packedSize(nloc); + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + // buf->resetReadPos(); + //} + //Ippl::Comm->barrier(); + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(Ippl::Comm->rank() == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); Kokkos::parallel_for(nloc, generate_random, Dim>( Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + Kokkos::fence(); - size_type bufSize = Pbegin->packedSize(nloc); - std::vector requests(0); - int sends = 0; - for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); - requests.resize(requests.size() + 1); - Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); - buf->resetWritePos(); - ++sends; - } - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } else { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); buf->resetReadPos(); } - Ippl::Comm->barrier(); + + + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pbegin, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + + //Ippl::Comm->barrier(); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); @@ -561,6 +579,23 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); #endif + msg << "Parareal " + << TestName + 
<< endl + << "Slice dT: " << dtSlice + << endl + << "No. of fine time steps: " << ntFine + << endl + << "No. of coarse time steps: " << ntCoarse + << endl + << "Tolerance: " << tol + << " No. of cycles: " << nCycles + << endl + << "Np= " << totalP + << " Fourier modes = " << nmPIF + << " Grid points = " << nrPIC + << endl; + Pcoarse->q = Pcoarse->Q_m/nloc; IpplTimings::stopTimer(particleCreation); @@ -568,54 +603,56 @@ int main(int argc, char *argv[]){ msg << "particles created and initial conditions assigned " << endl; //Copy initial conditions as they are needed later - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + + ////Get initial guess for ranks other than 0 by propagating the coarse solver + //IpplTimings::startTimer(coarsePropagator); + //if (Ippl::Comm->rank() > 0) { + // Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); + //} + // + //Ippl::Comm->barrier(); + // + //IpplTimings::stopTimer(coarsePropagator); + //msg << "First Leap frog PIC done " << endl; - //Get initial guess for ranks other than 0 by propagating the coarse solver - IpplTimings::startTimer(coarsePropagator); - if (Ippl::Comm->rank() > 0) { - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); - } - - Ippl::Comm->barrier(); - - IpplTimings::stopTimer(coarsePropagator); - - msg << "First Leap frog PIC done " << endl; + // + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); + ////Run the coarse integrator to get the values at the end of the time slice + //IpplTimings::startTimer(coarsePropagator); + //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + //IpplTimings::stopTimer(coarsePropagator); + //msg << "Second Leap frog PIC done " << endl; - //Run the coarse integrator to get the values at the end of the time slice - IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - IpplTimings::stopTimer(coarsePropagator); - msg << "Second Leap frog PIC done " << endl; + ////The following might not be needed + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); + //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); + //IpplTimings::stopTimer(deepCopy); - //The following might not be needed - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); + //msg << "Starting parareal iterations ..." 
<< endl; + //bool isConverged = false; + //bool isPreviousDomainConverged; + //if(Ippl::Comm->rank() == 0) { + // isPreviousDomainConverged = true; + //} + //else { + // isPreviousDomainConverged = false; + //} - msg << "Starting parareal iterations ..." << endl; - bool isConverged = false; - bool isPreviousDomainConverged; - if(Ippl::Comm->rank() == 0) { - isPreviousDomainConverged = true; - } - else { - isPreviousDomainConverged = false; - } + bool isConverged, isPreviousDomainConverged; Pcoarse->shapetype_m = argv[13]; Pcoarse->shapedegree_m = std::atoi(argv[14]); @@ -625,113 +662,135 @@ int main(int argc, char *argv[]){ Pcoarse->initNUFFT(FLPIF); - //Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R0.getView()); - //Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P0.getView()); - //Pcoarse->LeapFrogPIF(Pcoarse->RprevIter, Pcoarse->PprevIter, (Ippl::Comm->rank()+1)*ntFine, - // dtFine, isConverged, tStartMySlice, 0); - //Ippl::Comm->barrier(); + for (unsigned int nc=0; nc < nCycles; nc++) { + double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); + Pcoarse->time_m = tStartMySlice; + Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, + isPreviousDomainConverged, ntCoarse, + dtCoarse, tStartMySlice); + unsigned int it = 0; + while (!isConverged) { + //Run fine integrator in parallel + IpplTimings::startTimer(finePropagator); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, nc+1, it+1); + IpplTimings::stopTimer(finePropagator); - //unsigned int maxIterRank; - for (unsigned int it=0; itLeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, it+1); - IpplTimings::stopTimer(finePropagator); - + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; - //Difference = Fine - Coarse - Pend->R = Pbegin->R - Pcoarse->R; - Pend->P = Pbegin->P - Pcoarse->P; + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + buf->resetReadPos(); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, + Ippl::getComm(), MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + } + IpplTimings::stopTimer(timeCommunication); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); - Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); - IpplTimings::stopTimer(deepCopy); - - IpplTimings::startTimer(timeCommunication); - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - int tagbool = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - - if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { - size_type bufSize = Pbegin->packedSize(nloc); - buffer_type buf 
= Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); - buf->resetReadPos(); - MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, - Ippl::getComm(), MPI_STATUS_IGNORE); IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - } - IpplTimings::stopTimer(timeCommunication); - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); - Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); - Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - IpplTimings::stopTimer(deepCopy); - - IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - IpplTimings::stopTimer(coarsePropagator); - - Pend->R = Pend->R + Pcoarse->R; - Pend->P = Pend->P + Pcoarse->P; - - PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); - IpplTimings::startTimer(computeErrors); - double localRerror, localPerror; - double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + + IpplTimings::startTimer(coarsePropagator); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + IpplTimings::stopTimer(coarsePropagator); + + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; + + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); + IpplTimings::startTimer(computeErrors); + double localRerror, localPerror; + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - //double Rerror = computeRL2Error(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - //double Perror = computePL2Error(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - //double EfieldError = 0; - //if(it > 0) { - // EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); - //} - IpplTimings::stopTimer(computeErrors); - - if((Rerror <= tol) && (Perror <= tol)) { - isConverged = true; - } - - - IpplTimings::startTimer(timeCommunication); - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { - size_type bufSize = Pend->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); - MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); - buf->resetWritePos(); - MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); - } - IpplTimings::stopTimer(timeCommunication); + //double Rerror = computeRL2Error(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); + //double Perror = computePL2Error(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + //double EfieldError = 0; + 
//if(it > 0) { + // EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); + //} + IpplTimings::stopTimer(computeErrors); + + + if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { + isConverged = true; + } + + IpplTimings::startTimer(timeCommunication); + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); + } + IpplTimings::stopTimer(timeCommunication); + + + msg << "Finished iteration: " << it+1 + << " in cycle: " << nc+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << endl; + IpplTimings::startTimer(dumpData); + //Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1); + IpplTimings::stopTimer(dumpData); + + it += 1; + } - msg << "Finished iteration: " << it+1 - << " Rerror: " << Rerror - << " Perror: " << Perror - << endl; - - IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(localRerror, localPerror, it+1); - IpplTimings::stopTimer(dumpData); - - if(isConverged && isPreviousDomainConverged) { - //maxIterRank = it+1; - break; + Ippl::Comm->barrier(); + if((nCycles > 1) && (nc < (nCycles - 1))) { + IpplTimings::startTimer(timeCommunication); + tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); + buf->resetReadPos(); + } + if(Ippl::Comm->rank() > 0) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); } - } - //std::cout << "Rank " << Ippl::Comm->rank() << " is out of the loop in iteration: " << maxIterRank << std::endl; - Ippl::Comm->barrier(); msg << TestName << " Parareal: End." << endl; IpplTimings::stopTimer(mainTimer); IpplTimings::print(); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 1f3f411c6..730f63d68 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -19,7 +19,7 @@ // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) // Example: -// srun ./PenningTrapPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 100 B-spline 1 --info 5 +// srun ./PenningTrapPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 4 B-spline 1 --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. 
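The update the PinT drivers above assemble in stages — Pend = Pbegin (fine result) minus Pcoarse (old coarse result), followed by Pend += Pcoarse after rerunning the coarse solver from the corrected begin state — is the standard Parareal correction U_{n+1}^{k+1} = F(U_n^k) - G(U_n^k) + G(U_n^{k+1}); the multi-block change in this patch wraps that iteration in an outer loop over cycles and feeds each block's converged end state in as the next block's initial state. A minimal serial sketch of the correction on the scalar ODE du/dt = -u follows; the propagators, slice count, and tolerance are illustrative stand-ins, not the PIC/PIF solvers used in these drivers.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Illustrative stand-ins for the fine and coarse propagators (forward Euler on du/dt = -u).
    double fineProp(double u, double dt, int steps) {
        for (int s = 0; s < steps; ++s) u += -u * dt;
        return u;
    }
    double coarseProp(double u, double dt) { return u - u * dt; }

    int main() {
        const int nSlices = 8;            // plays the role of the time ranks in one cycle
        const double dtSlice = 0.25;
        const double tol = 1e-12;

        std::vector<double> U(nSlices + 1, 1.0);   // states at slice boundaries, U[0] = initial condition
        for (int n = 0; n < nSlices; ++n)          // initial guess: coarse propagation only
            U[n + 1] = coarseProp(U[n], dtSlice);

        for (int it = 0; it < nSlices; ++it) {
            std::vector<double> Uold = U;          // previous-iteration states (the Pbegin/RprevIter role)
            double change = 0.0;
            for (int n = 0; n < nSlices; ++n) {
                double F    = fineProp(Uold[n], dtSlice / 100.0, 100); // fine solve (runs in parallel in the real code)
                double Gold = coarseProp(Uold[n], dtSlice);            // coarse solve from the old begin state
                double Gnew = coarseProp(U[n], dtSlice);               // coarse solve from the corrected begin state
                U[n + 1] = F - Gold + Gnew;                            // Parareal correction
                change = std::max(change, std::fabs(U[n + 1] - Uold[n + 1]));
            }
            std::printf("iteration %d  max change %.3e\n", it + 1, change);
            if (change < tol) break;
        }
        return 0;
    }

In exact arithmetic the sweep reproduces the serial fine solution after at most nSlices iterations; in practice the loop exits once the relative change drops below the tolerance, mirroring the Rerror/Perror checks in the drivers above.
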
diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index a244e7816..a020c4c79 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -200,8 +200,8 @@ int main(int argc, char *argv[]) { } const double& val = Qview(idx); - innerReduce += (Kokkos::Experimental::cos(arg) - - imag * Kokkos::Experimental::sin(arg)) * val; + innerReduce += (Kokkos::cos(arg) + - imag * Kokkos::sin(arg)) * val; }, Kokkos::Sum>(reducedValue)); if(teamMember.team_rank() == 0) { @@ -254,8 +254,8 @@ int main(int argc, char *argv[]) { arg += kVec[d]*Rview(idx)[d]; } - valL += (Kokkos::Experimental::cos(arg) - - imag * Kokkos::Experimental::sin(arg)) * Qview(idx); + valL += (Kokkos::cos(arg) + - imag * Kokkos::sin(arg)) * Qview(idx); }, Kokkos::Sum>(reducedValue)); double abs_error_real = std::fabs(reducedValue.real() - field_result(iInd, jInd, kInd).real()); diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp index d48abe9fd..8ffaf6827 100644 --- a/test/FFT/TestNUFFT2.cpp +++ b/test/FFT/TestNUFFT2.cpp @@ -204,8 +204,8 @@ int main(int argc, char *argv[]) { arg += (iVec[d] - (pt[d]/2)) * Rview(idx)[d]; } - valL += (Kokkos::Experimental::cos(arg) - + imag * Kokkos::Experimental::sin(arg)) * fview(i + nghost, j + nghost, k + nghost); + valL += (Kokkos::cos(arg) + + imag * Kokkos::sin(arg)) * fview(i + nghost, j + nghost, k + nghost); }, Kokkos::Sum>(reducedValue)); double abs_error_real = std::fabs(reducedValue.real() - Q_result(idx)); From 62dba0f14e654edb0a6d4b9194c382f9b334157c Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 17 Apr 2023 09:23:39 +0200 Subject: [PATCH 077/117] Added missing critical Kokkos::fence() after NUFFT as otherwise we have issues with Kokkos 4.0.00 --- alpine/PinT/ChargedParticlesPinT.hpp | 3 +-- src/FFT/FFT.hpp | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index f324d2e26..dedc3050d 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -555,7 +555,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { valL += myVal; }, Kokkos::Sum(temp)); - double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); potentialEnergy = 0.5 * temp * volume; @@ -1113,7 +1112,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); P2view(j)[2] += alpha * E2view(j)[2]; }); - + time_m += dt; IpplTimings::startTimer(dumpData); diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index b28196de7..b1b4bea19 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -838,6 +838,9 @@ namespace ippl { ParticleAttrib& Q, typename FFT::ComplexField_t& f) { + + //Inform m("FFT "); + auto fview = f.getView(); auto Rview = R.getView(); auto Qview = Q.getView(); @@ -915,6 +918,7 @@ namespace ippl { NULL, NULL, NULL, plan_m); ier_m = nufft_m.execute(tempQ.data(), tempField.data(), plan_m); + Kokkos::fence(); if(type_m == 1) { From 456f146a271e1571741f8c9ebe5030f2e09c082d Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 17 Apr 2023 09:25:03 +0200 Subject: [PATCH 078/117] Cleanup commented Inform --- src/FFT/FFT.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index b1b4bea19..0698e1ceb 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -839,7 +839,6 @@ namespace ippl { typename FFT::ComplexField_t& f) { - //Inform m("FFT "); auto fview = f.getView(); auto Rview = R.getView(); From 
f3e550fdac2ecebaec002d8978a4f64f7f9c4a4c Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 17 Apr 2023 12:19:33 +0200 Subject: [PATCH 079/117] Add ifdefs for NUFFT --- src/FFT/FFT.h | 10 +++++++++- src/FFT/FFT.hpp | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 2b41a9495..890dfb381 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -30,7 +30,9 @@ #include #include -#include +#ifdef ENABLE_NUFFT + #include +#endif #include #include #include @@ -69,11 +71,13 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; +#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA /** Tag classes for Non-uniform type of Fourier transforms */ class NUFFTransform {}; +#endif #endif enum FFTComm { @@ -123,6 +127,7 @@ namespace ippl { #endif #endif +#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA template struct CufinufftType {}; @@ -152,6 +157,7 @@ namespace ippl { using complexType = cuDoubleComplex; using plan_t = cufinufft_plan; }; +#endif #endif } @@ -338,6 +344,7 @@ namespace ippl { }; +#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA /** Non-uniform FFT class @@ -388,6 +395,7 @@ namespace ippl { } #endif +#endif #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 0698e1ceb..a933089ef 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -750,6 +750,7 @@ namespace ippl { } +#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA //========================================================================= // FFT NUFFTransform Constructors @@ -954,6 +955,7 @@ namespace ippl { } #endif +#endif } // vi: set et ts=4 sw=4 sts=4: From 846e3da19dab37a85e0a4ab53adfcc48b255c841 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 18 Apr 2023 15:18:40 +0200 Subject: [PATCH 080/117] Revert "Add ifdefs for NUFFT" This reverts commit f3e550fdac2ecebaec002d8978a4f64f7f9c4a4c. 
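The "critical" Kokkos::fence() that PATCH 077 adds after nufft_m.execute matters because the transform's device work is not guaranteed to have completed when the call returns; anything downstream that is not itself ordered with that work (host logic, MPI, another library's kernels) can otherwise observe incomplete results. A self-contained sketch of the pattern follows; externalAsyncTransform is a hypothetical stand-in for such an asynchronous call, not the cuFINUFFT API.

    #include <Kokkos_Core.hpp>
    #include <cstdio>

    // Stand-in for an external asynchronous device call (e.g. a NUFFT execute):
    // it only enqueues work on the device and returns immediately.
    void externalAsyncTransform(Kokkos::View<double*> out) {
        Kokkos::parallel_for("transform", out.extent(0), KOKKOS_LAMBDA(const int i) {
            out(i) = static_cast<double>(i) * 0.5;
        });
    }

    int main(int argc, char* argv[]) {
        Kokkos::initialize(argc, argv);
        {
            Kokkos::View<double*> rho("rho", 1 << 20);

            externalAsyncTransform(rho);

            // Block until the enqueued device work has finished before anything
            // downstream consumes the data; this is the synchronization point the
            // patch inserts after the NUFFT execution.
            Kokkos::fence();

            auto h = Kokkos::create_mirror_view(rho);
            Kokkos::deep_copy(h, rho);
            std::printf("rho(0) = %f\n", h(0));
        }
        Kokkos::finalize();
        return 0;
    }
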
--- src/FFT/FFT.h | 10 +--------- src/FFT/FFT.hpp | 2 -- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 890dfb381..2b41a9495 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -30,9 +30,7 @@ #include #include -#ifdef ENABLE_NUFFT - #include -#endif +#include #include #include #include @@ -71,13 +69,11 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; -#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA /** Tag classes for Non-uniform type of Fourier transforms */ class NUFFTransform {}; -#endif #endif enum FFTComm { @@ -127,7 +123,6 @@ namespace ippl { #endif #endif -#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA template struct CufinufftType {}; @@ -157,7 +152,6 @@ namespace ippl { using complexType = cuDoubleComplex; using plan_t = cufinufft_plan; }; -#endif #endif } @@ -344,7 +338,6 @@ namespace ippl { }; -#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA /** Non-uniform FFT class @@ -395,7 +388,6 @@ namespace ippl { } #endif -#endif #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index a933089ef..0698e1ceb 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -750,7 +750,6 @@ namespace ippl { } -#ifdef ENABLE_NUFFT #ifdef KOKKOS_ENABLE_CUDA //========================================================================= // FFT NUFFTransform Constructors @@ -955,7 +954,6 @@ namespace ippl { } #endif -#endif } // vi: set et ts=4 sw=4 sts=4: From d3f288ae6d393cf29cc9d4ece7c9c7cc70ed4c6f Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 25 Apr 2023 07:41:11 +0200 Subject: [PATCH 081/117] PenningTrap PIF with NUFFT made --- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index 1c81783b9..2f40467b4 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -261,6 +261,20 @@ int main(int argc, char *argv[]){ P->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-4); + + fftParams.add("use_cufinufft_defaults", false); + + + P->fft = std::make_shared(FL, 1, fftParams); + + P->q.initializeNUFFT(FL, 1, fftParams); + P->E.initializeNUFFT(FL, 2, fftParams); P->scatter(); From b7d6c2991dea4027bafb73c5d950f1ef2aebcfed Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 27 Apr 2023 10:34:06 +0200 Subject: [PATCH 082/117] tolerance changed in PenningTrap PIF and NUFFT init moved to a separate function --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 19 ++++++++++++++----- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 17 +---------------- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 15 +-------------- alpine/PinT/PenningTrapPinT.cpp | 5 ++++- 4 files changed, 20 insertions(+), 36 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index e2688105f..51c308f8f 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -42,7 +42,6 @@ typedef Field Field_t; typedef Field, Dim> CxField_t; typedef Field VField_t; -typedef ippl::FFT FFT_type; const double pi = std::acos(-1.0); @@ -76,7 +75,6 @@ class ChargedParticlesPIF : public 
ippl::ParticleBase { int shapedegree_m; - std::shared_ptr fft; public: ParticleAttrib q; // charge @@ -126,6 +124,20 @@ class ChargedParticlesPIF : public ippl::ParticleBase { setBCAllPeriodic(); } + void initNUFFT(FieldLayout_t& FL) { + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-6); + + fftParams.add("use_cufinufft_defaults", false); + + q.initializeNUFFT(FL, 1, fftParams); + E.initializeNUFFT(FL, 2, fftParams); + } + void gather() { gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, this->q); @@ -142,15 +154,12 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Inform m("scatter "); rho_m = {0.0, 0.0}; scatterPIFNUFFT(q, rho_m, Sk_m, this->R); - //fft->transform(this->R, q, rho_m); //rhoDFT_m = {0.0, 0.0}; //scatterPIFNUDFT(q, rho_m, Sk_m, this->R); //dumpFieldData(); rho_m = rho_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); - //rhoDFT_m = rhoDFT_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); - } diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index d0e9d3b92..0ed48fc0e 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -270,22 +270,7 @@ int main(int argc, char *argv[]){ P->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - - ippl::ParameterList fftParams; - - fftParams.add("gpu_method", 1); - fftParams.add("gpu_sort", 1); - fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-4); - - fftParams.add("use_cufinufft_defaults", false); - - - P->fft = std::make_shared(FL, 1, fftParams); - - P->q.initializeNUFFT(FL, 1, fftParams); - P->E.initializeNUFFT(FL, 2, fftParams); - + P->initNUFFT(FL); P->scatter(); diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index 2f40467b4..dfe082298 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -261,20 +261,7 @@ int main(int argc, char *argv[]){ P->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - ippl::ParameterList fftParams; - - fftParams.add("gpu_method", 1); - fftParams.add("gpu_sort", 1); - fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-4); - - fftParams.add("use_cufinufft_defaults", false); - - - P->fft = std::make_shared(FL, 1, fftParams); - - P->q.initializeNUFFT(FL, 1, fftParams); - P->E.initializeNUFFT(FL, 2, fftParams); + P->initNUFFT(FL); P->scatter(); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 730f63d68..5285d992a 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -402,6 +402,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); + static IpplTimings::TimerRef initializeCycles = IpplTimings::getTimer("initializeCycles"); IpplTimings::startTimer(mainTimer); @@ -585,7 +586,7 @@ int main(int argc, char *argv[]){ minU, maxU)); Kokkos::fence(); - Ippl::Comm->barrier(); + //Ippl::Comm->barrier(); #endif @@ -676,9 +677,11 @@ int main(int argc, char *argv[]){ for (unsigned int nc=0; nc < nCycles; 
nc++) { double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); Pcoarse->time_m = tStartMySlice; + IpplTimings::startTimer(initializeCycles); Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, isPreviousDomainConverged, ntCoarse, dtCoarse, tStartMySlice, Bext); + IpplTimings::stopTimer(initializeCycles); unsigned int it = 0; while (!isConverged) { //while ((!isPreviousDomainConverged) || (!isConverged)) { From ed6d72185faf5575a18c9da406a736af54b6d4a0 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 30 May 2023 08:17:21 +0200 Subject: [PATCH 083/117] Initial space-time distributed parallel code made. Need to compile and test. --- alpine/PinT/ChargedParticlesPinT.hpp | 36 ++++----- alpine/PinT/PenningTrapPinT.cpp | 110 +++++++++++++++++++-------- src/Communicate/Communicate.cpp | 8 +- src/Communicate/Communicate.h | 13 ++-- src/Particle/ParticleAttrib.hpp | 62 +++++++++------ 5 files changed, 150 insertions(+), 79 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index dedc3050d..a86b385d5 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -630,27 +630,29 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } } - void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter) { + void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter, int rankTime) { - std::stringstream fname; - fname << "data/localError_rank_"; - fname << Ippl::Comm->rank(); - fname << "_nc_"; - fname << nc; - fname << ".csv"; - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); + if(Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/localError_rank_"; + fname << rankTime; + fname << "_nc_"; + fname << nc; + fname << ".csv"; + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, rankTime); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(iter == 1) { + csvout << "Iter, Rerror, Perror" << endl; + } - if(iter == 1) { - csvout << "Iter, Rerror, Perror" << endl; + csvout << iter << " " + << Rerror << " " + << Perror << endl; } - csvout << iter << " " - << Rerror << " " - << Perror << endl; - } diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 5285d992a..09a09d888 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -147,8 +147,7 @@ double CDF(const double& x, const double& mu, const double& sigma) { } double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - Vector_t& length) { + Vector_t& length) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -174,16 +173,19 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - lError = std::sqrt(localError)/std::sqrt(localNorm); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //lError = std::sqrt(localError)/std::sqrt(localNorm); - double relError = lError;//absError / std::sqrt(globaltemp); + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); return relError; } 
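The space-time split this patch introduces (visible in the main() changes below) carves MPI_COMM_WORLD into a spatial communicator, over which field reductions and the error norms above are summed, and a temporal communicator, over which the Parareal neighbour exchanges travel. A minimal, self-contained sketch of that splitting follows; here spaceProcs is read from argv[1] with a default of 2, whereas the patch takes it from argv[13].

    #include <mpi.h>
    #include <cstdio>
    #include <cstdlib>

    int main(int argc, char* argv[]) {
        MPI_Init(&argc, &argv);

        int rankWorld, sizeWorld;
        MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld);
        MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld);

        // Ranks per time slice (spatial parallelism).
        const int spaceProcs = (argc > 1) ? std::atoi(argv[1]) : 2;

        // Same spaceColor -> same time slice (one spatial group);
        // same timeColor  -> same spatial subdomain across successive slices.
        const int spaceColor = rankWorld / spaceProcs;
        const int timeColor  = rankWorld % spaceProcs;

        MPI_Comm spaceComm, timeComm;
        MPI_Comm_split(MPI_COMM_WORLD, spaceColor, rankWorld, &spaceComm);
        MPI_Comm_split(MPI_COMM_WORLD, timeColor,  rankWorld, &timeComm);

        int rankSpace, sizeSpace, rankTime, sizeTime;
        MPI_Comm_rank(spaceComm, &rankSpace);
        MPI_Comm_size(spaceComm, &sizeSpace);
        MPI_Comm_rank(timeComm,  &rankTime);
        MPI_Comm_size(timeComm,  &sizeTime);

        // The two roles: error norms are reduced over spaceComm, while slice-to-slice
        // states are exchanged with rankTime-1 / rankTime+1 over timeComm.
        double localErrSq = 1.0, globalErrSq = 0.0;
        MPI_Allreduce(&localErrSq, &globalErrSq, 1, MPI_DOUBLE, MPI_SUM, spaceComm);

        std::printf("world %d/%d -> space %d/%d, time %d/%d, err %.1f\n",
                    rankWorld, sizeWorld, rankSpace, sizeSpace, rankTime, sizeTime, globalErrSq);

        MPI_Comm_free(&spaceComm);
        MPI_Comm_free(&timeComm);
        MPI_Finalize();
        return 0;
    }

With this layout, ranks sharing a spaceColor advance the same time slice together, while ranks sharing a timeColor hold the same particle and field partition across successive slices, which is what the rankTime-1 / rankTime+1 sends and the spaceComm barriers in the driver changes below rely on.
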
-double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -200,9 +202,13 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - lError = std::sqrt(localError)/std::sqrt(localNorm); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //lError = std::sqrt(localError)/std::sqrt(localNorm); - double relError = lError;//absError / std::sqrt(globaltemp); + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); return relError; @@ -376,7 +382,31 @@ double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { const char* TestName = "PenningTrapPinT"; int main(int argc, char *argv[]){ - Ippl ippl(argc, argv); + + int rankWorld, sizeWorld; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld); + + int spaceColor, timeColor; + MPI_Comm spaceComm, timeComm; + + int spaceProcs = std::atoi(argv[13]); + int timeProcs = std::atoi(argv[14]); + spaceColor = rankWorld / spaceProcs; + timeColor = rankWorld % spaceProcs; + + MPI_Comm_split(MPI_COMM_WORLD, spaceColor, rankWorld, &spaceComm); + MPI_Comm_split(MPI_COMM_WORLD, timeColor, rankWorld, &timeComm); + + int rankSpace, sizeSpace, rankTime, sizeTime; + MPI_Comm_rank(spaceComm, &rankSpace); + MPI_Comm_size(spaceComm, &sizeSpace); + + MPI_Comm_rank(timeComm, &rankTime); + MPI_Comm_size(timeComm, &sizeTime); + + Ippl ippl(argc, argv, spaceComm); Inform msg(TestName, Ippl::Comm->size()-1); Inform msg2all(TestName,INFORM_ALL_NODES); @@ -410,7 +440,7 @@ int main(int argc, char *argv[]){ const double tEnd = std::atof(argv[8]); const unsigned int nCycles = std::atoi(argv[12]); double tEndCycle = tEnd / nCycles; - const double dtSlice = tEndCycle / Ippl::Comm->size(); + const double dtSlice = tEndCycle / sizeTime; const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); const unsigned int ntFine = std::ceil(dtSlice / dtFine); @@ -474,8 +504,18 @@ int main(int argc, char *argv[]){ FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); PLayout_t PL(FLPIC, meshPIC); - size_type nloc = totalP; + size_type nloc = (size_type)(totalP / sizeSpace); + + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + int rest = (int) (totalP - Total_particles); + + if ( Ippl::Comm->rank() < rest ) { + ++nloc; + } double Q = -1562.5; double Bext = 5.0; @@ -546,8 +586,9 @@ int main(int argc, char *argv[]){ //condition is not the same on different GPUs tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if(Ippl::Comm->rank() == 0) { - Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + //if(Ippl::Comm->rank() == 0) { + if(rankTime == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); Kokkos::parallel_for(nloc, generate_random, Dim>( Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, @@ -559,16 +600,16 @@ int main(int argc, char *argv[]){ else { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = 
Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + if(rankTime < sizeTime-1) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pbegin, *buf, request, nloc); + Ippl::Comm->isend(rankTime+1, tag, *Pbegin, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } @@ -675,7 +716,7 @@ int main(int argc, char *argv[]){ for (unsigned int nc=0; nc < nCycles; nc++) { - double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); + double tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); Pcoarse->time_m = tStartMySlice; IpplTimings::startTimer(initializeCycles); Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, @@ -710,13 +751,13 @@ int main(int argc, char *argv[]){ tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { + if((rankTime > 0) && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); - MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, - Ippl::getComm(), MPI_STATUS_IGNORE); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-1, tagbool, + timeComm, MPI_STATUS_IGNORE); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); @@ -742,9 +783,9 @@ int main(int argc, char *argv[]){ PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); - double localRerror, localPerror; - double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + //double localRerror, localPerror; + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter); IpplTimings::stopTimer(computeErrors); @@ -754,14 +795,14 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(timeCommunication); - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + if(rankTime < (sizeTime-1)) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); + MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+1, tagbool, timeComm); } IpplTimings::stopTimer(timeCommunication); @@ -774,10 +815,12 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - 
Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1); + Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1, rankTime); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); IpplTimings::stopTimer(dumpData); + MPI_Barrier(spaceComm); + it += 1; //if(isConverged && isPreviousDomainConverged) { // break; @@ -785,24 +828,25 @@ int main(int argc, char *argv[]){ } //std::cout << "Before barrier in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; - Ippl::Comm->barrier(); + //Ippl::Comm->barrier(); + MPI_Barrier(MPI_COMM_WORLD); //msg << "Communication started in cycle: " << nc+1 << endl; //std::cout << "Communication started in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; if((nCycles > 1) && (nc < (nCycles - 1))) { IpplTimings::startTimer(timeCommunication); tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + if(rankTime < (sizeTime-1)) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime+1, tag, *Pend, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } - if(Ippl::Comm->rank() > 0) { + if(rankTime > 0) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); + Ippl::Comm->isend(rankTime-1, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } diff --git a/src/Communicate/Communicate.cpp b/src/Communicate/Communicate.cpp index 78bf9bb82..63314a1ef 100644 --- a/src/Communicate/Communicate.cpp +++ b/src/Communicate/Communicate.cpp @@ -25,7 +25,13 @@ namespace ippl { Communicate::Communicate(int& argc, char**& argv, const MPI_Comm& comm) : comm_m(comm) { - MPI_Init(&argc, &argv); + int isInitialized; + MPI_Initialized(&isInitialized); + + if (!isInitialized) { + MPI_Init(&argc, &argv); + } + MPI_Comm_rank(comm_m, &rank_m); MPI_Comm_size(comm_m, &size_m); } diff --git a/src/Communicate/Communicate.h b/src/Communicate/Communicate.h index 7024982e2..841868a05 100644 --- a/src/Communicate/Communicate.h +++ b/src/Communicate/Communicate.h @@ -123,14 +123,14 @@ namespace ippl { */ template void recv(int src, int tag, Buffer& buffer, archive_type& ar, - size_type msize, size_type nrecvs); + size_type msize, size_type nrecvs, const MPI_Comm& comm = comm_m); /*! * \warning Only works with default spaces! */ template void isend(int dest, int tag, Buffer& buffer, archive_type&, - MPI_Request&, size_type nsends); + MPI_Request&, size_type nsends, const MPI_Comm& comm = comm_m); /*! * \warning Only works with default spaces! @@ -158,7 +158,7 @@ namespace ippl { template void Communicate::recv(int src, int tag, Buffer& buffer, archive_type& ar, - size_type msize, size_type nrecvs) + size_type msize, size_type nrecvs, const MPI_Comm& comm) { // Temporary fix. 
MPI communication seems to have problems when the // count argument exceeds the range of int, so large messages should @@ -169,14 +169,15 @@ namespace ippl { } MPI_Status status; MPI_Recv(ar.getBuffer(), msize, - MPI_BYTE, src, tag, comm_m, &status); + MPI_BYTE, src, tag, comm, &status); buffer.deserialize(ar, nrecvs); } template void Communicate::isend(int dest, int tag, Buffer& buffer, - archive_type& ar, MPI_Request& request, size_type nsends) + archive_type& ar, MPI_Request& request, size_type nsends, + const MPI_Comm& comm) { if (ar.getSize() > INT_MAX) { std::cerr << "Message size exceeds range of int" << std::endl; @@ -184,7 +185,7 @@ namespace ippl { } buffer.serialize(ar, nsends); MPI_Isend(ar.getBuffer(), ar.getSize(), - MPI_BYTE, dest, tag, comm_m, &request); + MPI_BYTE, dest, tag, comm, &request); } } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 39ba19e3b..673516ffc 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -160,6 +160,15 @@ namespace ippl { const NDIndex& lDom = layout.getLocalNDIndex(); const int nghost = f.getNghost(); + + Field tempField; + + tempField.initialize(mesh, layout); + + tempField = 0.0; + + view_type viewLocal = tempField.getView(); + Kokkos::parallel_for( "ParticleAttrib::scatter", *(this->localNum_mp), @@ -183,14 +192,14 @@ namespace ippl { // scatter const value_type& val = dview_m(idx); - Kokkos::atomic_add(&view(i-1, j-1, k-1), wlo[0] * wlo[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i-1, j-1, k ), wlo[0] * wlo[1] * whi[2] * val); - Kokkos::atomic_add(&view(i-1, j, k-1), wlo[0] * whi[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i-1, j, k ), wlo[0] * whi[1] * whi[2] * val); - Kokkos::atomic_add(&view(i, j-1, k-1), whi[0] * wlo[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i, j-1, k ), whi[0] * wlo[1] * whi[2] * val); - Kokkos::atomic_add(&view(i, j, k-1), whi[0] * whi[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i, j, k ), whi[0] * whi[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j-1, k-1), wlo[0] * wlo[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j-1, k ), wlo[0] * wlo[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j, k-1), wlo[0] * whi[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j, k ), wlo[0] * whi[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i, j-1, k-1), whi[0] * wlo[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i, j-1, k ), whi[0] * wlo[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i, j, k-1), whi[0] * whi[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i, j, k ), whi[0] * whi[1] * whi[2] * val); } ); IpplTimings::stopTimer(scatterPICTimer); @@ -198,7 +207,14 @@ namespace ippl { //static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("AccumulateHalo"); //IpplTimings::startTimer(accumulateHaloTimer); f.accumulateHalo(); - //IpplTimings::stopTimer(accumulateHaloTimer); + //IpplTimings::stopTimer(accumulateHaloTimer); + + static IpplTimings::TimerRef scatterAllReducePICTimer = IpplTimings::getTimer("scatterAllReducePIC"); + IpplTimings::startTimer(scatterAllReducePICTimer); + int viewSize = view.extent(0) * view.extent(1) * view.extent(2); + MPI_Allreduce(viewLocal.data(), view.data(), viewSize, + MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + IpplTimings::stopTimer(scatterAllReducePICTimer); } @@ -497,31 +513,33 @@ namespace ippl { auto q = *this; - //Field tempField; + Field tempField; + + FieldLayout& layout = f.getLayout(); + M& mesh = f.get_mesh(); - //FieldLayout& layout = 
f.getLayout(); - //M& mesh = f.get_mesh(); + tempField.initialize(mesh, layout); - //tempField.initialize(mesh, layout); + tempField = 0.0; - //fftType_mp->transform(pp, q, tempField); - fftType_mp->transform(pp, q, f); + fftType_mp->transform(pp, q, tempField); + //fftType_mp->transform(pp, q, f); using view_type = typename Field::view_type; view_type fview = f.getView(); - //view_type viewLocal = tempField.getView(); + view_type viewLocal = tempField.getView(); typename Field::view_type Skview = Sk.getView(); const int nghost = f.getNghost(); IpplTimings::stopTimer(scatterPIFNUFFTTimer); - //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); - //IpplTimings::startTimer(scatterAllReduceTimer); - //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); - //MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, - // MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); - //IpplTimings::stopTimer(scatterAllReduceTimer); + static IpplTimings::TimerRef scatterAllReducePIFTimer = IpplTimings::getTimer("scatterAllReducePIF"); + IpplTimings::startTimer(scatterAllReducePIFTimer); + int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); + IpplTimings::stopTimer(scatterAllReducePIFTimer); //IpplTimings::startTimer(scatterPIFNUFFTTimer); From b708c671b4982de1c70f3bbc0da213a5008a0b0b Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 30 May 2023 08:21:04 +0200 Subject: [PATCH 084/117] MPI_Comm_free added for space and time communicators --- alpine/PinT/PenningTrapPinT.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 09a09d888..9b07e1106 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -866,5 +866,8 @@ int main(int argc, char *argv[]){ IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); + MPI_Comm_free(spaceComm); + MPI_Comm_free(timeComm); + return 0; } From dfe3c75c842c1ecabe3e0339cc04b0651b61e262 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 30 May 2023 08:54:05 +0200 Subject: [PATCH 085/117] Compilation errors removed. 
Need to run and test --- alpine/PinT/PenningTrapPinT.cpp | 6 +++--- src/Communicate/Communicate.h | 4 ++-- src/Particle/ParticleAttrib.hpp | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 9b07e1106..c503d9445 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -815,7 +815,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1, rankTime); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); IpplTimings::stopTimer(dumpData); @@ -866,8 +866,8 @@ int main(int argc, char *argv[]){ IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); - MPI_Comm_free(spaceComm); - MPI_Comm_free(timeComm); + MPI_Comm_free(&spaceComm); + MPI_Comm_free(&timeComm); return 0; } diff --git a/src/Communicate/Communicate.h b/src/Communicate/Communicate.h index 841868a05..423bd08b1 100644 --- a/src/Communicate/Communicate.h +++ b/src/Communicate/Communicate.h @@ -123,14 +123,14 @@ namespace ippl { */ template void recv(int src, int tag, Buffer& buffer, archive_type& ar, - size_type msize, size_type nrecvs, const MPI_Comm& comm = comm_m); + size_type msize, size_type nrecvs, const MPI_Comm& comm = MPI_COMM_WORLD); /*! * \warning Only works with default spaces! */ template void isend(int dest, int tag, Buffer& buffer, archive_type&, - MPI_Request&, size_type nsends, const MPI_Comm& comm = comm_m); + MPI_Request&, size_type nsends, const MPI_Comm& comm = MPI_COMM_WORLD); /*! * \warning Only works with default spaces! diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 673516ffc..458fba563 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -147,7 +147,7 @@ namespace ippl { typename Field::view_type view = f.getView(); - const M& mesh = f.get_mesh(); + M& mesh = f.get_mesh(); using vector_type = typename M::vector_type; using value_type = typename ParticleAttrib::value_type; @@ -156,18 +156,18 @@ namespace ippl { const vector_type& origin = mesh.getOrigin(); const vector_type invdx = 1.0 / dx; - const FieldLayout& layout = f.getLayout(); + FieldLayout& layout = f.getLayout(); const NDIndex& lDom = layout.getLocalNDIndex(); const int nghost = f.getNghost(); - Field tempField; + Field tempField; tempField.initialize(mesh, layout); tempField = 0.0; - view_type viewLocal = tempField.getView(); + typename Field::view_type viewLocal = tempField.getView(); Kokkos::parallel_for( "ParticleAttrib::scatter", From 384256a8cb2405df59efb2e2fc576910225fabba Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 30 May 2023 16:43:37 +0200 Subject: [PATCH 086/117] space-time parallelism seems to work. Need to run more tests and confirm. 
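This patch wires the space/time decomposition through the solver: every rank belongs to a spaceComm (all ranks sharing one parareal time slice, used for the charge scatter, field solve and error reductions) and a timeComm (all ranks owning the same spatial subdomain in successive slices, used to pass particle states along the parareal pipeline). The standalone sketch below only illustrates how the two MPI_Comm_split calls partition the world communicator; it is not part of the patch, it reads spaceProcs/timeProcs from argv[1]/argv[2] rather than argv[15]/argv[16], and the size check is an extra assumption added for the example.

    // space_time_split_sketch.cpp -- illustrative only; mirrors the MPI_Comm_split
    // logic of PenningTrapPinT.cpp in a self-contained program.
    #include <mpi.h>
    #include <cstdio>
    #include <cstdlib>

    int main(int argc, char* argv[]) {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        const int spaceProcs = std::atoi(argv[1]); // ranks per time slice
        const int timeProcs  = std::atoi(argv[2]); // number of time slices
        if (spaceProcs * timeProcs != size) {      // sanity check (added in this sketch only)
            if (rank == 0) std::fprintf(stderr, "need spaceProcs*timeProcs == nprocs\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        // Consecutive blocks of spaceProcs ranks form one time slice ...
        const int spaceColor = rank / spaceProcs;
        // ... and ranks with the same remainder own the same subdomain in every slice.
        const int timeColor = rank % spaceProcs;

        MPI_Comm spaceComm, timeComm;
        MPI_Comm_split(MPI_COMM_WORLD, spaceColor, rank, &spaceComm);
        MPI_Comm_split(MPI_COMM_WORLD, timeColor, rank, &timeComm);

        int rankSpace, rankTime;
        MPI_Comm_rank(spaceComm, &rankSpace); // position inside the slice
        MPI_Comm_rank(timeComm, &rankTime);   // index of the slice along the pipeline

        std::printf("world %d -> (space %d, time %d)\n", rank, rankSpace, rankTime);

        MPI_Comm_free(&spaceComm); // note the address-of operator, cf. PATCH 085
        MPI_Comm_free(&timeComm);
        MPI_Finalize();
        return 0;
    }

Run, for example, with mpirun -np 8 ./sketch 2 4 to obtain a 2 (space) x 4 (time) process grid; the mapping printed there is the same one the reductions over spaceComm and the point-to-point transfers over timeComm rely on in this patch.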
--- alpine/PinT/ChargedParticlesPinT.hpp | 86 +++++++++++++++------------- alpine/PinT/PenningTrapPinT.cpp | 67 +++++++++++++--------- src/Particle/ParticleAttrib.h | 6 +- src/Particle/ParticleAttrib.hpp | 22 ++++--- 4 files changed, 103 insertions(+), 78 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index a86b385d5..d552442a1 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -182,15 +182,17 @@ class ChargedParticlesPinT : public ippl::ParticleBase { const unsigned int& ntCoarse, const double& dtCoarse, const double& tStartMySlice, - const double& Bext) { + const double& Bext, + const int& rankTime, + MPI_Comm& spaceComm) { //Copy initial conditions as they are needed later Kokkos::deep_copy(R0.getView(), this->R.getView()); Kokkos::deep_copy(P0.getView(), P.getView()); //Get initial guess for ranks other than 0 by propagating the coarse solver - if (Ippl::Comm->rank() > 0) { - BorisPIC(this->R, P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); + if (rankTime > 0) { + BorisPIC(this->R, P, rankTime*ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); } //Ippl::Comm->barrier(); @@ -200,10 +202,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //Run the coarse integrator to get the values at the end of the time slice - BorisPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice, Bext); + BorisPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); isConverged = false; - if(Ippl::Comm->rank() == 0) { + if(rankTime == 0) { isPreviousDomainConverged = true; } else { @@ -487,7 +489,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void dumpEnergy(size_type /*totalP*/, const unsigned int& nc, - const unsigned int& iter, ParticleAttrib& Ptemp) { + const unsigned int& iter, ParticleAttrib& Ptemp, + int rankTime, int rankSpace, const MPI_Comm& spaceComm = MPI_COMM_WORLD) { double potentialEnergy, kineticEnergy; @@ -572,32 +575,34 @@ class ChargedParticlesPinT : public ippl::ParticleBase { }, Kokkos::Sum(temp)); temp *= 0.5; - //globaltemp = 0.0; - double globaltemp = temp; - //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double globaltemp = 0.0; + //double globaltemp = temp; + MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, spaceComm); kineticEnergy = globaltemp; - std::stringstream fname; - fname << "data/Energy_rank_"; - fname << Ippl::Comm->rank(); - fname << "_nc_"; - fname << nc; - fname << "_iter_"; - fname << iter; - fname << ".csv"; + if(rankSpace == 0) { + std::stringstream fname; + fname << "data/Energy_rank_"; + fname << rankTime; + fname << "_nc_"; + fname << nc; + fname << "_iter_"; + fname << iter; + fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); - //csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + //csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; - csvout << time_m << " " - << potentialEnergy << " " - << kineticEnergy << " " - << potentialEnergy + kineticEnergy << endl; + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << endl; + } } @@ -630,9 +635,10 @@ class ChargedParticlesPinT : public 
ippl::ParticleBase { } } - void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter, int rankTime) { + void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter, int rankTime, int rankSpace) { - if(Ippl::Comm->rank() == 0) { + //if(Ippl::Comm->rank() == 0) { + if(rankSpace == 0) { std::stringstream fname; fname << "data/localError_rank_"; fname << rankTime; @@ -640,7 +646,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { fname << nc; fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, rankTime); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); @@ -840,16 +846,15 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } - void BorisPIC(ParticleAttrib& Rtemp, - ParticleAttrib& Ptemp, const unsigned int nt, - const double dt, const double& tStartMySlice, const double& Bext) { + void BorisPIC(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, + const double dt, const double& tStartMySlice, const double& Bext, MPI_Comm& spaceComm) { static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIC_m = 0.0; - scatter(q, rhoPIC_m, Rtemp); + scatter(q, rhoPIC_m, Rtemp, spaceComm); rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); @@ -906,7 +911,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //scatter the charge onto the underlying grid rhoPIC_m = 0.0; - scatter(q, rhoPIC_m, Rtemp); + scatter(q, rhoPIC_m, Rtemp, spaceComm); rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); @@ -1019,16 +1024,17 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const bool& /*isConverged*/, - const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter, const double& Bext) { + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, const double& Bext, + int rankTime, int rankSpace, + MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -1041,7 +1047,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0)) { IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } double alpha = -0.5 * dt; @@ -1087,7 +1093,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -1118,7 +1124,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; 
IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index c503d9445..73802f9a0 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -147,7 +147,7 @@ double CDF(const double& x, const double& mu, const double& sigma) { } double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - Vector_t& length) { + Vector_t& length, MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -174,9 +174,11 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp Kokkos::fence(); double globalError = 0.0; - MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); double globalNorm = 0.0; - MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); //lError = std::sqrt(localError)/std::sqrt(localNorm); double relError = std::sqrt(globalError) / std::sqrt(globalNorm); @@ -185,7 +187,7 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp } -double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter) { +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -203,9 +205,11 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp Kokkos::fence(); double globalError = 0.0; - MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); double globalNorm = 0.0; - MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + //MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); //lError = std::sqrt(localError)/std::sqrt(localNorm); double relError = std::sqrt(globalError) / std::sqrt(globalNorm); @@ -383,21 +387,27 @@ const char* TestName = "PenningTrapPinT"; int main(int argc, char *argv[]){ - int rankWorld, sizeWorld; - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); - MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld); + Ippl ippl(argc, argv); + + //int rankWorld, sizeWorld; + //MPI_Init(&argc, &argv); + //MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + //MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld); int spaceColor, timeColor; MPI_Comm spaceComm, timeComm; - int spaceProcs = std::atoi(argv[13]); - int timeProcs = std::atoi(argv[14]); - spaceColor = rankWorld / spaceProcs; - timeColor = rankWorld % spaceProcs; + int spaceProcs = std::atoi(argv[15]); + int timeProcs = std::atoi(argv[16]); + //spaceColor = rankWorld / spaceProcs; + //timeColor = rankWorld % spaceProcs; + spaceColor = Ippl::Comm->rank() / spaceProcs; + timeColor = Ippl::Comm->rank() % spaceProcs; - MPI_Comm_split(MPI_COMM_WORLD, spaceColor, rankWorld, &spaceComm); - MPI_Comm_split(MPI_COMM_WORLD, timeColor, rankWorld, 
&timeComm); + //MPI_Comm_split(MPI_COMM_WORLD, spaceColor, rankWorld, &spaceComm); + //MPI_Comm_split(MPI_COMM_WORLD, timeColor, rankWorld, &timeComm); + MPI_Comm_split(Ippl::getComm(), spaceColor, Ippl::Comm->rank(), &spaceComm); + MPI_Comm_split(Ippl::getComm(), timeColor, Ippl::Comm->rank(), &timeComm); int rankSpace, sizeSpace, rankTime, sizeTime; MPI_Comm_rank(spaceComm, &rankSpace); @@ -406,8 +416,9 @@ int main(int argc, char *argv[]){ MPI_Comm_rank(timeComm, &rankTime); MPI_Comm_size(timeComm, &sizeTime); - Ippl ippl(argc, argv, spaceComm); + //Ippl ippl(argc, argv, spaceComm); + //Inform msg(TestName, sizeSpace-1); Inform msg(TestName, Ippl::Comm->size()-1); Inform msg2all(TestName,INFORM_ALL_NODES); @@ -508,8 +519,10 @@ int main(int argc, char *argv[]){ size_type Total_particles = 0; + //MPI_Allreduce(&nloc, &Total_particles, 1, + // MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); MPI_Allreduce(&nloc, &Total_particles, 1, - MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); int rest = (int) (totalP - Total_particles); @@ -519,7 +532,7 @@ int main(int argc, char *argv[]){ double Q = -1562.5; double Bext = 5.0; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,nloc); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,totalP); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -643,12 +656,12 @@ int main(int argc, char *argv[]){ //<< " Max. iterations: " << maxIter << " No. of cycles: " << nCycles << endl - << "Np= " << nloc + << "Np= " << totalP << " Fourier modes = " << nmPIF << " Grid points = " << nrPIC << endl; - Pcoarse->q = Pcoarse->Q_m/nloc; + Pcoarse->q = Pcoarse->Q_m/totalP; IpplTimings::stopTimer(particleCreation); msg << "particles created and initial conditions assigned " << endl; @@ -721,7 +734,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeCycles); Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, isPreviousDomainConverged, ntCoarse, - dtCoarse, tStartMySlice, Bext); + dtCoarse, tStartMySlice, Bext, rankTime, spaceComm); IpplTimings::stopTimer(initializeCycles); unsigned int it = 0; while (!isConverged) { @@ -730,7 +743,7 @@ int main(int argc, char *argv[]){ //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, nc+1, it+1, Bext); + Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, Bext, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -773,7 +786,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -784,8 +797,8 @@ int main(int argc, char *argv[]){ PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); //double localRerror, localPerror; - double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length); - double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); IpplTimings::stopTimer(computeErrors); @@ -815,7 +828,7 @@ int main(int argc, char *argv[]){ 
IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); IpplTimings::stopTimer(dumpData); diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index a50bb9007..aeb0df5f0 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -157,7 +157,8 @@ namespace ippl { template void scatter(Field& f, - const ParticleAttrib, Properties... >& pp) const; + const ParticleAttrib, Properties... >& pp, + const MPI_Comm& spaceComm) const; template void @@ -181,7 +182,8 @@ namespace ippl { template void scatterPIFNUFFT(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp) const; + const ParticleAttrib, Properties... >& pp, + const MPI_Comm& spaceComm) const; template void diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 458fba563..6a4047742 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -139,7 +139,8 @@ namespace ippl { template template void ParticleAttrib::scatter(Field& f, - const ParticleAttrib< Vector, Properties... >& pp) + const ParticleAttrib< Vector, Properties... >& pp, + const MPI_Comm& spaceComm) const { static IpplTimings::TimerRef scatterPICTimer = IpplTimings::getTimer("ScatterPIC"); @@ -206,14 +207,14 @@ namespace ippl { //static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("AccumulateHalo"); //IpplTimings::startTimer(accumulateHaloTimer); - f.accumulateHalo(); + tempField.accumulateHalo(); //IpplTimings::stopTimer(accumulateHaloTimer); static IpplTimings::TimerRef scatterAllReducePICTimer = IpplTimings::getTimer("scatterAllReducePIC"); IpplTimings::startTimer(scatterAllReducePICTimer); int viewSize = view.extent(0) * view.extent(1) * view.extent(2); MPI_Allreduce(viewLocal.data(), view.data(), viewSize, - MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_DOUBLE, MPI_SUM, spaceComm); IpplTimings::stopTimer(scatterAllReducePICTimer); } @@ -504,7 +505,8 @@ namespace ippl { template template void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, - const ParticleAttrib< Vector, Properties... >& pp) + const ParticleAttrib< Vector, Properties... >& pp, + const MPI_Comm& spaceComm) const { @@ -538,7 +540,7 @@ namespace ippl { IpplTimings::startTimer(scatterAllReducePIFTimer); int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, - MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); + MPI_C_DOUBLE_COMPLEX, MPI_SUM, spaceComm); IpplTimings::stopTimer(scatterAllReducePIFTimer); //IpplTimings::startTimer(scatterPIFNUFFTTimer); @@ -648,10 +650,11 @@ namespace ippl { template inline void scatterPIFNUFFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp, + const MPI_Comm& spaceComm = MPI_COMM_WORLD) { #ifdef KOKKOS_ENABLE_CUDA - attrib.scatterPIFNUFFT(f, Sk, pp); + attrib.scatterPIFNUFFT(f, Sk, pp, spaceComm); #else //throw IpplException("scatterPIFNUFFT", "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to // be compiled with CUDA. 
Otherwise use scatterPIFNUDFT."); @@ -682,9 +685,10 @@ namespace ippl { template inline void scatter(const ParticleAttrib& attrib, Field& f, - const ParticleAttrib, Properties...>& pp) + const ParticleAttrib, Properties...>& pp, + const MPI_Comm& spaceComm = MPI_COMM_WORLD) { - attrib.scatter(f, pp); + attrib.scatter(f, pp, spaceComm); } template From 18498227d6a7c78b0710b9ff007ad984eb92920c Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 1 Jun 2023 11:45:22 +0200 Subject: [PATCH 087/117] In the middle of debugging --- alpine/PinT/ChargedParticlesPinT.hpp | 37 +++-- alpine/PinT/PenningTrapPinT.cpp | 211 ++++++++++++++++++++++----- 2 files changed, 198 insertions(+), 50 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index d552442a1..7e86b6324 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -177,6 +177,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void initializeParareal(ParticleAttrib& Rbegin, ParticleAttrib& Pbegin, + ParticleAttrib& Rcoarse, + ParticleAttrib& Pcoarse, + ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, bool& isConverged, bool& isPreviousDomainConverged, const unsigned int& ntCoarse, @@ -187,22 +191,33 @@ class ChargedParticlesPinT : public ippl::ParticleBase { MPI_Comm& spaceComm) { //Copy initial conditions as they are needed later - Kokkos::deep_copy(R0.getView(), this->R.getView()); - Kokkos::deep_copy(P0.getView(), P.getView()); + //Kokkos::deep_copy(R0.getView(), this->R.getView()); + //Kokkos::deep_copy(P0.getView(), P.getView()); + Kokkos::deep_copy(Rtemp.getView(), Rcoarse.getView()); + Kokkos::deep_copy(Ptemp.getView(), Pcoarse.getView()); //Get initial guess for ranks other than 0 by propagating the coarse solver if (rankTime > 0) { - BorisPIC(this->R, P, rankTime*ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + //BorisPIC(this->R, P, rankTime*ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + BorisPIC(Rcoarse, Pcoarse, rankTime*ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); } - + + //Copy initial conditions as they are needed later + //Kokkos::deep_copy(R0.getView(), this->R.getView()); + //Kokkos::deep_copy(P0.getView(), P.getView()); + + //Ippl::Comm->barrier(); - Kokkos::deep_copy(Rbegin.getView(), this->R.getView()); - Kokkos::deep_copy(Pbegin.getView(), P.getView()); + //Kokkos::deep_copy(Rbegin.getView(), this->R.getView()); + //Kokkos::deep_copy(Pbegin.getView(), P.getView()); + Kokkos::deep_copy(Rbegin.getView(), Rcoarse.getView()); + Kokkos::deep_copy(Pbegin.getView(), Pcoarse.getView()); //Run the coarse integrator to get the values at the end of the time slice - BorisPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + //BorisPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + BorisPIC(Rcoarse, Pcoarse, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); isConverged = false; if(rankTime == 0) { @@ -860,6 +875,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); //Field solve + EfieldPIC_m = 0.0; solver_mp->solve(); // gather E field @@ -919,6 +935,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //Field solve IpplTimings::startTimer(fieldSolvePIC); + EfieldPIC_m = 0.0; solver_mp->solve(); IpplTimings::stopTimer(fieldSolvePIC); @@ -1031,7 +1048,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static 
IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); - //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); @@ -1045,7 +1062,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - if((time_m == 0.0)) { + if((time_m == 1000.0)) { IpplTimings::startTimer(dumpData); dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); @@ -1124,7 +1141,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 73802f9a0..8bef5a309 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -561,8 +561,19 @@ int main(int argc, char *argv[]){ Pbegin->create(nloc); Pend->create(nloc); + Pcoarse->q = Pcoarse->Q_m/totalP; using buffer_type = ippl::Communicate::buffer_type; int tag; + + Pcoarse->shapetype_m = argv[13]; + Pcoarse->shapedegree_m = std::atoi(argv[14]); + IpplTimings::startTimer(initializeShapeFunctionPIF); + Pcoarse->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + + Pcoarse->initNUFFT(FLPIF); + #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs @@ -617,7 +628,49 @@ int main(int argc, char *argv[]){ buf->resetReadPos(); } + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + Kokkos::fence(); + + if(rankTime == 0) { + unsigned int stepsToRun = 2*ntCoarse; + Pcoarse->BorisPIC(Pend->R, Pend->P, stepsToRun, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + Pcoarse->BorisPIC(Pbegin->R, Pbegin->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + Pcoarse->BorisPIC(Pbegin->R, Pbegin->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + //Pcoarse->BorisPIF(Pend->R, Pend->P, stepsToRun, dtFine, rankTime * dtSlice, 0, 0, Bext, rankTime, rankSpace, spaceComm); + //Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, rankTime * dtSlice, 0, 0, Bext, rankTime, rankSpace, spaceComm); + //Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, rankTime * dtSlice, 0, 0, Bext, rankTime, rankSpace, spaceComm); + Pcoarse->dumpParticleData(0, Pend->R, Pend->P, "cont"); + Pcoarse->dumpParticleData(0, Pbegin->R, Pbegin->P, "sep"); + } + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + Kokkos::fence(); + if(rankTime < sizeTime-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); 
+ } + Ippl::Comm->barrier(); + + if(rankTime > 0) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + } + if(rankTime < sizeTime-1) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -627,11 +680,41 @@ int main(int argc, char *argv[]){ MPI_Wait(&request, MPI_STATUS_IGNORE); } - //Ippl::Comm->barrier(); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - IpplTimings::stopTimer(deepCopy); + + if(rankTime == 1) { + unsigned int stepsToRun = (rankTime+1) * ntCoarse; + // std::cout << "Rank: " << Ippl::Comm->rank() << "needs to run " << stepsToRun << " steps" << std::endl; + Pcoarse->BorisPIC(Pbegin->R, Pbegin->P, stepsToRun, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + } + + //Pcoarse->dumpParticleData(0, Pcoarse->R, Pcoarse->P, "new"); + //Pcoarse->dumpParticleData(0, Pbegin->R, Pbegin->P, "old"); + double Rerror2 = computeRL2Error(Pbegin->R, Pcoarse->R, length, spaceComm); + double Perror2 = computePL2Error(Pbegin->P, Pcoarse->P, spaceComm); + std::cout << "Rank: " << Ippl::Comm->rank() << " Rerror: " << Rerror2 << " Perror: " << Perror2 << std::endl; + Pbegin->R = Pbegin->R - Pcoarse->R; + Pbegin->P = Pbegin->P - Pcoarse->P; + //Pcoarse->dumpParticleData(0, Pbegin->R, Pbegin->P, "diff"); + + + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); + //Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + + //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + //Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + //IpplTimings::stopTimer(deepCopy); + + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + //IpplTimings::stopTimer(deepCopy); #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, @@ -661,7 +744,6 @@ int main(int argc, char *argv[]){ << " Grid points = " << nrPIC << endl; - Pcoarse->q = Pcoarse->Q_m/totalP; IpplTimings::stopTimer(particleCreation); msg << "particles created and initial conditions assigned " << endl; @@ -716,26 +798,44 @@ int main(int argc, char *argv[]){ // isPreviousDomainConverged = false; //} - bool isConverged, isPreviousDomainConverged; - Pcoarse->shapetype_m = argv[13]; - Pcoarse->shapedegree_m = std::atoi(argv[14]); - IpplTimings::startTimer(initializeShapeFunctionPIF); - Pcoarse->initializeShapeFunctionPIF(); - IpplTimings::stopTimer(initializeShapeFunctionPIF); - - Pcoarse->initNUFFT(FLPIF); - - + int sign = 1; for (unsigned int nc=0; nc < nCycles; nc++) { - double tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); - Pcoarse->time_m = tStartMySlice; - IpplTimings::startTimer(initializeCycles); - Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, - isPreviousDomainConverged, ntCoarse, - dtCoarse, tStartMySlice, Bext, rankTime, spaceComm); - 
IpplTimings::stopTimer(initializeCycles); + + double tStartMySlice; + bool sendCriteria, recvCriteria; + bool isConverged = false; + bool isPreviousDomainConverged = false; + + //IpplTimings::startTimer(initializeCycles); + //Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, Pcoarse->R, Pcoarse->P, Pcoarse->R0, + // Pcoarse->P0, isConverged, + // isPreviousDomainConverged, ntCoarse, + // dtCoarse, tStartMySlice, Bext, rankTime, spaceComm); + //IpplTimings::stopTimer(initializeCycles); + //even cycles + if(nc % 2 == 0) { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + if(rankTime == 0) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + msg.setPrintNode(Ippl::Comm->size()-1); + } + //odd cycles + else { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + if(rankTime == (sizeTime - 1)) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + msg.setPrintNode(0); + } + //Pcoarse->time_m = tStartMySlice; + unsigned int it = 0; while (!isConverged) { //while ((!isPreviousDomainConverged) || (!isConverged)) { @@ -743,7 +843,8 @@ int main(int argc, char *argv[]){ //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, Bext, rankTime, rankSpace, spaceComm); + Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, + Bext, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -764,12 +865,12 @@ int main(int argc, char *argv[]){ tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if((rankTime > 0) && (!isPreviousDomainConverged)) { + if(recvCriteria && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + Ippl::Comm->recv(rankTime-sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); - MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-1, tagbool, + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-sign, tagbool, timeComm, MPI_STATUS_IGNORE); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); @@ -808,14 +909,14 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(timeCommunication); - if(rankTime < (sizeTime-1)) { + if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); + Ippl::Comm->isend(rankTime+sign, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+1, tagbool, timeComm); + MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+sign, tagbool, timeComm); } IpplTimings::stopTimer(timeCommunication); @@ -828,7 +929,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); 
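        //(Illustrative comment, not taken from the original sources.) Each pass of
        //this while loop is one parareal iteration: the fine propagator (BorisPIF)
        //advances the incoming slice state in Pbegin, the coarse propagator
        //(BorisPIC) is re-run on the updated initial state, and the corrected end
        //state accumulated in Pend is forwarded to the neighbouring time rank over
        //timeComm together with the isConverged flag. The sendCriteria/recvCriteria
        //flags and the 'sign' factor introduced in this patch reverse the sweep
        //direction on odd cycles, so slice states flow from rankTime 0 up to
        //sizeTime-1 on even cycles and back down on odd ones.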
IpplTimings::stopTimer(dumpData); @@ -848,18 +949,47 @@ int main(int argc, char *argv[]){ if((nCycles > 1) && (nc < (nCycles - 1))) { IpplTimings::startTimer(timeCommunication); tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + //send, receive criteria and tStartMySlice are reversed at the end of the cycle + if(nc % 2 == 0) { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + } + //odd cycles + else { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + } - if(rankTime < (sizeTime-1)) { - size_type bufSize = Pend->packedSize(nloc); + if(recvCriteria) { + size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(rankTime+1, tag, *Pend, *buf, bufSize, nloc, timeComm); + Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } - if(rankTime > 0) { + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(rankTime-1, tag, *Pend, *buf, request, nloc, timeComm); + Ippl::Comm->isend(rankTime-sign, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } @@ -868,10 +998,11 @@ int main(int argc, char *argv[]){ //Ippl::Comm->barrier(); //msg << "Communication finished in cycle: " << nc+1 << endl; - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); - IpplTimings::stopTimer(deepCopy); + //IpplTimings::startTimer(deepCopy); + //Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + //Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + //IpplTimings::stopTimer(deepCopy); + sign *= -1; } } msg << TestName << " Parareal: End." 
<< endl; From cb970966f0ad67ead3fe7892dddcdfce2ec90982 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 1 Jun 2023 17:37:48 +0200 Subject: [PATCH 088/117] bug in Penning trap corrected --- alpine/ElectrostaticPIC/PenningTrap.cpp | 31 +++++----- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 29 ++++----- alpine/PinT/ChargedParticlesPinT.hpp | 66 ++++++++++---------- alpine/PinT/PenningTrapPinT.cpp | 70 +--------------------- 4 files changed, 68 insertions(+), 128 deletions(-) diff --git a/alpine/ElectrostaticPIC/PenningTrap.cpp b/alpine/ElectrostaticPIC/PenningTrap.cpp index e8c641b56..f2b408cb2 100644 --- a/alpine/ElectrostaticPIC/PenningTrap.cpp +++ b/alpine/ElectrostaticPIC/PenningTrap.cpp @@ -352,6 +352,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpData(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -381,13 +382,13 @@ int main(int argc, char *argv[]){ double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - Eview(j)[0] += Eext_x; - Eview(j)[1] += Eext_y; - Eview(j)[2] += Eext_z; + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; - Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); - Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); - Pview(j)[2] += alpha * Eview(j)[2]; + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; }); IpplTimings::stopTimer(PTimer); @@ -434,20 +435,22 @@ int main(int argc, char *argv[]){ double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - E2view(j)[0] += Eext_x; - E2view(j)[1] += Eext_y; - E2view(j)[2] += Eext_z; - P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] - + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); - P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] - - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); - P2view(j)[2] += alpha * E2view(j)[2]; + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; }); IpplTimings::stopTimer(PTimer); P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); P->dumpData(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index dfe082298..1ae3ab415 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -296,13 +296,13 @@ int main(int argc, char *argv[]){ double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - Eview(j)[0] += Eext_x; - Eview(j)[1] += Eext_y; - Eview(j)[2] += Eext_z; + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; - Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); - Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); - 
Pview(j)[2] += alpha * Eview(j)[2]; + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; }); IpplTimings::stopTimer(PTimer); @@ -333,14 +333,15 @@ int main(int argc, char *argv[]){ double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - E2view(j)[0] += Eext_x; - E2view(j)[1] += Eext_y; - E2view(j)[2] += Eext_z; - P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] - + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); - P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] - - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); - P2view(j)[2] += alpha * E2view(j)[2]; + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; }); IpplTimings::stopTimer(PTimer); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 7e86b6324..117016ef4 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -909,13 +909,13 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - Eview(j)[0] += Eext_x; - Eview(j)[1] += Eext_y; - Eview(j)[2] += Eext_z; + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; - Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); - Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); - Pview(j)[2] += alpha * Eview(j)[2]; + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; }); //drift @@ -951,15 +951,16 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - - E2view(j)[0] += Eext_x; - E2view(j)[1] += Eext_y; - E2view(j)[2] += Eext_z; - P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] - + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); - P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] - - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); - P2view(j)[2] += alpha * E2view(j)[2]; + + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; }); time_m += dt; @@ -1048,7 +1049,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); - PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); @@ -1062,7 +1063,7 @@ class ChargedParticlesPinT : public 
ippl::ParticleBase { time_m = tStartMySlice; - if((time_m == 1000.0)) { + if((time_m == 0.0)) { IpplTimings::startTimer(dumpData); dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); @@ -1092,13 +1093,13 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - Eview(j)[0] += Eext_x; - Eview(j)[1] += Eext_y; - Eview(j)[2] += Eext_z; + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; - Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); - Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); - Pview(j)[2] += alpha * Eview(j)[2]; + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; }); //drift @@ -1128,20 +1129,21 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - E2view(j)[0] += Eext_x; - E2view(j)[1] += Eext_y; - E2view(j)[2] += Eext_z; - P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] - + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); - P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] - - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); - P2view(j)[2] += alpha * E2view(j)[2]; + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; }); time_m += dt; IpplTimings::startTimer(dumpData); - //dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 8bef5a309..e0981e81c 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -610,7 +610,6 @@ int main(int argc, char *argv[]){ //condition is not the same on different GPUs tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - //if(Ippl::Comm->rank() == 0) { if(rankTime == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); Kokkos::parallel_for(nloc, @@ -634,25 +633,13 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Kokkos::fence(); - if(rankTime == 0) { - unsigned int stepsToRun = 2*ntCoarse; - Pcoarse->BorisPIC(Pend->R, Pend->P, stepsToRun, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - Pcoarse->BorisPIC(Pbegin->R, Pbegin->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - Pcoarse->BorisPIC(Pbegin->R, Pbegin->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - //Pcoarse->BorisPIF(Pend->R, Pend->P, stepsToRun, dtFine, rankTime * dtSlice, 0, 0, Bext, rankTime, rankSpace, spaceComm); - //Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, rankTime * dtSlice, 0, 0, Bext, rankTime, rankSpace, spaceComm); - //Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, 
rankTime * dtSlice, 0, 0, Bext, rankTime, rankSpace, spaceComm); - Pcoarse->dumpParticleData(0, Pend->R, Pend->P, "cont"); - Pcoarse->dumpParticleData(0, Pbegin->R, Pbegin->P, "sep"); - } + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); - Kokkos::fence(); if(rankTime < sizeTime-1) { size_type bufSize = Pend->packedSize(nloc); @@ -662,59 +649,6 @@ int main(int argc, char *argv[]){ buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } - Ippl::Comm->barrier(); - - if(rankTime > 0) { - size_type bufSize = Pbegin->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); - buf->resetReadPos(); - } - - if(rankTime < sizeTime-1) { - size_type bufSize = Pbegin->packedSize(nloc); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); - MPI_Request request; - Ippl::Comm->isend(rankTime+1, tag, *Pbegin, *buf, request, nloc, timeComm); - buf->resetWritePos(); - MPI_Wait(&request, MPI_STATUS_IGNORE); - } - - - if(rankTime == 1) { - unsigned int stepsToRun = (rankTime+1) * ntCoarse; - // std::cout << "Rank: " << Ippl::Comm->rank() << "needs to run " << stepsToRun << " steps" << std::endl; - Pcoarse->BorisPIC(Pbegin->R, Pbegin->P, stepsToRun, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - } - - //Pcoarse->dumpParticleData(0, Pcoarse->R, Pcoarse->P, "new"); - //Pcoarse->dumpParticleData(0, Pbegin->R, Pbegin->P, "old"); - double Rerror2 = computeRL2Error(Pbegin->R, Pcoarse->R, length, spaceComm); - double Perror2 = computePL2Error(Pbegin->P, Pcoarse->P, spaceComm); - std::cout << "Rank: " << Ippl::Comm->rank() << " Rerror: " << Rerror2 << " Perror: " << Perror2 << std::endl; - Pbegin->R = Pbegin->R - Pcoarse->R; - Pbegin->P = Pbegin->P - Pcoarse->P; - //Pcoarse->dumpParticleData(0, Pbegin->R, Pbegin->P, "diff"); - - - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); - //Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); - //Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - //IpplTimings::stopTimer(deepCopy); #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, @@ -929,7 +863,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); IpplTimings::stopTimer(dumpData); From 01e1ab949e0e864eb65e3f3f0933485fed621d07 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 12 Jun 2023 17:01:56 
+0200 Subject: [PATCH 089/117] TSI and Landau damping modified and a bug in multiblock corrected --- alpine/PinT/BumponTailInstabilityPinT.cpp | 241 ++++++++++++++++------ alpine/PinT/ChargedParticlesPinT.hpp | 56 ++--- alpine/PinT/LandauDampingPinT.cpp | 241 +++++++++++++++------- alpine/PinT/PenningTrapPinT.cpp | 28 +-- 4 files changed, 385 insertions(+), 181 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index cf1a2c8e3..e965bf997 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -160,8 +160,7 @@ double CDF(const double& x, const double& delta, const double& k, } double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - Vector_t& length) { + Vector_t& length, MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -187,16 +186,18 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - lError = std::sqrt(localError)/std::sqrt(localNorm); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); - double relError = lError;//absError / std::sqrt(globaltemp); + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); return relError; } -double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -213,9 +214,12 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - lError = std::sqrt(localError)/std::sqrt(localNorm); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); - double relError = lError;//absError / std::sqrt(globaltemp); + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); return relError; @@ -391,7 +395,26 @@ const char* TestName = "TwoStreamInstability"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); - + + int spaceColor, timeColor; + MPI_Comm spaceComm, timeComm; + + int spaceProcs = std::atoi(argv[15]); + int timeProcs = std::atoi(argv[16]); + spaceColor = Ippl::Comm->rank() / spaceProcs; + timeColor = Ippl::Comm->rank() % spaceProcs; + + MPI_Comm_split(Ippl::getComm(), spaceColor, Ippl::Comm->rank(), &spaceComm); + MPI_Comm_split(Ippl::getComm(), timeColor, Ippl::Comm->rank(), &timeComm); + + int rankSpace, sizeSpace, rankTime, sizeTime; + MPI_Comm_rank(spaceComm, &rankSpace); + MPI_Comm_size(spaceComm, &sizeSpace); + + MPI_Comm_rank(timeComm, &rankTime); + MPI_Comm_size(timeComm, &sizeTime); + + Inform msg(TestName, Ippl::Comm->size()-1); Inform msg2all(TestName,INFORM_ALL_NODES); @@ -423,7 +446,7 @@ int main(int argc, char *argv[]){ const double tEnd = std::atof(argv[8]); const unsigned int nCycles = std::atoi(argv[12]); double tEndCycle = tEnd / nCycles; - const double dtSlice = tEndCycle / Ippl::Comm->size(); + const double dtSlice = tEndCycle / sizeTime; const double dtFine = std::atof(argv[9]); 
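The hunk above is where the bump-on-tail driver (and, later in this same patch, the Landau damping driver) gains the two-level space-time decomposition: the world communicator is split once by spaceColor for the spatial work of a single time slice and once by timeColor for the exchange of particle states between consecutive slices. A minimal standalone sketch of that split, assuming a job launched with spaceProcs*timeProcs ranks (the free function and its name are illustrative, not part of the patch):

#include <mpi.h>

// Ranks sharing spaceColor form one time slice and perform the field solve,
// scatters and reductions together through spaceComm; ranks sharing timeColor
// own the same particles on successive slices and exchange them via timeComm.
void splitSpaceTime(MPI_Comm world, int spaceProcs,
                    MPI_Comm* spaceComm, MPI_Comm* timeComm) {
    int rank = 0;
    MPI_Comm_rank(world, &rank);
    const int spaceColor = rank / spaceProcs;  // which time slice
    const int timeColor  = rank % spaceProcs;  // which spatial sub-group
    MPI_Comm_split(world, spaceColor, rank, spaceComm);
    MPI_Comm_split(world, timeColor, rank, timeComm);
}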
const double dtCoarse = std::atof(argv[10]); const unsigned int ntFine = std::ceil(dtSlice / dtFine); @@ -513,14 +536,30 @@ int main(int argc, char *argv[]){ double factorVelBulk = 1.0 - epsilon; double factorVelBeam = 1.0 - factorVelBulk; - size_type nlocBulk = (size_type)(factorVelBulk * totalP); - size_type nlocBeam = (size_type)(factorVelBeam * totalP); + double factorConf = 1.0 / sizeSpace; + size_type nlocBulk = (size_type)(factorConf * factorVelBulk * totalP); + size_type nlocBeam = (size_type)(factorConf * factorVelBeam * totalP); size_type nloc = nlocBulk + nlocBeam; - + + size_type Total_particles = 0; + + //MPI_Allreduce(&nloc, &Total_particles, 1, + // MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); + + //int rest = (int) (totalP - Total_particles); + + //if ( (rankTime == 0) && (rankSpace < rest) ) { + // ++nloc; + //} + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); + + //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,nloc); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -548,9 +587,20 @@ int main(int argc, char *argv[]){ Pcoarse->create(nloc); Pbegin->create(nloc); Pend->create(nloc); + + Pcoarse->q = Pcoarse->Q_m/Total_particles; using buffer_type = ippl::Communicate::buffer_type; int tag; + + + Pcoarse->shapetype_m = argv[13]; + Pcoarse->shapedegree_m = std::atoi(argv[14]); + IpplTimings::startTimer(initializeShapeFunctionPIF); + Pcoarse->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + Pcoarse->initNUFFT(FLPIF); #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs @@ -590,8 +640,8 @@ int main(int argc, char *argv[]){ tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if(Ippl::Comm->rank() == 0) { - Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + if(rankTime == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); Kokkos::parallel_for(nloc, generate_random, Dim>( Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, delta, kw, @@ -603,25 +653,33 @@ int main(int argc, char *argv[]){ else { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } - - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { - size_type bufSize = Pbegin->packedSize(nloc); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + if(rankTime < sizeTime-1) { + size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - 
Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pbegin, *buf, request, nloc); + Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } - //Ippl::Comm->barrier(); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - IpplTimings::stopTimer(deepCopy); #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, @@ -648,12 +706,11 @@ int main(int argc, char *argv[]){ << "Tolerance: " << tol << " No. of cycles: " << nCycles << endl - << "Np= " << nloc + << "Np= " << Total_particles << " Fourier modes = " << nmPIF << " Grid points = " << nrPIC << endl; - Pcoarse->q = Pcoarse->Q_m/nloc; IpplTimings::stopTimer(particleCreation); msg << "particles created and initial conditions assigned " << endl; @@ -708,28 +765,42 @@ int main(int argc, char *argv[]){ // isPreviousDomainConverged = false; //} - bool isConverged, isPreviousDomainConverged; - - Pcoarse->shapetype_m = argv[13]; - Pcoarse->shapedegree_m = std::atoi(argv[14]); - IpplTimings::startTimer(initializeShapeFunctionPIF); - Pcoarse->initializeShapeFunctionPIF(); - IpplTimings::stopTimer(initializeShapeFunctionPIF); - - Pcoarse->initNUFFT(FLPIF); - + int sign = 1; for (unsigned int nc=0; nc < nCycles; nc++) { - double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); - Pcoarse->time_m = tStartMySlice; - Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, - isPreviousDomainConverged, ntCoarse, - dtCoarse, tStartMySlice); + + double tStartMySlice; + bool sendCriteria, recvCriteria; + bool isConverged = false; + bool isPreviousDomainConverged = false; + + //even cycles + if(nc % 2 == 0) { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + if(rankTime == 0) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + msg.setPrintNode(Ippl::Comm->size()-1); + } + //odd cycles + else { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + if(rankTime == (sizeTime - 1)) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + msg.setPrintNode(0); + } + unsigned int it = 0; + while (!isConverged) { //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, nc+1, it+1); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -749,13 +820,13 @@ int main(int argc, char *argv[]){ tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { + if(recvCriteria && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime-sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); - MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, - Ippl::getComm(), MPI_STATUS_IGNORE); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-sign, tagbool, + timeComm, 
MPI_STATUS_IGNORE); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); @@ -771,7 +842,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -781,11 +852,10 @@ int main(int argc, char *argv[]){ PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); - double localRerror, localPerror; IpplTimings::startTimer(computeErrors); - double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); IpplTimings::stopTimer(computeErrors); //} @@ -796,14 +866,14 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(timeCommunication); - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + Ippl::Comm->isend(rankTime+sign, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); + MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+sign, tagbool, timeComm); } IpplTimings::stopTimer(timeCommunication); @@ -816,40 +886,73 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); //} IpplTimings::stopTimer(dumpData); + MPI_Barrier(spaceComm); + it += 1; } - Ippl::Comm->barrier(); + MPI_Barrier(MPI_COMM_WORLD); if((nCycles > 1) && (nc < (nCycles - 1))) { IpplTimings::startTimer(timeCommunication); tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + //send, receive criteria and tStartMySlice are reversed at the end of the cycle + if(nc % 2 == 0) { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + } + //odd cycles + else { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + } - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { - size_type bufSize = Pend->packedSize(nloc); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + if(recvCriteria) { + size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); + 
Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } - if(Ippl::Comm->rank() > 0) { + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); + Ippl::Comm->isend(rankTime-sign, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } IpplTimings::stopTimer(timeCommunication); - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); - IpplTimings::stopTimer(deepCopy); + sign *= -1; } } msg << TestName << " Parareal: End." << endl; @@ -857,5 +960,9 @@ int main(int argc, char *argv[]){ IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); + MPI_Comm_free(&spaceComm); + MPI_Comm_free(&timeComm); + + return 0; } diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 117016ef4..c951e17fd 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -413,7 +413,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { << ExAmp << endl; } - void dumpBumponTail(const unsigned int& nc, const unsigned int& iter) { + void dumpBumponTail(const unsigned int& nc, const unsigned int& iter, int rankTime, int rankSpace) { double fieldEnergy = 0.0; @@ -480,24 +480,26 @@ class ChargedParticlesPinT : public ippl::ParticleBase { fieldEnergy *= volume; - std::stringstream fname; - fname << "data/FieldBumponTail_rank_"; - fname << Ippl::Comm->rank(); - fname << "_nc_"; - fname << nc; - fname << "_iter_"; - fname << iter; - fname << ".csv"; + if(rankSpace == 0) { + std::stringstream fname; + fname << "data/FieldBumponTail_rank_"; + fname << rankTime; + fname << "_nc_"; + fname << nc; + fname << "_iter_"; + fname << iter; + fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); - csvout << time_m << " " - << fieldEnergy << " " - << EzAmp << endl; + csvout << time_m << " " + << fieldEnergy << " " + << EzAmp << endl; + } } @@ -802,14 +804,14 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIC(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, - const double dt, const double& tStartMySlice) { + const double dt, const double& tStartMySlice, MPI_Comm& spaceComm) { static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, 
PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIC_m = 0.0; - scatter(q, rhoPIC_m, Rtemp); + scatter(q, rhoPIC_m, Rtemp, spaceComm); rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); @@ -838,7 +840,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //scatter the charge onto the underlying grid rhoPIC_m = 0.0; - scatter(q, rhoPIC_m, Rtemp); + scatter(q, rhoPIC_m, Rtemp, spaceComm); rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); @@ -973,16 +975,16 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const bool& /*isConverged*/, - const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter) { + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, int rankTime, int rankSpace, + MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -996,8 +998,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0)) { IpplTimings::startTimer(dumpData); //dumpLandau(iter); - dumpBumponTail(nc, iter); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); + dumpBumponTail(nc, iter, rankTime, rankSpace); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } for (unsigned int it=0; it { //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp); + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -1032,8 +1034,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { IpplTimings::startTimer(dumpData); //dumpLandau(iter); - dumpBumponTail(nc, iter); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp); + dumpBumponTail(nc, iter, rankTime, rankSpace); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index da2491d49..ead8c38c7 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -145,8 +145,7 @@ double CDF(const double& x, const double& alpha, const double& k) { } double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - Vector_t& length) { + Vector_t& length, MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -172,16 +171,18 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - lError = std::sqrt(localError)/std::sqrt(localNorm); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); - double relError = lError;//absError / 
std::sqrt(globaltemp); + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); return relError; } -double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -198,9 +199,12 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); Kokkos::fence(); - lError = std::sqrt(localError)/std::sqrt(localNorm); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); - double relError = lError;//absError / std::sqrt(globaltemp); + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); return relError; @@ -376,6 +380,24 @@ const char* TestName = "LandauDampingPinT"; int main(int argc, char *argv[]){ Ippl ippl(argc, argv); + int spaceColor, timeColor; + MPI_Comm spaceComm, timeComm; + + int spaceProcs = std::atoi(argv[15]); + int timeProcs = std::atoi(argv[16]); + spaceColor = Ippl::Comm->rank() / spaceProcs; + timeColor = Ippl::Comm->rank() % spaceProcs; + + MPI_Comm_split(Ippl::getComm(), spaceColor, Ippl::Comm->rank(), &spaceComm); + MPI_Comm_split(Ippl::getComm(), timeColor, Ippl::Comm->rank(), &timeComm); + + int rankSpace, sizeSpace, rankTime, sizeTime; + MPI_Comm_rank(spaceComm, &rankSpace); + MPI_Comm_size(spaceComm, &sizeSpace); + + MPI_Comm_rank(timeComm, &rankTime); + MPI_Comm_size(timeComm, &sizeTime); + Inform msg(TestName, Ippl::Comm->size()-1); Inform msg2all(TestName,INFORM_ALL_NODES); @@ -407,7 +429,7 @@ int main(int argc, char *argv[]){ const double tEnd = std::atof(argv[8]); const unsigned int nCycles = std::atoi(argv[12]); double tEndCycle = tEnd / nCycles; - const double dtSlice = tEndCycle / Ippl::Comm->size(); + const double dtSlice = tEndCycle / sizeTime; const double dtFine = std::atof(argv[9]); const double dtCoarse = std::atof(argv[10]); const unsigned int ntFine = std::ceil(dtSlice / dtFine); @@ -465,11 +487,26 @@ int main(int argc, char *argv[]){ PLayout_t PL(FLPIC, meshPIC); - size_type nloc = totalP; + double factor = 1.0 / sizeSpace; + size_type nloc = (size_type)(factor * totalP); + + size_type Total_particles = 0; + + //MPI_Allreduce(&nloc, &Total_particles, 1, + // MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); + + //int rest = (int) (totalP - Total_particles); + + //if ( (rankTime == 0) && (rankSpace < rest) ) { + // ++nloc; + //} + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,nloc); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -498,8 +535,19 @@ int main(int argc, char *argv[]){ Pbegin->create(nloc); Pend->create(nloc); + Pcoarse->q = Pcoarse->Q_m/Total_particles; + using buffer_type = ippl::Communicate::buffer_type; int tag; + + Pcoarse->shapetype_m = argv[13]; + Pcoarse->shapedegree_m = std::atoi(argv[14]); + IpplTimings::startTimer(initializeShapeFunctionPIF); + Pcoarse->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + Pcoarse->initNUFFT(FLPIF); + #ifdef KOKKOS_ENABLE_CUDA //If we don't do the 
following even with the same seed the initial //condition is not the same on different GPUs @@ -538,8 +586,8 @@ int main(int argc, char *argv[]){ tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if(Ippl::Comm->rank() == 0) { - Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + if(rankTime == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); Kokkos::parallel_for(nloc, generate_random, Dim>( Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); @@ -550,25 +598,31 @@ int main(int argc, char *argv[]){ else { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); - - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { - size_type bufSize = Pbegin->packedSize(nloc); + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + if(rankTime < sizeTime-1) { + size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pbegin, *buf, request, nloc); + Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } - - //Ippl::Comm->barrier(); - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - IpplTimings::stopTimer(deepCopy); #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, @@ -591,13 +645,12 @@ int main(int argc, char *argv[]){ << "Tolerance: " << tol << " No. 
of cycles: " << nCycles << endl - << "Np= " << totalP + << "Np= " << Total_particles << " Fourier modes = " << nmPIF << " Grid points = " << nrPIC << endl; - Pcoarse->q = Pcoarse->Q_m/nloc; IpplTimings::stopTimer(particleCreation); msg << "particles created and initial conditions assigned " << endl; @@ -652,27 +705,40 @@ int main(int argc, char *argv[]){ // isPreviousDomainConverged = false; //} - bool isConverged, isPreviousDomainConverged; - - Pcoarse->shapetype_m = argv[13]; - Pcoarse->shapedegree_m = std::atoi(argv[14]); - IpplTimings::startTimer(initializeShapeFunctionPIF); - Pcoarse->initializeShapeFunctionPIF(); - IpplTimings::stopTimer(initializeShapeFunctionPIF); - - Pcoarse->initNUFFT(FLPIF); + int sign = 1; for (unsigned int nc=0; nc < nCycles; nc++) { - double tStartMySlice = (nc * tEndCycle) + (Ippl::Comm->rank() * dtSlice); - Pcoarse->time_m = tStartMySlice; - Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, isConverged, - isPreviousDomainConverged, ntCoarse, - dtCoarse, tStartMySlice); + double tStartMySlice; + bool sendCriteria, recvCriteria; + bool isConverged = false; + bool isPreviousDomainConverged = false; + + //even cycles + if(nc % 2 == 0) { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + if(rankTime == 0) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + msg.setPrintNode(Ippl::Comm->size()-1); + } + //odd cycles + else { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + if(rankTime == (sizeTime - 1)) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + msg.setPrintNode(0); + } + unsigned int it = 0; while (!isConverged) { //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, isConverged, tStartMySlice, nc+1, it+1); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -689,13 +755,13 @@ int main(int argc, char *argv[]){ tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - if((Ippl::Comm->rank() > 0) && (!isPreviousDomainConverged)) { + if(recvCriteria && (!isPreviousDomainConverged)) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()-1, tag, *Pbegin, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime-sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); - MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()-1, tagbool, - Ippl::getComm(), MPI_STATUS_IGNORE); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-sign, tagbool, + timeComm, MPI_STATUS_IGNORE); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); @@ -711,7 +777,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -719,16 +785,8 @@ int main(int argc, char *argv[]){ 
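For orientation: as far as the variable names allow one to tell, the loop assembled by these hunks is the standard parareal update

    U^{k+1}_{n+1} = G(U^{k+1}_n) + F(U^k_n) - G(U^k_n),

with F the fine PIF propagator (LeapFrogPIF), G the coarse PIC propagator (LeapFrogPIC), n the time slice owned by rankTime and k the iteration index. Pend carries the corrected end-of-slice state that is forwarded to the neighbouring time rank, and the even/odd cycle branches above only reverse the sweep direction (the sign variable) from one cycle to the next.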
PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); - double localRerror, localPerror; - double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - - //double Rerror = computeRL2Error(Pend->R, Pcoarse->RprevIter, it+1, Ippl::Comm->rank(), localRerror, length); - //double Perror = computePL2Error(Pend->P, Pcoarse->PprevIter, it+1, Ippl::Comm->rank(), localPerror); - //double EfieldError = 0; - //if(it > 0) { - // EfieldError = computeFieldError(Pcoarse->rhoPIF_m, Pcoarse->rhoPIFprevIter_m); - //} + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); IpplTimings::stopTimer(computeErrors); @@ -737,17 +795,17 @@ int main(int argc, char *argv[]){ } IpplTimings::startTimer(timeCommunication); - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { + if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()+1, tag, *Pend, *buf, request, nloc); + Ippl::Comm->isend(rankTime+sign, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Send(&isConverged, 1, MPI_C_BOOL, Ippl::Comm->rank()+1, tagbool, Ippl::getComm()); + MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+sign, tagbool, timeComm); } IpplTimings::stopTimer(timeCommunication); - + msg << "Finished iteration: " << it+1 << " in cycle: " << nc+1 @@ -757,37 +815,70 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(localRerror, localPerror, nc+1, it+1); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); + MPI_Barrier(spaceComm); + it += 1; } - Ippl::Comm->barrier(); + MPI_Barrier(MPI_COMM_WORLD); if((nCycles > 1) && (nc < (nCycles - 1))) { IpplTimings::startTimer(timeCommunication); tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - - if(Ippl::Comm->rank() < Ippl::Comm->size()-1) { - size_type bufSize = Pend->packedSize(nloc); + + //send, receive criteria and tStartMySlice are reversed at the end of the cycle + if(nc % 2 == 0) { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + } + //odd cycles + else { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + } + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + if(recvCriteria) { + size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - Ippl::Comm->recv(Ippl::Comm->rank()+1, tag, *Pend, *buf, bufSize, nloc); + Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } - if(Ippl::Comm->rank() > 0) { + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), 
Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); MPI_Request request; - Ippl::Comm->isend(Ippl::Comm->rank()-1, tag, *Pend, *buf, request, nloc); + Ippl::Comm->isend(rankTime-sign, tag, *Pend, *buf, request, nloc, timeComm); buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } IpplTimings::stopTimer(timeCommunication); - - IpplTimings::startTimer(deepCopy); - Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); - Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); - IpplTimings::stopTimer(deepCopy); + sign *= -1; } } @@ -796,5 +887,9 @@ int main(int argc, char *argv[]){ IpplTimings::print(); IpplTimings::print(std::string("timing.dat")); + MPI_Comm_free(&spaceComm); + MPI_Comm_free(&timeComm); + + return 0; } diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index e0981e81c..a19e200eb 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -524,15 +524,15 @@ int main(int argc, char *argv[]){ MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); - int rest = (int) (totalP - Total_particles); + //int rest = (int) (totalP - Total_particles); - if ( Ippl::Comm->rank() < rest ) { - ++nloc; - } + //if ( (rankTime == 0) && (rankSpace < rest) ) { + // ++nloc; + //} double Q = -1562.5; double Bext = 5.0; - Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,totalP); + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); Pbegin = std::make_unique(PL); Pend = std::make_unique(PL); @@ -561,7 +561,7 @@ int main(int argc, char *argv[]){ Pbegin->create(nloc); Pend->create(nloc); - Pcoarse->q = Pcoarse->Q_m/totalP; + Pcoarse->q = Pcoarse->Q_m/Total_particles; using buffer_type = ippl::Communicate::buffer_type; int tag; @@ -673,7 +673,7 @@ int main(int argc, char *argv[]){ //<< " Max. iterations: " << maxIter << " No. 
of cycles: " << nCycles << endl - << "Np= " << totalP + << "Np= " << Total_particles << " Fourier modes = " << nmPIF << " Grid points = " << nrPIC << endl; @@ -742,12 +742,6 @@ int main(int argc, char *argv[]){ bool isConverged = false; bool isPreviousDomainConverged = false; - //IpplTimings::startTimer(initializeCycles); - //Pcoarse->initializeParareal(Pbegin->R, Pbegin->P, Pcoarse->R, Pcoarse->P, Pcoarse->R0, - // Pcoarse->P0, isConverged, - // isPreviousDomainConverged, ntCoarse, - // dtCoarse, tStartMySlice, Bext, rankTime, spaceComm); - //IpplTimings::stopTimer(initializeCycles); //even cycles if(nc % 2 == 0) { sendCriteria = (rankTime < (sizeTime-1)); @@ -896,7 +890,13 @@ int main(int argc, char *argv[]){ recvCriteria = (rankTime > 0); tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); } - + + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + if(recvCriteria) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); From 86522129a3d0f1f1154fea70b8a647e9544117dc Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 19 Jun 2023 10:09:10 +0200 Subject: [PATCH 090/117] Penning Trap ICs modified in all mini-apps --- alpine/ElectrostaticPIC/ChargedParticles.hpp | 2 +- alpine/ElectrostaticPIC/PenningTrap.cpp | 12 ++++++++---- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 12 ++++++++---- alpine/PinT/PenningTrapPinT.cpp | 10 +++++++--- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 61730648d..67b8f738f 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -341,7 +341,7 @@ class ChargedParticles : public ippl::ParticleBase { rhoNorm_m = norm(rho_m); IpplTimings::stopTimer(sumTimer); - //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); //rho = rho_e - rho_i rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); diff --git a/alpine/ElectrostaticPIC/PenningTrap.cpp b/alpine/ElectrostaticPIC/PenningTrap.cpp index f2b408cb2..bdb0da505 100644 --- a/alpine/ElectrostaticPIC/PenningTrap.cpp +++ b/alpine/ElectrostaticPIC/PenningTrap.cpp @@ -206,7 +206,8 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t rmin = {0.0, 0.0, 0.0}; - Vector_t rmax = {20.0, 20.0, 20.0}; + //Vector_t rmax = {20.0, 20.0, 20.0}; + Vector_t rmax = {25.0, 25.0, 25.0}; double dx = rmax[0] / nr[0]; double dy = rmax[1] / nr[1]; double dz = rmax[2] / nr[2]; @@ -236,9 +237,12 @@ int main(int argc, char *argv[]){ for (unsigned d = 0; dE_m.initialize(mesh, FL); P->rho_m.initialize(mesh, FL); diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index 1ae3ab415..e667fed18 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -183,7 +183,8 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t rmin(0.0); - Vector_t rmax(20.0); + //Vector_t rmax(20.0); + Vector_t rmax(25.0); double dx = rmax[0] / nr[0]; double dy = rmax[1] / nr[1]; double dz = rmax[2] / nr[2]; @@ -195,9 +196,12 @@ int 
main(int argc, char *argv[]){ for (unsigned d = 0; d& Q, ParticleAttrib& Qp Vector_t diff = Qview(i) - QprevIterView(i); for (unsigned d = 0; d < 3; ++d) { - bool isLeft = (diff[d] <= -22.0); - bool isRight = (diff[d] >= 22.0); - bool isInside = ((diff[d] > -22.0) && (diff[d] < 22.0)); + bool isLeft = (diff[d] <= -17.0); + bool isRight = (diff[d] >= 17.0); + bool isInside = ((diff[d] > -17.0) && (diff[d] < 17.0)); diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) +(isRight * (diff[d] - length[d])); } @@ -485,6 +485,7 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t rmin(0.0); Vector_t rmax(25.0); + //Vector_t rmax(20.0); Vector_t length = rmax - rmin; double dxPIC = length[0] / nrPIC[0]; double dyPIC = length[1] / nrPIC[1]; @@ -496,6 +497,9 @@ int main(int argc, char *argv[]){ for (unsigned d = 0; d Date: Mon, 19 Jun 2023 10:12:25 +0200 Subject: [PATCH 091/117] error checking criterion changed --- alpine/PinT/PenningTrapPinT.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 8a1988e15..3f7d2b587 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -159,9 +159,9 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp Vector_t diff = Qview(i) - QprevIterView(i); for (unsigned d = 0; d < 3; ++d) { - bool isLeft = (diff[d] <= -17.0); - bool isRight = (diff[d] >= 17.0); - bool isInside = ((diff[d] > -17.0) && (diff[d] < 17.0)); + bool isLeft = (diff[d] <= -22.0); + bool isRight = (diff[d] >= 22.0); + bool isInside = ((diff[d] > -22.0) && (diff[d] < 22.0)); diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) +(isRight * (diff[d] - length[d])); } From cb03845465ac1ad90cf7048f267f50265fa0f654 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 20 Jun 2023 14:06:22 +0200 Subject: [PATCH 092/117] Total particles changed in PIF codes --- .../BumponTailInstabilityPIF.cpp | 43 ++++++++++--------- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 26 +++++------ alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 26 +++++------ src/Particle/ParticleAttrib.hpp | 2 +- 4 files changed, 52 insertions(+), 45 deletions(-) diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp index 5ddbd6c46..ca7efa343 100644 --- a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -173,11 +173,6 @@ int main(int argc, char *argv[]){ const unsigned int nt = std::atoi(argv[5]); const double dt = std::atof(argv[6]); - msg << TestName - << endl - << "nt " << nt << " Np= " - << totalP << " Fourier modes = " << nr - << endl; using bunch_type = ChargedParticlesPIF; @@ -240,9 +235,27 @@ int main(int argc, char *argv[]){ FieldLayout_t FL(domain, decomp, isAllPeriodic); PLayout_t PL(FL, mesh); + double factorConf = 1.0/Ippl::Comm->size(); + double factorVelBulk = 1.0 - epsilon; + double factorVelBeam = 1.0 - factorVelBulk; + size_type nlocBulk = (size_type)(factorConf * factorVelBulk * totalP); + size_type nlocBeam = (size_type)(factorConf * factorVelBeam * totalP); + size_type nloc = nlocBulk + nlocBeam; + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + + msg << TestName + << endl + << "nt " << nt << " Np= " + << Total_particles << " Fourier modes = " << nr + << endl; + + //Q = -\int\int f dx dv double 
Q = -rmax[0] * rmax[1] * rmax[2]; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,totalP); + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,Total_particles); P->nr_m = nr; @@ -266,21 +279,11 @@ int main(int argc, char *argv[]){ maxU[d] = CDF(rmax[d], delta, kw[d], d); } - double factorConf = 1.0/Ippl::Comm->size(); - double factorVelBulk = 1.0 - epsilon; - double factorVelBeam = 1.0 - factorVelBulk; - size_type nlocBulk = (size_type)(factorConf * factorVelBulk * totalP); - size_type nlocBeam = (size_type)(factorConf * factorVelBeam * totalP); - size_type nloc = nlocBulk + nlocBeam; - size_type Total_particles = 0; - - MPI_Allreduce(&nloc, &Total_particles, 1, - MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); - int rest = (int) (totalP - Total_particles); + //int rest = (int) (totalP - Total_particles); - if ( Ippl::Comm->rank() < rest ) - ++nloc; + //if ( Ippl::Comm->rank() < rest ) + // ++nloc; P->create(nloc); Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); @@ -293,7 +296,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); IpplTimings::stopTimer(particleCreation); - P->q = P->Q_m/totalP; + P->q = P->Q_m/Total_particles; msg << "particles created and initial conditions assigned " << endl; IpplTimings::startTimer(initializeShapeFunctionPIF); diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 0ed48fc0e..2373d3f15 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -170,10 +170,18 @@ int main(int argc, char *argv[]){ const unsigned int nt = std::atoi(argv[5]); const double dt = std::atof(argv[6]); + double factor = 1.0/Ippl::Comm->size(); + size_type nloc = (size_type)(factor * totalP); + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + + msg << "Landau damping" << endl << "nt " << nt << " Np= " - << totalP << " Fourier modes = " << nr + << Total_particles << " Fourier modes = " << nr << endl; using bunch_type = ChargedParticlesPIF; @@ -214,7 +222,7 @@ int main(int argc, char *argv[]){ //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; //double Q = -64.0 * pi * pi * pi; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,totalP); + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,Total_particles); P->nr_m = nr; @@ -241,17 +249,11 @@ int main(int argc, char *argv[]){ //maxU[d] = rmax[d];//CDF(Regions(myRank)[d].max(), alpha, kw[d]); } - double factor = 1.0/Ippl::Comm->size(); - size_type nloc = (size_type)(factor * totalP); - size_type Total_particles = 0; - - MPI_Allreduce(&nloc, &Total_particles, 1, - MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); - int rest = (int) (totalP - Total_particles); + //int rest = (int) (totalP - Total_particles); - if ( Ippl::Comm->rank() < rest ) - ++nloc; + //if ( Ippl::Comm->rank() < rest ) + // ++nloc; P->create(nloc); Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); @@ -263,7 +265,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); IpplTimings::stopTimer(particleCreation); - P->q = P->Q_m/totalP; + P->q = P->Q_m/Total_particles; msg << "particles created and initial conditions assigned " << endl; IpplTimings::startTimer(initializeShapeFunctionPIF); diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index e667fed18..f70ddaa08 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ 
b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -161,10 +161,18 @@ int main(int argc, char *argv[]){ const unsigned int nt = std::atoi(argv[5]); const double dt = std::atof(argv[6]); + double factor = 1.0/Ippl::Comm->size(); + size_type nloc = (size_type)(factor * totalP); + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + + msg << TestName << endl << "nt " << nt << " Np= " - << totalP << " Fourier modes = " << nr + << Total_particles << " Fourier modes = " << nr << endl; using bunch_type = ChargedParticlesPIF; @@ -213,7 +221,7 @@ int main(int argc, char *argv[]){ double Q = -1562.5; double Bext = 5.0; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,totalP); + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,Total_particles); P->nr_m = nr; @@ -236,17 +244,11 @@ int main(int argc, char *argv[]){ maxU[d] = CDF(rmax[d], mu[d], sd[d]); } - double factor = 1.0/Ippl::Comm->size(); - size_type nloc = (size_type)(factor * totalP); - size_type Total_particles = 0; - - MPI_Allreduce(&nloc, &Total_particles, 1, - MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); - int rest = (int) (totalP - Total_particles); + //int rest = (int) (totalP - Total_particles); - if ( Ippl::Comm->rank() < rest ) - ++nloc; + //if ( Ippl::Comm->rank() < rest ) + // ++nloc; P->create(nloc); Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); @@ -258,7 +260,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); IpplTimings::stopTimer(particleCreation); - P->q = P->Q_m/totalP; + P->q = P->Q_m/Total_particles; msg << "particles created and initial conditions assigned " << endl; IpplTimings::startTimer(initializeShapeFunctionPIF); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 6a4047742..7e45c5071 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -543,7 +543,7 @@ namespace ippl { MPI_C_DOUBLE_COMPLEX, MPI_SUM, spaceComm); IpplTimings::stopTimer(scatterAllReducePIFTimer); - //IpplTimings::startTimer(scatterPIFNUFFTTimer); + IpplTimings::startTimer(scatterPIFNUFFTTimer); using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for("Multiply with shape functions", From 824fc55b875c31e073b58861021b9e04361e2f8a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 24 Jun 2023 08:42:00 +0200 Subject: [PATCH 093/117] Bug in Two-stram instability fixed --- alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp | 1 + alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp index ca7efa343..dcd059bdf 100644 --- a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -303,6 +303,7 @@ int main(int argc, char *argv[]){ P->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); + P->initNUFFT(FL); P->scatter(); diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 51c308f8f..78042f404 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -133,6 +133,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { fftParams.add("tolerance", 1e-6); fftParams.add("use_cufinufft_defaults", false); + //fftParams.add("use_cufinufft_defaults", true); q.initializeNUFFT(FL, 1, fftParams); 
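The "Total particles changed in PIF codes" hunks above make the per-particle charge consistent with the number of particles that are actually created: every rank truncates its share of totalP, the exact global count is recovered with an all-reduce (it can end up slightly below the requested totalP now that the remainder redistribution is commented out), and q is set from that count. A small sketch of the bookkeeping, with illustrative names and plain MPI types in place of ippl's size_type:

#include <mpi.h>

// Each rank creates a truncated share of the requested particle number; the
// global sum is taken afterwards so that q = Q / total is identical on all
// ranks even when totalP is not divisible by the communicator size.
unsigned long long localShare(unsigned long long totalP, int commSize) {
    return static_cast<unsigned long long>((1.0 / commSize) * totalP);
}

unsigned long long globalCount(unsigned long long nloc, MPI_Comm comm) {
    unsigned long long total = 0;
    MPI_Allreduce(&nloc, &total, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
    return total;
}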
E.initializeNUFFT(FL, 2, fftParams); From ffcdaf782b3a76f019de48104b0a39c8aa039cc6 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 7 Jul 2023 08:24:17 +0200 Subject: [PATCH 094/117] FFT temporaries moved as member variables and sort ption removed in NUFFT as it is error prone --- .../BumponTailInstability.cpp | 4 +- alpine/ElectrostaticPIC/ChargedParticles.hpp | 2 +- alpine/ElectrostaticPIC/PenningTrap.cpp | 2 +- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 2 +- alpine/PinT/ChargedParticlesPinT.hpp | 2 +- src/FFT/FFT.h | 26 ++- src/FFT/FFT.hpp | 114 ++++++++---- src/Particle/ParticleAttrib.hpp | 2 +- test/FFT/TestNUFFT1.cpp | 164 +++++++++--------- 9 files changed, 193 insertions(+), 125 deletions(-) diff --git a/alpine/ElectrostaticPIC/BumponTailInstability.cpp b/alpine/ElectrostaticPIC/BumponTailInstability.cpp index c15cf60aa..07e595cbc 100644 --- a/alpine/ElectrostaticPIC/BumponTailInstability.cpp +++ b/alpine/ElectrostaticPIC/BumponTailInstability.cpp @@ -252,7 +252,7 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.5*dx;//0.05 + const double dt = std::atof(argv[9]);;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -383,6 +383,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpBumponTail(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -442,6 +443,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); P->dumpBumponTail(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 67b8f738f..61730648d 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -341,7 +341,7 @@ class ChargedParticles : public ippl::ParticleBase { rhoNorm_m = norm(rho_m); IpplTimings::stopTimer(sumTimer); - dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); //rho = rho_e - rho_i rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); diff --git a/alpine/ElectrostaticPIC/PenningTrap.cpp b/alpine/ElectrostaticPIC/PenningTrap.cpp index bdb0da505..4cb27474c 100644 --- a/alpine/ElectrostaticPIC/PenningTrap.cpp +++ b/alpine/ElectrostaticPIC/PenningTrap.cpp @@ -216,7 +216,7 @@ int main(int argc, char *argv[]){ Vector_t origin = {rmin[0], rmin[1], rmin[2]}; //unsigned int nrMax = 2048;// Max grid size in our studies //double dxFinest = rmax[0] / nrMax; - const double dt = 0.05;//0.5 * dxFinest;//size of timestep + const double dt = std::atof(argv[9]);;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 78042f404..b271372bc 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -128,7 +128,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { ippl::ParameterList fftParams; fftParams.add("gpu_method", 1); - fftParams.add("gpu_sort", 1); + fftParams.add("gpu_sort", 0); 
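The src/FFT hunks that follow implement the "temporaries moved as member variables" half of this patch: the Kokkos::View scratch buffers that used to be allocated inside every transform become members (tempField_m, tempFieldf_m, tempFieldg_m) that are reallocated only when the requested extents outgrow them. A minimal sketch of that grow-only reuse pattern (free function and names are illustrative, not the library's API):

#include <Kokkos_Core.hpp>
#include <cstddef>

// Keep a persistent scratch view and grow it lazily; transforms of the same
// size then reuse the existing allocation instead of creating a fresh view
// on every call.
template <typename T>
void ensureScratch(Kokkos::View<T***>& scratch,
                   std::size_t n0, std::size_t n1, std::size_t n2) {
    if (scratch.size() < n0 * n1 * n2) {
        Kokkos::realloc(scratch, n0, n1, n2);
    }
}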
fftParams.add("gpu_kerevalmeth", 1); fftParams.add("tolerance", 1e-6); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index c951e17fd..9cb817dd5 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -165,7 +165,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { ippl::ParameterList fftParams; fftParams.add("gpu_method", 1); - fftParams.add("gpu_sort", 1); + fftParams.add("gpu_sort", 0); fftParams.add("gpu_kerevalmeth", 1); fftParams.add("tolerance", 1e-6); diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 2b41a9495..1b0fad70e 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -36,6 +36,7 @@ #include #include +#include "Types/IpplTypes.h" #include "FieldLayout/FieldLayout.h" #include "Field/Field.h" //#include "Particle/ParticleAttrib.h" @@ -175,6 +176,7 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backend; using workspace_t = typename heffte::fft3d::template buffer_container; + using view_type = typename detail::ViewType::view_type; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. @@ -202,6 +204,7 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_type tempField_m; }; @@ -220,6 +223,8 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backend; typedef Kokkos::complex Complex_t; using workspace_t = typename heffte::fft3d_r2c::template buffer_container; + using view_real_type = typename detail::ViewType::view_type; + using view_complex_type = typename detail::ViewType::view_type; typedef Field ComplexField_t; @@ -253,6 +258,8 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_real_type tempFieldf_m; + view_complex_type tempFieldg_m; }; @@ -269,6 +276,7 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backendSine; using workspace_t = typename heffte::fft3d::template buffer_container; + using view_type = typename detail::ViewType::view_type; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. @@ -294,6 +302,7 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_type tempField_m; }; /** @@ -309,6 +318,7 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backendCos; using workspace_t = typename heffte::fft3d::template buffer_container; + using view_type = typename detail::ViewType::view_type; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. @@ -334,6 +344,7 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_type tempField_m; }; @@ -353,20 +364,23 @@ namespace ippl { using complexType = typename detail::CufinufftType::complexType; using plan_t = typename detail::CufinufftType::plan_t; + using view_field_type = typename detail::ViewType::view_type; + using view_particle_real_type = typename detail::ViewType::view_type; + using view_particle_complex_type = typename detail::ViewType::view_type; /** Create a new FFT object with the layout for the input Field, type * (1 or 2) for the NUFFT and parameters for cuFINUFFT. */ - FFT(const Layout_t& layout, int type, const ParameterList& params); + FFT(const Layout_t& layout, const detail::size_type& localNp, int type, const ParameterList& params); // Destructor ~FFT(); /** Do the NUFFT. */ - template - void transform(const ParticleAttrib< Vector, Properties... 
>& R, - ParticleAttrib& Q, ComplexField_t& f); + template + void transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, ComplexField_t& f); private: @@ -382,6 +396,10 @@ namespace ippl { int ier_m; T tol_m; int type_m; + view_field_type tempField_m; + view_particle_real_type tempR_m[3] = {}; + view_particle_complex_type tempQ_m; + }; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 0698e1ceb..2c36113fa 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -74,6 +74,9 @@ namespace ippl { high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } + if(tempField_m.size() < lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } setup(low, high, params); } @@ -148,11 +151,12 @@ namespace ippl { *2) heffte accepts data in layout left (by default) eventhough this *can be changed during heffte box creation */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); + //Kokkos::View + // tempField("tempField", fview.extent(0) - 2*nghost, + // fview.extent(1) - 2*nghost, + // fview.extent(2) - 2*nghost); + auto tempField = tempField_m; using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for("copy from Kokkos FFT", @@ -259,6 +263,14 @@ namespace ippl { lDomOutput[d].first() - 1); } + + if(tempFieldf_m.size() < lDomInput.size()) { + Kokkos::realloc(tempFieldf_m, lDomInput[0].length(), lDomInput[1].length(), lDomInput[2].length()); + } + if(tempFieldg_m.size() < lDomOutput.size()) { + Kokkos::realloc(tempFieldg_m, lDomOutput[0].length(), lDomOutput[1].length(), lDomOutput[2].length()); + } + setup(lowInput, highInput, lowOutput, highOutput, params); } @@ -337,16 +349,18 @@ namespace ippl { *2) heffte accepts data in layout left (by default) eventhough this *can be changed during heffte box creation */ - Kokkos::View - tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, - fview.extent(1) - 2*nghostf, - fview.extent(2) - 2*nghostf); - - Kokkos::View - tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, - gview.extent(1) - 2*nghostg, - gview.extent(2) - 2*nghostg); - + //Kokkos::View + // tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, + // fview.extent(1) - 2*nghostf, + // fview.extent(2) - 2*nghostf); + + //Kokkos::View + // tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, + // gview.extent(1) - 2*nghostg, + // gview.extent(2) - 2*nghostg); + + auto tempFieldf = tempFieldf_m; + auto tempFieldg = tempFieldg_m; using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for("copy from Kokkos f field in FFT", @@ -463,6 +477,9 @@ namespace ippl { high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } + if(tempField_m.size() < lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } setup(low, high, params); } @@ -534,11 +551,12 @@ namespace ippl { *2) heffte accepts data in layout left (by default) eventhough this *can be changed during heffte box creation */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); + //Kokkos::View + // tempField("tempField", fview.extent(0) - 2*nghost, + // fview.extent(1) - 2*nghost, + // fview.extent(2) - 2*nghost); + auto tempField = tempField_m; using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for("copy from Kokkos FFT", @@ -624,6 +642,9 @@ namespace ippl { high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } + if(tempField_m.size() < 
lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } setup(low, high, params); } @@ -696,11 +717,12 @@ namespace ippl { *2) heffte accepts data in layout left (by default) eventhough this *can be changed during heffte box creation */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); + //Kokkos::View + // tempField("tempField", fview.extent(0) - 2*nghost, + // fview.extent(1) - 2*nghost, + // fview.extent(2) - 2*nghost); + auto tempField = tempField_m; using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for("copy from Kokkos FFT", @@ -762,6 +784,7 @@ namespace ippl { template FFT::FFT(const Layout_t& layout, + const detail::size_type& localNp, int type, const ParameterList& params) { @@ -783,6 +806,17 @@ namespace ippl { } type_m = type; + if(tempField_m.size() < lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } + for(size_t d = 0; d < Dim; ++d) { + if(tempR_m[d].size() < localNp) { + Kokkos::realloc(tempR_m[d], localNp); + } + } + if(tempQ_m.size() < localNp) { + Kokkos::realloc(tempQ_m, localNp); + } setup(nmodes, params); } @@ -832,10 +866,10 @@ namespace ippl { template - template + template void - FFT::transform(const ParticleAttrib< Vector, Properties... >& R, - ParticleAttrib& Q, + FFT::transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, typename FFT::ComplexField_t& f) { @@ -865,24 +899,30 @@ namespace ippl { * cuFINUFFT's layout is left, hence we allocate the temporary * Kokkos views with the same layout */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); + //Kokkos::View + // tempField("tempField", fview.extent(0) - 2*nghost, + // fview.extent(1) - 2*nghost, + // fview.extent(2) - 2*nghost); - //Initialize the pointers to NULL and fill only relevant dimensions - //CUFINUFFT requires the input like this. - Kokkos::View tempR[3] = {}; + ////Initialize the pointers to NULL and fill only relevant dimensions + ////CUFINUFFT requires the input like this. 
+ //Kokkos::View tempR[3] = {}; - for(size_t d = 0; d < Dim; ++d) { - Kokkos::realloc(tempR[d], localNp); - } + //for(size_t d = 0; d < Dim; ++d) { + // Kokkos::realloc(tempR[d], localNp); + //} - Kokkos::View tempQ("tempQ", localNp); - + //Kokkos::View tempQ("tempQ", localNp); + + auto tempField = tempField_m; + auto tempQ = tempQ_m; + Kokkos::View tempR[3] = {}; + for(size_t d = 0; d < Dim; ++d) { + tempR[d] = tempR_m[d]; + } using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for("copy from field data NUFFT", diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 7e45c5071..93f8620b3 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -497,7 +497,7 @@ namespace ippl { template void ParticleAttrib::initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams) { - fftType_mp = std::make_shared>(layout, type, fftParams); + fftType_mp = std::make_shared>(layout, *(this->localNum_mp), type, fftParams); } diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index a020c4c79..0e261b035 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -73,7 +73,7 @@ int main(int argc, char *argv[]) { typedef Bunch bunch_type; - ippl::Vector pt = {32, 32, 32}; + ippl::Vector pt = {512, 512, 512}; ippl::Index I(pt[0]); ippl::Index J(pt[1]); ippl::Index K(pt[2]); @@ -91,10 +91,16 @@ int main(int argc, char *argv[]) { 2.0 * pi / double(pt[2]), }; + //std::array dx = { + // 25.0 / double(pt[0]), + // 25.0 / double(pt[1]), + // 25.0 / double(pt[2]), + //}; typedef ippl::Vector Vector_t; Vector_t hx = {dx[0], dx[1], dx[2]}; Vector_t origin = {-pi, -pi, -pi}; + //Vector_t origin = {0, 0, 0}; ippl::UniformCartesian mesh(owned, hx, origin); playout_type pl(layout, mesh); @@ -105,7 +111,7 @@ int main(int argc, char *argv[]) { using size_type = ippl::detail::size_type; - size_type Np = std::pow(32,3) * 20; + size_type Np = std::pow(512,3) * 5; typedef ippl::Field, dim> field_type; @@ -115,9 +121,9 @@ int main(int argc, char *argv[]) { ippl::ParameterList fftParams; fftParams.add("gpu_method", 1); - fftParams.add("gpu_sort", 1); + fftParams.add("gpu_sort", 0); fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-10); + fftParams.add("tolerance", 1e-6); fftParams.add("use_cufinufft_defaults", false); @@ -127,15 +133,17 @@ int main(int argc, char *argv[]) { int type = 1; - fft = std::make_unique(layout, type, fftParams); Vector_t minU = {-pi, -pi, -pi}; Vector_t maxU = {pi, pi, pi}; + //Vector_t minU = {0.0, 0.0, 0.0}; + //Vector_t maxU = {25.0, 25.0, 25.0}; size_type nloc = Np/Ippl::Comm->size(); bunch.create(nloc); + fft = std::make_unique(layout, nloc, type, fftParams); Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42)); Kokkos::parallel_for(nloc, generate_random, dim>( @@ -167,82 +175,82 @@ int main(int argc, char *argv[]) { auto Qview = bunch.Q.getView(); Kokkos::complex imag = {0.0, 1.0}; - size_t flatN = pt[0] * pt[1] * pt[2]; - auto fview = field_dft.getView(); + //size_t flatN = pt[0] * pt[1] * pt[2]; + //auto fview = field_dft.getView(); - typedef Kokkos::TeamPolicy<> team_policy; - typedef Kokkos::TeamPolicy<>::member_type member_type; - - Kokkos::parallel_for("NUDFT type 1", - team_policy(flatN, Kokkos::AUTO), - KOKKOS_LAMBDA(const member_type& teamMember) { - const size_t flatIndex = teamMember.league_rank(); - - const int k = (int)(flatIndex / (pt[0] * pt[1])); - const int flatIndex2D = flatIndex - (k * pt[0] * pt[1]); - const int i = flatIndex2D % pt[0]; - const int j = 
(int)(flatIndex2D / pt[0]); - - Kokkos::complex reducedValue = 0.0; - ippl::Vector iVec = {i, j, k}; - ippl::VectorkVec; - for(size_t d = 0; d < 3; ++d) { - kVec[d] = (2.0 * pi / (maxU[d] - minU[d])) * (iVec[d] - (pt[d] / 2)); - } - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, nloc), - [=](const size_t idx, Kokkos::complex& innerReduce) - { - double arg = 0.0; - for(size_t d = 0; d < 3; ++d) { - arg += kVec[d]*Rview(idx)[d]; - } - const double& val = Qview(idx); - - innerReduce += (Kokkos::cos(arg) - - imag * Kokkos::sin(arg)) * val; - }, Kokkos::Sum>(reducedValue)); - - if(teamMember.team_rank() == 0) { - fview(i+nghost,j+nghost,k+nghost) = reducedValue; - } - - }); - - typename field_type::HostMirror rhoNUDFT_host = field_dft.getHostMirror(); - Kokkos::deep_copy(rhoNUDFT_host, field_dft.getView()); - std::stringstream pname; - pname << "data/FieldFFT_"; - pname << Ippl::Comm->rank(); - pname << ".csv"; - Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); - pcsvout.precision(10); - pcsvout.setf(std::ios::scientific, std::ios::floatfield); - pcsvout << "rho" << endl; - for (int i = 0; i< pt[0]; i++) { - for (int j = 0; j< pt[1]; j++) { - for (int k = 0; k< pt[2]; k++) { - pcsvout << field_result(i+nghost,j+nghost, k+nghost) << endl; - } - } - } - std::stringstream pname2; - pname2 << "data/FieldDFT_"; - pname2 << Ippl::Comm->rank(); - pname2 << ".csv"; - Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); - pcsvout2.precision(10); - pcsvout2.setf(std::ios::scientific, std::ios::floatfield); - pcsvout2 << "rho" << endl; - for (int i = 0; i< pt[0]; i++) { - for (int j = 0; j< pt[1]; j++) { - for (int k = 0; k< pt[2]; k++) { - pcsvout2 << rhoNUDFT_host(i+nghost,j+nghost, k+nghost) << endl; - } - } - } - Ippl::Comm->barrier(); + //typedef Kokkos::TeamPolicy<> team_policy; + //typedef Kokkos::TeamPolicy<>::member_type member_type; + + //Kokkos::parallel_for("NUDFT type 1", + // team_policy(flatN, Kokkos::AUTO), + // KOKKOS_LAMBDA(const member_type& teamMember) { + // const size_t flatIndex = teamMember.league_rank(); + // + // const int k = (int)(flatIndex / (pt[0] * pt[1])); + // const int flatIndex2D = flatIndex - (k * pt[0] * pt[1]); + // const int i = flatIndex2D % pt[0]; + // const int j = (int)(flatIndex2D / pt[0]); + // + // Kokkos::complex reducedValue = 0.0; + // ippl::Vector iVec = {i, j, k}; + // ippl::VectorkVec; + // for(size_t d = 0; d < 3; ++d) { + // kVec[d] = (2.0 * pi / (maxU[d] - minU[d])) * (iVec[d] - (pt[d] / 2)); + // } + // Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, nloc), + // [=](const size_t idx, Kokkos::complex& innerReduce) + // { + // double arg = 0.0; + // for(size_t d = 0; d < 3; ++d) { + // arg += kVec[d]*Rview(idx)[d]; + // } + // const double& val = Qview(idx); + + // innerReduce += (Kokkos::cos(arg) + // - imag * Kokkos::sin(arg)) * val; + // }, Kokkos::Sum>(reducedValue)); + + // if(teamMember.team_rank() == 0) { + // fview(i+nghost,j+nghost,k+nghost) = reducedValue; + // } + + // }); + // + //typename field_type::HostMirror rhoNUDFT_host = field_dft.getHostMirror(); + //Kokkos::deep_copy(rhoNUDFT_host, field_dft.getView()); + //std::stringstream pname; + //pname << "data/FieldFFT_"; + //pname << Ippl::Comm->rank(); + //pname << ".csv"; + //Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + //pcsvout.precision(10); + //pcsvout.setf(std::ios::scientific, std::ios::floatfield); + //pcsvout << "rho" << endl; + //for (int i = 0; i< 
pt[0]; i++) { + // for (int j = 0; j< pt[1]; j++) { + // for (int k = 0; k< pt[2]; k++) { + // pcsvout << field_result(i+nghost,j+nghost, k+nghost) << endl; + // } + // } + //} + //std::stringstream pname2; + //pname2 << "data/FieldDFT_"; + //pname2 << Ippl::Comm->rank(); + //pname2 << ".csv"; + //Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + //pcsvout2.precision(10); + //pcsvout2.setf(std::ios::scientific, std::ios::floatfield); + //pcsvout2 << "rho" << endl; + //for (int i = 0; i< pt[0]; i++) { + // for (int j = 0; j< pt[1]; j++) { + // for (int k = 0; k< pt[2]; k++) { + // pcsvout2 << rhoNUDFT_host(i+nghost,j+nghost, k+nghost) << endl; + // } + // } + // } + // Ippl::Comm->barrier(); From 3b744169fd4e650d2f8c7d02c2f01e641412daf5 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 31 Jul 2023 15:19:05 +0200 Subject: [PATCH 095/117] Uncommited changes committed and pushed --- alpine/ElectrostaticPIC/ChargedParticles.hpp | 2 +- alpine/ElectrostaticPIC/LandauDamping.cpp | 2 +- alpine/PinT/LandauDampingPinT.cpp | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 61730648d..67b8f738f 100644 --- a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -341,7 +341,7 @@ class ChargedParticles : public ippl::ParticleBase { rhoNorm_m = norm(rho_m); IpplTimings::stopTimer(sumTimer); - //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); //rho = rho_e - rho_i rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); diff --git a/alpine/ElectrostaticPIC/LandauDamping.cpp b/alpine/ElectrostaticPIC/LandauDamping.cpp index fde09c024..0aed5ebc8 100644 --- a/alpine/ElectrostaticPIC/LandauDamping.cpp +++ b/alpine/ElectrostaticPIC/LandauDamping.cpp @@ -202,7 +202,7 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; - double alpha = 0.05; + double alpha = 0.5; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; double dx = rmax[0] / nr[0]; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index ead8c38c7..4c2fe9e5f 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -463,7 +463,8 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; //double alpha = 0.05; - Vector_t alpha = {0.05, 0.05, 0.05}; + //Vector_t alpha = {0.05, 0.05, 0.05}; + Vector_t alpha = {0.5, 0.5, 0.5}; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; Vector_t length = rmax - rmin; From 061854f00a38ec40c3d68b81400b079486dc0b2f Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 5 Sep 2023 17:43:51 +0200 Subject: [PATCH 096/117] Uncommited changes pushed --- alpine/PinT/LandauDampingPinT.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 4c2fe9e5f..93ced88d4 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -463,8 +463,8 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; //double alpha = 0.05; - //Vector_t alpha = {0.05, 0.05, 
0.05}; - Vector_t alpha = {0.5, 0.5, 0.5}; + Vector_t alpha = {0.05, 0.05, 0.05}; + //Vector_t alpha = {0.5, 0.5, 0.5}; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; Vector_t length = rmax - rmin; From 7497f8eecec808fe979896ad5f0a14cbfa22f8b6 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 20 Dec 2023 05:43:00 -0800 Subject: [PATCH 097/117] State corresponding to Perlmutter full system scaling study --- alpine/PinT/BumponTailInstabilityPinT.cpp | 16 +++---- alpine/PinT/ChargedParticlesPinT.hpp | 54 +++++++++++------------ alpine/PinT/LandauDampingPinT.cpp | 14 +++--- alpine/PinT/PenningTrapPinT.cpp | 8 ++-- src/FFT/FFT.h | 22 ++++----- src/FFT/FFT.hpp | 20 +++++---- test/FFT/TestNUFFT2.cpp | 8 ++-- 7 files changed, 73 insertions(+), 69 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index e965bf997..fe5d4c3fa 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -436,7 +436,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); - static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); @@ -884,13 +884,13 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); - //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { - //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - //} - IpplTimings::stopTimer(dumpData); + //IpplTimings::startTimer(dumpData); + ////Pcoarse->writeError(Rerror, Perror, it+1); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + ////if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { + ////Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); + ////} + //IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 9cb817dd5..0e2d5912b 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -975,11 +975,11 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter, int rankTime, int rankSpace, + const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, + const unsigned int& /*iter*/, int /*rankTime*/, int /*rankSpace*/, MPI_Comm& spaceComm) { - static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); @@ -995,13 +995,13 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - if((time_m == 0.0)) { - IpplTimings::startTimer(dumpData); - 
//dumpLandau(iter); - dumpBumponTail(nc, iter, rankTime, rankSpace); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - IpplTimings::stopTimer(dumpData); - } + //if((time_m == 0.0)) { + // IpplTimings::startTimer(dumpData); + // //dumpLandau(iter); + // dumpBumponTail(nc, iter, rankTime, rankSpace); + // dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + // IpplTimings::stopTimer(dumpData); + //} for (unsigned int it=0; it { time_m += dt; - IpplTimings::startTimer(dumpData); - //dumpLandau(iter); - dumpBumponTail(nc, iter, rankTime, rankSpace); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - IpplTimings::stopTimer(dumpData); + //IpplTimings::startTimer(dumpData); + ////dumpLandau(iter); + //dumpBumponTail(nc, iter, rankTime, rankSpace); + //dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //IpplTimings::stopTimer(dumpData); } } @@ -1044,12 +1044,12 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter, const double& Bext, - int rankTime, int rankSpace, + const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, + const unsigned int& /*iter*/, const double& Bext, + int /*rankTime*/, int /*rankSpace*/, MPI_Comm& spaceComm) { - static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); @@ -1065,11 +1065,11 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - if((time_m == 0.0)) { - IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - IpplTimings::stopTimer(dumpData); - } + //if((time_m == 0.0)) { + // IpplTimings::startTimer(dumpData); + // dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + // IpplTimings::stopTimer(dumpData); + //} double alpha = -0.5 * dt; double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); Vector_t rmax = rmax_m; @@ -1144,9 +1144,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; - IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - IpplTimings::stopTimer(dumpData); + //IpplTimings::startTimer(dumpData); + //dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //IpplTimings::stopTimer(dumpData); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 4c2fe9e5f..e2419b146 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -419,7 +419,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); - static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef 
initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); @@ -463,8 +463,8 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; //double alpha = 0.05; - //Vector_t alpha = {0.05, 0.05, 0.05}; - Vector_t alpha = {0.5, 0.5, 0.5}; + Vector_t alpha = {0.05, 0.05, 0.05}; + //Vector_t alpha = {0.5, 0.5, 0.5}; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; Vector_t length = rmax - rmin; @@ -814,10 +814,10 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); - IpplTimings::stopTimer(dumpData); + //IpplTimings::startTimer(dumpData); + ////Pcoarse->writeError(Rerror, Perror, it+1); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 3f7d2b587..3733bfca7 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -440,7 +440,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); - static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); static IpplTimings::TimerRef initializeCycles = IpplTimings::getTimer("initializeCycles"); @@ -859,11 +859,11 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - IpplTimings::startTimer(dumpData); + //IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - IpplTimings::stopTimer(dumpData); + //IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 1b0fad70e..434495c91 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -130,11 +130,11 @@ namespace ippl { template <> struct CufinufftType { - std::function makeplan = cufinufftf_makeplan; - std::function setpts = cufinufftf_setpts; - std::function execute = cufinufftf_execute; + std::function makeplan = cufinufftf_makeplan; + std::function setpts = cufinufftf_setpts; + std::function execute = cufinufftf_execute; std::function destroy = cufinufftf_destroy; using complexType = cuFloatComplex; @@ -143,11 +143,11 @@ namespace ippl { template <> struct CufinufftType { - std::function makeplan = cufinufft_makeplan; - std::function setpts = cufinufft_setpts; - std::function execute = cufinufft_execute; + std::function makeplan = cufinufft_makeplan; + std::function setpts = cufinufft_setpts; + std::function execute = cufinufft_execute; std::function destroy = cufinufft_destroy; using complexType = cuDoubleComplex; @@ -388,7 +388,7 @@ namespace ippl { /** setup performs the initialization necessary. 
*/ - void setup(std::array& nmodes, + void setup(std::array& nmodes, const ParameterList& params); detail::CufinufftType nufft_m; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 2c36113fa..b87f9f5fb 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -795,7 +795,7 @@ namespace ippl { * where we fill 0. */ - std::array nmodes; + std::array nmodes; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -826,7 +826,7 @@ namespace ippl { */ template void - FFT::setup(std::array& nmodes, + FFT::setup(std::array& nmodes, const ParameterList& params) { @@ -841,8 +841,9 @@ namespace ippl { opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); } - int maxbatchsize = 0; //default option. ignored for ntransf = 1 which - // is our case + opts.gpu_maxbatchsize = 0; //default option. ignored for ntransf = 1 which + // is our case + opts.gpu_device_id = (int)(Ippl::Comm->rank() % 4); int iflag; @@ -859,7 +860,7 @@ namespace ippl { //dim in cufinufft is int int dim = static_cast(Dim); ier_m = nufft_m.makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, - maxbatchsize, &plan_m, &opts); + &plan_m, &opts); } @@ -884,6 +885,7 @@ namespace ippl { const Layout_t& layout = f.getLayout(); const UniformCartesian& mesh = f.get_mesh(); const Vector& dx = mesh.getMeshSpacing(); + const Vector& origin = mesh.getOrigin(); const auto& domain = layout.getDomain(); Vector Len; Vector N; @@ -947,16 +949,16 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i) { for(size_t d = 0; d < Dim; ++d) { - tempR[d](i) = Rview(i)[d] * (2.0 * pi / Len[d]); + tempR[d](i) = (Rview(i)[d] - origin[d]) * (2.0 * pi / Len[d]); } tempQ(i).x = Qview(i); tempQ(i).y = 0.0; }); - ier_m = nufft_m.setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, - NULL, NULL, NULL, plan_m); + ier_m = nufft_m.setpts(plan_m, localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + NULL, NULL, NULL); - ier_m = nufft_m.execute(tempQ.data(), tempField.data(), plan_m); + ier_m = nufft_m.execute(plan_m, tempQ.data(), tempField.data()); Kokkos::fence(); diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp index 8ffaf6827..f55351db7 100644 --- a/test/FFT/TestNUFFT2.cpp +++ b/test/FFT/TestNUFFT2.cpp @@ -148,14 +148,16 @@ int main(int argc, char *argv[]) { int type = 2; - fft = std::make_unique(layout, type, fftParams); - - Vector_t minU = {-pi, -pi, -pi}; Vector_t maxU = {pi, pi, pi}; size_type nloc = Np/Ippl::Comm->size(); + + fft = std::make_unique(layout, nloc, type, fftParams); + + + const int nghost = field.getNghost(); using mdrange_type = Kokkos::MDRangePolicy>; auto fview = field.getView(); From 1343b2a60f622176d5fa5b4b61f077c6370afd78 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 3 Jan 2024 06:51:10 +0100 Subject: [PATCH 098/117] Tweaks needed for finufft 2.2.0 and stages 2024 --- CMakeLists.txt | 3 ++- alpine/ElectrostaticPIC/ChargedParticles.hpp | 2 +- src/FFT/FFT.hpp | 8 +++++--- src/Types/ViewTypes.h | 1 + 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f15ec370..f27d10ac4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,8 @@ endif () add_compile_options (-Wall) add_compile_options (-Wunused) add_compile_options (-Wextra) -add_compile_options (-Werror) +#add_compile_options (-Werror) + # allow deprecated functions add_compile_options (-Wno-deprecated-declarations) diff --git a/alpine/ElectrostaticPIC/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp index 67b8f738f..61730648d 100644 --- 
a/alpine/ElectrostaticPIC/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -341,7 +341,7 @@ class ChargedParticles : public ippl::ParticleBase { rhoNorm_m = norm(rho_m); IpplTimings::stopTimer(sumTimer); - dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); //rho = rho_e - rho_i rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index b87f9f5fb..acb06d7bd 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -831,7 +831,7 @@ namespace ippl { { cufinufft_opts opts; - ier_m = cufinufft_default_opts(type_m, Dim, &opts); + cufinufft_default_opts(&opts); tol_m = 1e-6; if(!params.get("use_cufinufft_defaults")) { @@ -841,9 +841,11 @@ namespace ippl { opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); } - opts.gpu_maxbatchsize = 0; //default option. ignored for ntransf = 1 which + opts.gpu_maxbatchsize = 0; //default option. ignored for ntransf = 1 which // is our case - opts.gpu_device_id = (int)(Ippl::Comm->rank() % 4); + //For Perlmutter since the mask to hide the other GPUs in the node is + //somehow not working there + //opts.gpu_device_id = (int)(Ippl::Comm->rank() % 4); int iflag; diff --git a/src/Types/ViewTypes.h b/src/Types/ViewTypes.h index 7cfc4238d..a8877926d 100644 --- a/src/Types/ViewTypes.h +++ b/src/Types/ViewTypes.h @@ -19,6 +19,7 @@ #define IPPL_VIEW_TYPES_H #include +#include namespace ippl { /** From 1cf99a32015bee0800eaba9e3fada507077f5afa Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 4 Jan 2024 08:24:01 +0100 Subject: [PATCH 099/117] Code modified for using NUFFT of higher tolerance also as coarse propagator --- alpine/PinT/ChargedParticlesPinT.hpp | 121 ++++++++++++++++++++++----- alpine/PinT/LandauDampingPinT.cpp | 20 ++++- src/FFT/FFT.h | 3 + src/Particle/ParticleAttrib.h | 16 ++-- src/Particle/ParticleAttrib.hpp | 26 +++--- 5 files changed, 145 insertions(+), 41 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 0e2d5912b..7c6d6bcad 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -52,6 +52,9 @@ extern const char* TestName; template class ChargedParticlesPinT : public ippl::ParticleBase { public: + + //using nufft_t = typename ippl::FFT; + CxField_t rhoPIF_m; Field_t Sk_m; Field_t rhoPIC_m; @@ -79,6 +82,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { int shapedegree_m; + //nufft_t nufftType1Fine_m,nufftType2Fine_m,nufftType1Coarse_m,nufftType2Coarse_m; + std::shared_ptr> nufftType1Fine_mp,nufftType2Fine_mp,nufftType1Coarse_mp,nufftType2Coarse_mp; + public: ParticleAttrib q; // charge typename ippl::ParticleBase::particle_position_type P; // G(P^(k)_n) @@ -161,20 +167,51 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } - void initNUFFT(FieldLayout_t& FLPIF) { - ippl::ParameterList fftParams; + //void initNUFFT(FieldLayout_t& FLPIF, double& tol) { + // ippl::ParameterList fftParams; - fftParams.add("gpu_method", 1); - fftParams.add("gpu_sort", 0); - fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-6); + // fftParams.add("gpu_method", 1); + // fftParams.add("gpu_sort", 0); + // fftParams.add("gpu_kerevalmeth", 1); + // //fftParams.add("tolerance", 1e-6); + // fftParams.add("tolerance", tol); - fftParams.add("use_cufinufft_defaults", false); + // 
fftParams.add("use_cufinufft_defaults", false); - q.initializeNUFFT(FLPIF, 1, fftParams); - E.initializeNUFFT(FLPIF, 2, fftParams); - } + // q.initializeNUFFT(FLPIF, 1, fftParams); + // E.initializeNUFFT(FLPIF, 2, fftParams); + //} + + void initNUFFTs(FieldLayout_t& FLPIF, double& coarseTol, + double& fineTol) { + + ippl::ParameterList fftCoarseParams,fftFineParams; + + fftFineParams.add("gpu_method", 1); + fftFineParams.add("gpu_sort", 0); + fftFineParams.add("gpu_kerevalmeth", 1); + fftFineParams.add("tolerance", fineTol); + + fftCoarseParams.add("gpu_method", 1); + fftCoarseParams.add("gpu_sort", 0); + fftCoarseParams.add("gpu_kerevalmeth", 1); + fftCoarseParams.add("tolerance", coarseTol); + fftFineParams.add("use_cufinufft_defaults", false); + fftCoarseParams.add("use_cufinufft_defaults", false); + + //nufftType1Fine_m = nufft_t(FLPIF, this->getLocalNum(), 1, fftFineParams); + //nufftType2Fine_m = nufft_t(FLPIF, this->getLocalNum(), 2, fftFineParams); + + //nufftType1Coarse_m = nufft_t(FLPIF, this->getLocalNum(), 1, fftCoarseParams); + //nufftType2Coarse_m = nufft_t(FLPIF, this->getLocalNum(), 2, fftCoarseParams); + nufftType1Fine_mp = std::make_shared>(FLPIF, this->getLocalNum(), 1, fftFineParams); + nufftType2Fine_mp = std::make_shared>(FLPIF, this->getLocalNum(), 2, fftFineParams); + + nufftType1Coarse_mp = std::make_shared>(FLPIF, this->getLocalNum(), 1, fftCoarseParams); + nufftType2Coarse_mp = std::make_shared>(FLPIF, this->getLocalNum(), 2, fftCoarseParams); + } + void initializeParareal(ParticleAttrib& Rbegin, ParticleAttrib& Pbegin, ParticleAttrib& Rcoarse, @@ -977,19 +1014,31 @@ class ChargedParticlesPinT : public ippl::ParticleBase { ParticleAttrib& Ptemp, const unsigned int& nt, const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, const unsigned int& /*iter*/, int /*rankTime*/, int /*rankSpace*/, - MPI_Comm& spaceComm) { + const std::string& propagator, MPI_Comm& spaceComm) { //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); + if(propagator == "Coarse") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); + } + else if(propagator == "Fine") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); + } + rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + if(propagator == "Coarse") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Coarse_mp.get(), q); + } + else if(propagator == "Fine") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); + } + //gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); q = Q_m / Np_m; @@ -1018,12 +1067,24 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); + if(propagator == "Coarse") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); + } + else if(propagator == "Fine") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); + } + //scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - 
gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + if(propagator == "Coarse") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Coarse_mp.get(), q); + } + else if(propagator == "Fine") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); + } + //gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); q = Q_m / Np_m; @@ -1047,19 +1108,29 @@ class ChargedParticlesPinT : public ippl::ParticleBase { const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, const unsigned int& /*iter*/, const double& Bext, int /*rankTime*/, int /*rankSpace*/, - MPI_Comm& spaceComm) { + const std::string& propagator, MPI_Comm& spaceComm) { //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); + if(propagator == "Coarse") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); + } + else if(propagator == "Fine") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); + } rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + if(propagator == "Coarse") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Coarse_mp.get(), q); + } + else if(propagator == "Fine") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); + } q = Q_m / Np_m; @@ -1113,12 +1184,22 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); + if(propagator == "Coarse") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); + } + else if(propagator == "Fine") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); + } rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); // Solve for and gather E field - gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + if(propagator == "Coarse") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Coarse_mp.get(), q); + } + else if(propagator == "Fine") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); + } q = Q_m / Np_m; //kick diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index e2419b146..3a17b804a 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -547,7 +547,12 @@ int main(int argc, char *argv[]){ Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - Pcoarse->initNUFFT(FLPIF); + //Pcoarse->initNUFFT(FLPIF); + double coarseTol = 1e-2; + double fineTol = 1e-6; + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial @@ -609,7 +614,9 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); + //Pcoarse->initNUFFT(FLPIF, coarseTol); Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); 
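        // Pend currently holds this slice's coarse sweep; it is saved into
        // Pcoarse->R/P (and handed to the next time rank below), which the
        // Parareal correction in the iteration loop then treats as the
        // previous coarse value.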
Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -739,7 +746,8 @@ int main(int argc, char *argv[]){ while (!isConverged) { //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, spaceComm); + //Pcoarse->initNUFFT(FLPIF, fineTol); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, fine, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -778,7 +786,9 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + //Pcoarse->initNUFFT(FLPIF, coarseTol); + //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -862,7 +872,9 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + //Pcoarse->initNUFFT(FLPIF, coarseTol); + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 434495c91..816ae8e4b 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -368,6 +368,9 @@ namespace ippl { using view_particle_real_type = typename detail::ViewType::view_type; using view_particle_complex_type = typename detail::ViewType::view_type; + + FFT() = default; + /** Create a new FFT object with the layout for the input Field, type * (1 or 2) for the NUFFT and parameters for cuFINUFFT. */ diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index aeb0df5f0..10b391d69 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -176,19 +176,21 @@ namespace ippl { const ParticleAttrib, Properties... >& pp); #ifdef KOKKOS_ENABLE_CUDA - template - void initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams); + //template + //void initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams); template void scatterPIFNUFFT(Field& f, Field& Sk, const ParticleAttrib, Properties... >& pp, + FFT* nufft, const MPI_Comm& spaceComm) const; template void gatherPIFNUFFT(Field& f, Field& Sk, const ParticleAttrib, Properties... >& pp, + FFT* nufft, ParticleAttrib& q); #endif @@ -199,11 +201,11 @@ namespace ippl { private: view_type dview_m; -#ifdef KOKKOS_ENABLE_CUDA - //TODO: Remove hard-coded dimension by having Dim as template - //parameter. Does this need to be in CUDA ifdefs? - std::shared_ptr> fftType_mp; -#endif +//#ifdef KOKKOS_ENABLE_CUDA +// //TODO: Remove hard-coded dimension by having Dim as template +// //parameter. Does this need to be in CUDA ifdefs? 
+// std::shared_ptr> fftType_mp; +//#endif }; } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 93f8620b3..41b11f220 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -493,12 +493,12 @@ namespace ippl { #ifdef KOKKOS_ENABLE_CUDA - template - template - void ParticleAttrib::initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams) { - - fftType_mp = std::make_shared>(layout, *(this->localNum_mp), type, fftParams); - } + //template + //template + //void ParticleAttrib::initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams) { + // + // fftType_mp = std::make_shared>(layout, *(this->localNum_mp), type, fftParams); + //} @@ -506,6 +506,7 @@ namespace ippl { template void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp, + FFT* nufft, const MPI_Comm& spaceComm) const { @@ -524,7 +525,8 @@ namespace ippl { tempField = 0.0; - fftType_mp->transform(pp, q, tempField); + //fftType_mp->transform(pp, q, tempField); + nufft->transform(pp, q, tempField); //fftType_mp->transform(pp, q, f); @@ -566,6 +568,7 @@ namespace ippl { template void ParticleAttrib::gatherPIFNUFFT(Field& f, Field& Sk, const ParticleAttrib< Vector, Properties... >& pp, + FFT* nufft, ParticleAttrib& q) { static IpplTimings::TimerRef gatherPIFNUFFTTimer = IpplTimings::getTimer("GatherPIFNUFFT"); @@ -631,7 +634,8 @@ namespace ippl { tempview(i, j, k) *= -Skview(i, j, k) * (imag * kVec[gd] * factor); }); - fftType_mp->transform(pp, q, tempField); + //fftType_mp->transform(pp, q, tempField); + nufft->transform(pp, q, tempField); Kokkos::parallel_for("Assign E gather NUFFT", Np, @@ -651,10 +655,11 @@ namespace ippl { inline void scatterPIFNUFFT(const ParticleAttrib& attrib, Field& f, Field& Sk, const ParticleAttrib, Properties...>& pp, + FFT* nufft, const MPI_Comm& spaceComm = MPI_COMM_WORLD) { #ifdef KOKKOS_ENABLE_CUDA - attrib.scatterPIFNUFFT(f, Sk, pp, spaceComm); + attrib.scatterPIFNUFFT(f, Sk, pp, nufft, spaceComm); #else //throw IpplException("scatterPIFNUFFT", "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to // be compiled with CUDA. 
Otherwise use scatterPIFNUDFT."); @@ -665,10 +670,11 @@ namespace ippl { inline void gatherPIFNUFFT(ParticleAttrib& attrib, Field& f, Field& Sk, const ParticleAttrib, Properties...>& pp, + FFT* nufft, ParticleAttrib& q) { #ifdef KOKKOS_ENABLE_CUDA - attrib.gatherPIFNUFFT(f, Sk, pp, q); + attrib.gatherPIFNUFFT(f, Sk, pp, nufft, q); #else //throw IpplException("gatherPIFNUFFT", // "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to From f67d5ae3313ab99a2e0e670db2d0b18a776e737a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 9 Jan 2024 09:32:38 +0100 Subject: [PATCH 100/117] In the middle of trying to understand why particles creation take so much time --- alpine/PinT/ChargedParticlesPinT.hpp | 6 ++- alpine/PinT/LandauDampingPinT.cpp | 7 ++- alpine/PinT/PenningTrapPinT.cpp | 80 ++++++++++++++++++++++++---- 3 files changed, 79 insertions(+), 14 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 7c6d6bcad..235341cfb 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -1136,7 +1136,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - //if((time_m == 0.0)) { + //if((time_m == 0.0) && (propagator == "Fine")) { // IpplTimings::startTimer(dumpData); // dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); // IpplTimings::stopTimer(dumpData); @@ -1226,7 +1226,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; //IpplTimings::startTimer(dumpData); - //dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //if(propagator == "Fine") { + // dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //} //IpplTimings::stopTimer(dumpData); } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 3a17b804a..e42c13a39 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -615,8 +615,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); //Pcoarse->initNUFFT(FLPIF, coarseTol); - Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); - //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -787,6 +787,9 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(coarsePropagator); //Pcoarse->initNUFFT(FLPIF, coarseTol); + //double coarseTol = (double)(std::pow(0.1,std::min((int)(it+2),3))); + //double fineTol = 1e-6; + //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(coarsePropagator); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 3733bfca7..759027652 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -440,10 +440,13 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); static 
IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); - //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); static IpplTimings::TimerRef initializeCycles = IpplTimings::getTimer("initializeCycles"); + static IpplTimings::TimerRef initialComm = IpplTimings::getTimer("initialComm"); + static IpplTimings::TimerRef initialCoarse = IpplTimings::getTimer("initialCoarse"); + static IpplTimings::TimerRef warmupStep = IpplTimings::getTimer("warmupStep"); IpplTimings::startTimer(mainTimer); @@ -552,7 +555,6 @@ int main(int argc, char *argv[]){ Pcoarse->initFFTSolver(); - IpplTimings::startTimer(particleCreation); Vector_t minU, maxU; for (unsigned d = 0; d initNUFFT(FLPIF); + double coarseTol = 1e-3; + double fineTol = 1e-6; + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; + + //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + //IpplTimings::startTimer(warmupStep); + //if(rankTime == 0) { + // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); + // Kokkos::parallel_for(nloc, + // generate_random, Dim>( + // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, + // minU, maxU)); + + + // Kokkos::fence(); + //} + //else { + // size_type bufSize = Pbegin->packedSize(nloc); + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + // Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + // buf->resetReadPos(); + //} + + //Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + //Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + //Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + //Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + ////Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + //Pcoarse->BorisPIF(Pend->R, Pend->P, 45, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + // + //Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + //Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + //if(rankTime < sizeTime-1) { + // size_type bufSize = Pend->packedSize(nloc); + // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + // MPI_Request request; + // Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); + // buf->resetWritePos(); + // MPI_Wait(&request, MPI_STATUS_IGNORE); + //} + //IpplTimings::stopTimer(warmupStep); + + + + IpplTimings::startTimer(particleCreation); + + //Pcoarse->initNUFFT(FLPIF); #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial @@ -614,6 +665,7 @@ int main(int argc, char *argv[]){ //condition is not the same on different GPUs tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + IpplTimings::startTimer(initialComm); if(rankTime == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); Kokkos::parallel_for(nloc, @@ -630,6 +682,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } + 
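        // Every rank except the first blocks here until the previous time rank has
        // finished its coarse sweep and sent the slice boundary state, so this
        // start-up phase is effectively serial across the time ranks; the new
        // initialComm / initialCoarse timers bracket exactly that cost.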
IpplTimings::stopTimer(initialComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); @@ -638,13 +691,17 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + IpplTimings::startTimer(initialCoarse); + //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + IpplTimings::stopTimer(initialCoarse); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(initialComm); if(rankTime < sizeTime-1) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -653,6 +710,7 @@ int main(int argc, char *argv[]){ buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } + IpplTimings::stopTimer(initialComm); #else Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, @@ -776,7 +834,7 @@ int main(int argc, char *argv[]){ //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, - Bext, rankTime, rankSpace, spaceComm); + Bext, rankTime, rankSpace, fine, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -819,7 +877,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + //Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -859,11 +918,11 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - //IpplTimings::startTimer(dumpData); + IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - //IpplTimings::stopTimer(dumpData); + IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); @@ -915,7 +974,8 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); From b417e289b9892177cfcf2204271a1b097f8819b8 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 10 Jan 2024 15:42:15 +0100 Subject: [PATCH 101/117] Reason for initial communication to be expensive identified. 
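[Editorial note, not part of the original commit message] The timers added in the previous patch ("initialComm", "initialCoarse", "warmupStep") bracket the individual start-up phases so that the cost of exchanging the initial particle state along the time communicator shows up as its own entry in the timing report, and this patch reorders the start-up accordingly: particle creation is moved after the shape-function and NUFFT initialization in all three PinT drivers. The bracketing idiom used throughout these files is, schematically (the guarded region is illustrative, and the final report is assumed to be written at the end of main(), e.g. via IpplTimings::print(), outside the hunks shown here):

    static IpplTimings::TimerRef initialComm = IpplTimings::getTimer("initialComm");
    IpplTimings::startTimer(initialComm);
    // ... receive or forward the initial particle state along the time communicator ...
    IpplTimings::stopTimer(initialComm);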
--- alpine/PinT/BumponTailInstabilityPinT.cpp | 33 +++++++---- alpine/PinT/LandauDampingPinT.cpp | 23 ++++---- alpine/PinT/PenningTrapPinT.cpp | 72 ++++++----------------- 3 files changed, 52 insertions(+), 76 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index fe5d4c3fa..0a72b851d 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -574,22 +574,13 @@ int main(int argc, char *argv[]){ //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); - - IpplTimings::startTimer(particleCreation); - + Vector_t minU, maxU; for (unsigned d = 0; d create(nloc); - Pbegin->create(nloc); - Pend->create(nloc); - - Pcoarse->q = Pcoarse->Q_m/Total_particles; - using buffer_type = ippl::Communicate::buffer_type; int tag; @@ -599,8 +590,23 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeShapeFunctionPIF); Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); + + double coarseTol = 1e-3; + double fineTol = 1e-6; + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; + + IpplTimings::startTimer(particleCreation); + + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + Pcoarse->q = Pcoarse->Q_m/Total_particles; + - Pcoarse->initNUFFT(FLPIF); + //Pcoarse->initNUFFT(FLPIF); #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs @@ -665,6 +671,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -800,7 +807,7 @@ int main(int argc, char *argv[]){ while (!isConverged) { //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, spaceComm); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, fine, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -843,6 +850,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(coarsePropagator); Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + //Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -936,6 +944,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index e42c13a39..bb0b12240 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -522,22 +522,13 @@ int main(int argc, char *argv[]){ //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); Pcoarse->initFFTSolver(); - - 
IpplTimings::startTimer(particleCreation); - + Vector_t minU, maxU; for (unsigned d = 0; d create(nloc); - Pbegin->create(nloc); - Pend->create(nloc); - - Pcoarse->q = Pcoarse->Q_m/Total_particles; - using buffer_type = ippl::Communicate::buffer_type; int tag; @@ -548,12 +539,22 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(initializeShapeFunctionPIF); //Pcoarse->initNUFFT(FLPIF); - double coarseTol = 1e-2; + double coarseTol = 1e-3; double fineTol = 1e-6; Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); std::string coarse = "Coarse"; std::string fine = "Fine"; + + IpplTimings::startTimer(particleCreation); + + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + Pcoarse->q = Pcoarse->Q_m/Total_particles; + + #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 759027652..967a284ec 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -563,11 +563,6 @@ int main(int argc, char *argv[]){ } - Pcoarse->create(nloc); - Pbegin->create(nloc); - Pend->create(nloc); - - Pcoarse->q = Pcoarse->Q_m/Total_particles; using buffer_type = ippl::Communicate::buffer_type; int tag; @@ -584,48 +579,15 @@ int main(int argc, char *argv[]){ std::string coarse = "Coarse"; std::string fine = "Fine"; - //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - - //IpplTimings::startTimer(warmupStep); - //if(rankTime == 0) { - // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); - // Kokkos::parallel_for(nloc, - // generate_random, Dim>( - // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, - // minU, maxU)); - - - // Kokkos::fence(); - //} - //else { - // size_type bufSize = Pbegin->packedSize(nloc); - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - // Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); - // buf->resetReadPos(); - //} - - //Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); - //Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); - //Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); - //Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); - ////Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - //Pcoarse->BorisPIF(Pend->R, Pend->P, 45, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); - // - //Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); - //Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); - //if(rankTime < sizeTime-1) { - // size_type bufSize = Pend->packedSize(nloc); - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); - // MPI_Request request; - // Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); - // buf->resetWritePos(); - // MPI_Wait(&request, MPI_STATUS_IGNORE); - //} - //IpplTimings::stopTimer(warmupStep); + IpplTimings::startTimer(particleCreation); + + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + Pcoarse->q = Pcoarse->Q_m/Total_particles; - IpplTimings::startTimer(particleCreation); //Pcoarse->initNUFFT(FLPIF); @@ -692,8 +654,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(initialCoarse); - //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - Pcoarse->BorisPIF(Pend->R, 
Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + //Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(initialCoarse); IpplTimings::startTimer(deepCopy); @@ -797,6 +759,8 @@ int main(int argc, char *argv[]){ int sign = 1; + //coarseTol = 1e-3; + //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); for (unsigned int nc=0; nc < nCycles; nc++) { double tStartMySlice; @@ -877,8 +841,10 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); - //Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + //coarseTol = 1e-4;//(double)(std::pow(0.1,std::min((int)(it+2),4))); + //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + //Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -918,11 +884,11 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - IpplTimings::startTimer(dumpData); + //IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - IpplTimings::stopTimer(dumpData); + //IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); @@ -974,8 +940,8 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); - //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + //Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); From ccc064f859cbe4e9e12679a36db8ee3b234633c2 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 29 Jan 2024 15:32:41 +0100 Subject: [PATCH 102/117] Uncommitted changes committed --- alpine/PinT/BumponTailInstabilityPinT.cpp | 36 ++++++++------ alpine/PinT/ChargedParticlesPinT.hpp | 60 ++++++++++++----------- alpine/PinT/LandauDampingPinT.cpp | 43 +++++++++------- alpine/PinT/PenningTrapPinT.cpp | 24 +++++---- 4 files changed, 90 insertions(+), 73 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 0a72b851d..1d3b81545 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -436,7 +436,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); - 
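// [Editor's note, not part of the patch] finePropagator and coarsePropagator time the two halves of
// the parareal iteration these PinT drivers implement: each time slice is advanced from Pbegin with
// the accurate fine propagator, the coarse propagator is then re-run from the corrected slice start,
// and the slice end state Pend is updated with the usual parareal correction, schematically
// U_{n+1}^{k+1} = G(U_n^{k+1}) + F(U_n^k) - G(U_n^k), where the previous coarse result G(U_n^k) is
// held in the RprevIter/PprevIter attributes of ChargedParticlesPinT. The while (!isConverged) loop
// repeats this until the relative errors Rerror and Perror fall below the command-line tolerance.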
//static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); @@ -591,19 +591,25 @@ int main(int argc, char *argv[]){ Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - double coarseTol = 1e-3; - double fineTol = 1e-6; - Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - std::string coarse = "Coarse"; - std::string fine = "Fine"; - IpplTimings::startTimer(particleCreation); Pcoarse->create(nloc); Pbegin->create(nloc); Pend->create(nloc); - + Pcoarse->q = Pcoarse->Q_m/Total_particles; + + IpplTimings::stopTimer(particleCreation); + + + double coarseTol = std::atof(argv[17]); + double fineTol = 1e-12; + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; + + IpplTimings::startTimer(particleCreation); + //Pcoarse->initNUFFT(FLPIF); @@ -892,13 +898,13 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - //IpplTimings::startTimer(dumpData); - ////Pcoarse->writeError(Rerror, Perror, it+1); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); - ////if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { - ////Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - ////} - //IpplTimings::stopTimer(dumpData); + IpplTimings::startTimer(dumpData); + //Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { + //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); + //} + IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 235341cfb..862b097e3 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -1012,11 +1012,11 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, - const unsigned int& /*iter*/, int /*rankTime*/, int /*rankSpace*/, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, int rankTime, int rankSpace, const std::string& propagator, MPI_Comm& spaceComm) { - //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); @@ -1044,13 +1044,13 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - //if((time_m == 0.0)) { - // IpplTimings::startTimer(dumpData); - // //dumpLandau(iter); - // dumpBumponTail(nc, iter, rankTime, rankSpace); - // dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - // IpplTimings::stopTimer(dumpData); - //} + if((time_m == 0.0) && (propagator == "Fine")) { + IpplTimings::startTimer(dumpData); + //dumpLandau(iter); + dumpBumponTail(nc, iter, rankTime, rankSpace); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + IpplTimings::stopTimer(dumpData); + } for (unsigned int it=0; it 
{ time_m += dt; - //IpplTimings::startTimer(dumpData); - ////dumpLandau(iter); - //dumpBumponTail(nc, iter, rankTime, rankSpace); - //dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - //IpplTimings::stopTimer(dumpData); + if(propagator == "Fine") { + IpplTimings::startTimer(dumpData); + //dumpLandau(iter); + dumpBumponTail(nc, iter, rankTime, rankSpace); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + IpplTimings::stopTimer(dumpData); + } } } @@ -1105,12 +1107,12 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, - const unsigned int& /*iter*/, const double& Bext, - int /*rankTime*/, int /*rankSpace*/, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, const double& Bext, + int rankTime, int rankSpace, const std::string& propagator, MPI_Comm& spaceComm) { - //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); //checkBounds(Rtemp); @@ -1136,11 +1138,11 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - //if((time_m == 0.0) && (propagator == "Fine")) { - // IpplTimings::startTimer(dumpData); - // dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - // IpplTimings::stopTimer(dumpData); - //} + if((time_m == 0.0) && (propagator == "Fine")) { + IpplTimings::startTimer(dumpData); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + IpplTimings::stopTimer(dumpData); + } double alpha = -0.5 * dt; double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); Vector_t rmax = rmax_m; @@ -1225,11 +1227,11 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m += dt; - //IpplTimings::startTimer(dumpData); - //if(propagator == "Fine") { - // dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); - //} - //IpplTimings::stopTimer(dumpData); + if(propagator == "Fine") { + IpplTimings::startTimer(dumpData); + dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + IpplTimings::stopTimer(dumpData); + } } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index bb0b12240..994519005 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -419,7 +419,7 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); - //static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); @@ -537,14 +537,6 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(initializeShapeFunctionPIF); Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - - //Pcoarse->initNUFFT(FLPIF); - double 
coarseTol = 1e-3; - double fineTol = 1e-6; - Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - std::string coarse = "Coarse"; - std::string fine = "Fine"; - IpplTimings::startTimer(particleCreation); @@ -553,6 +545,19 @@ int main(int argc, char *argv[]){ Pend->create(nloc); Pcoarse->q = Pcoarse->Q_m/Total_particles; + + IpplTimings::stopTimer(particleCreation); + + //Pcoarse->initNUFFT(FLPIF); + double coarseTol = std::atof(argv[17]); + double fineTol = 1e-12; + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; + + + IpplTimings::startTimer(particleCreation); + #ifdef KOKKOS_ENABLE_CUDA @@ -616,8 +621,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); //Pcoarse->initNUFFT(FLPIF, coarseTol); - //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); - Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -791,8 +796,8 @@ int main(int argc, char *argv[]){ //double coarseTol = (double)(std::pow(0.1,std::min((int)(it+2),3))); //double fineTol = 1e-6; //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + //Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -828,10 +833,10 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - //IpplTimings::startTimer(dumpData); - ////Pcoarse->writeError(Rerror, Perror, it+1); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); - //IpplTimings::stopTimer(dumpData); + IpplTimings::startTimer(dumpData); + //Pcoarse->writeError(Rerror, Perror, it+1); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); @@ -876,9 +881,9 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); //Pcoarse->initNUFFT(FLPIF, coarseTol); - Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 967a284ec..7cc1af833 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -572,9 +572,18 @@ int main(int argc, char *argv[]){ Pcoarse->initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); + 
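// [Editor's note, not part of the patch] From this patch onward the PinT drivers keep two NUFFT
// plans with different accuracies, built through Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol):
// a loose tolerance for the coarse parareal propagator and a tight one for the fine propagator.
// The coarse tolerance is no longer hard-coded but read from the command line as
// std::atof(argv[17]), following the existing positional-argument layout of these drivers, and
// particle creation now sits in its own timed block before the NUFFT plans are built.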
IpplTimings::startTimer(particleCreation); + + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + Pcoarse->q = Pcoarse->Q_m/Total_particles; + + IpplTimings::stopTimer(particleCreation); - double coarseTol = 1e-3; - double fineTol = 1e-6; + double coarseTol = std::atof(argv[17]); + double fineTol = 1e-12; Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); std::string coarse = "Coarse"; std::string fine = "Fine"; @@ -582,11 +591,6 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(particleCreation); - Pcoarse->create(nloc); - Pbegin->create(nloc); - Pend->create(nloc); - - Pcoarse->q = Pcoarse->Q_m/Total_particles; //Pcoarse->initNUFFT(FLPIF); @@ -884,11 +888,11 @@ int main(int argc, char *argv[]){ << " Perror: " << Perror << endl; - //IpplTimings::startTimer(dumpData); + IpplTimings::startTimer(dumpData); //Pcoarse->writeError(Rerror, Perror, it+1); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - //IpplTimings::stopTimer(dumpData); + IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); From 4688555a546b20ada62a3dde3a1186b3f1538973 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 1 Mar 2024 17:03:59 +0100 Subject: [PATCH 103/117] In the middle of cleanup before running speedup studies --- CMakeLists.txt | 5 +- .../BumponTailInstabilityPIF.cpp | 49 +++- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 260 +++++++++--------- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 54 +++- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 52 +++- alpine/PinT/BumponTailInstabilityPinT.cpp | 14 +- alpine/PinT/ChargedParticlesPinT.hpp | 91 +++++- alpine/PinT/LandauDampingPinT.cpp | 191 +++++-------- alpine/PinT/PenningTrapPinT.cpp | 32 ++- src/FFT/FFT.hpp | 4 +- 10 files changed, 449 insertions(+), 303 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f27d10ac4..db77c210f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,11 +21,14 @@ endif () add_compile_options (-Wall) add_compile_options (-Wunused) add_compile_options (-Wextra) -#add_compile_options (-Werror) +add_compile_options (-Werror) # allow deprecated functions add_compile_options (-Wno-deprecated-declarations) +add_compile_options (-Wno-stringop-overflow) +add_compile_options (-Wno-array-bounds) +add_compile_options (-Wno-restrict) option (USE_STATIC_LIBRARIES "Link with static libraries if available" ON) diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp index dcd059bdf..3ef320c57 100644 --- a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -1,6 +1,6 @@ // Electrostatic Two-stream/Bump-on-tail instability test with Particle-in-Fourier schemes // Usage: -// srun ./BumponTailInstabilityPIF
<nx> <ny> <nz> <Np> <Nt> <dt> <ShapeType> <degree> --info 5 +// srun ./BumponTailInstabilityPIF <nx> <ny> <nz> <Np> <Nt> <dt> <ShapeType> <degree> <tol>
--info 5 // nx = No. of Fourier modes in the x-direction // ny = No. of Fourier modes in the y-direction // nz = No. of Fourier modes in the z-direction @@ -9,8 +9,9 @@ // dt = Time stepsize // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) +// tol = tolerance of NUFFT // Example: -// srun ./BumponTailInstabilityPIF 32 32 32 655360 20 0.05 B-spline 1 --info 5 +// srun ./BumponTailInstabilityPIF 32 32 32 655360 20 0.05 B-spline 1 1e-4 --info 5 // // Copyright (c) 2023, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -262,6 +263,42 @@ int main(int argc, char *argv[]){ P->rho_m.initialize(mesh, FL); P->Sk_m.initialize(mesh, FL); + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domain[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domain[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + P->rhoPIFreal_m.initialize(mesh, FL); + P->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + P->fft_mp = std::make_shared(FL, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + P->time_m = 0.0; P->shapetype_m = argv[7]; @@ -269,11 +306,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(particleCreation); - //typedef ippl::detail::RegionLayout RegionLayout_t; - //const RegionLayout_t& RLayout = PL.getRegionLayout(); - //const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t minU, maxU; - //int myRank = Ippl::Comm->rank(); for (unsigned d = 0; d initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - P->initNUFFT(FL); + + double tol = std::atof(argv[9]); + P->initNUFFT(FL,tol); P->scatter(); diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index b271372bc..bb762d408 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -42,16 +42,58 @@ typedef Field Field_t; typedef Field, Dim> CxField_t; typedef Field VField_t; +typedef ippl::FFT FFT_t; const double pi = std::acos(-1.0); // Test programs have to define this variable for VTK dump purposes extern const char* TestName; +void dumpVTK(Field_t& rho, int nx, int ny, int nz, int iteration, + double dx, double dy, double dz) { + + typename Field_t::view_type::host_mirror_type host_view = rho.getHostMirror(); + + std::stringstream fname; + fname << "data/scalar_"; + fname << std::setw(4) << std::setfill('0') << iteration; + fname << ".vtk"; + + Kokkos::deep_copy(host_view, rho.getView()); + + Inform vtkout(NULL, fname.str().c_str(), Inform::OVERWRITE); + vtkout.precision(10); + vtkout.setf(std::ios::scientific, std::ios::floatfield); + + // start with header + vtkout << "# vtk DataFile Version 2.0" << 
endl; + vtkout << TestName << endl; + vtkout << "ASCII" << endl; + vtkout << "DATASET STRUCTURED_POINTS" << endl; + vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << endl; + vtkout << "ORIGIN " << -dx << " " << -dy << " " << -dz << endl; + vtkout << "SPACING " << dx << " " << dy << " " << dz << endl; + vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << endl; + + vtkout << "SCALARS Rho float" << endl; + vtkout << "LOOKUP_TABLE default" << endl; + for (int z=0; z class ChargedParticlesPIF : public ippl::ParticleBase { public: CxField_t rho_m; + CxField_t rhoPIFhalf_m; + Field_t rhoPIFreal_m; CxField_t rhoDFT_m; Field_t Sk_m; @@ -74,7 +116,9 @@ class ChargedParticlesPIF : public ippl::ParticleBase { std::string shapetype_m; int shapedegree_m; + std::shared_ptr fft_mp; + std::shared_ptr> nufftType1_mp,nufftType2_mp; public: ParticleAttrib q; // charge @@ -124,29 +168,29 @@ class ChargedParticlesPIF : public ippl::ParticleBase { setBCAllPeriodic(); } - void initNUFFT(FieldLayout_t& FL) { + void initNUFFT(FieldLayout_t& FL, double& tol) { ippl::ParameterList fftParams; fftParams.add("gpu_method", 1); fftParams.add("gpu_sort", 0); fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-6); + fftParams.add("tolerance", tol); fftParams.add("use_cufinufft_defaults", false); //fftParams.add("use_cufinufft_defaults", true); - q.initializeNUFFT(FL, 1, fftParams); - E.initializeNUFFT(FL, 2, fftParams); + nufftType1_mp = std::make_shared>(FL, this->getLocalNum(), 1, fftParams); + nufftType2_mp = std::make_shared>(FL, this->getLocalNum(), 2, fftParams); } void gather() { - gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, this->q); + gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, nufftType2_mp.get(), q); //gatherPIFNUDFT(this->E, rho_m, Sk_m, this->R); //Set the charge back to original as we used this view as a //temporary buffer during gather - this->q = Q_m / Np_m; + q = Q_m / Np_m; } @@ -154,7 +198,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Inform m("scatter "); rho_m = {0.0, 0.0}; - scatterPIFNUFFT(q, rho_m, Sk_m, this->R); + scatterPIFNUFFT(q, rho_m, Sk_m, this->R, nufftType1_mp.get()); //rhoDFT_m = {0.0, 0.0}; //scatterPIFNUDFT(q, rho_m, Sk_m, this->R); @@ -205,8 +249,6 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -231,32 +273,6 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); fieldEnergy *= volume; - //auto Eview = E.getView(); - - //double fieldEnergy, ExAmp; - //double temp = 0.0; - - //Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), - // KOKKOS_LAMBDA(const int i, double& valL){ - // double myVal = Eview(i)[0] * Eview(i)[0]; - // valL += myVal; - // }, Kokkos::Sum(temp)); - - //double globaltemp = 0.0; - //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //fieldEnergy = globaltemp * volume / totalP ; - - //double tempMax = 0.0; - //Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), - // KOKKOS_LAMBDA(const size_t i, double& valL) - // { - // double myVal = std::fabs(Eview(i)[0]); - // if(myVal > valL) valL = myVal; - // }, Kokkos::Max(tempMax)); - //ExAmp = 
0.0; - //MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - if (Ippl::Comm->rank() == 0) { std::stringstream fname; @@ -378,16 +394,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double potentialEnergy, kineticEnergy; double temp = 0.0; - - //auto Eview = E.getView(); - //Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), - // KOKKOS_LAMBDA(const int i, double& valL){ - // double myVal = dot(Eview(i), Eview(i)).apply(); - // valL += myVal; - // }, Kokkos::Sum(temp)); - - - + auto rhoview = rho_m.getView(); const int nghost = rho_m.getNghost(); using mdrange_type = Kokkos::MDRangePolicy>; @@ -422,8 +429,6 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -438,14 +443,6 @@ class ChargedParticlesPIF : public ippl::ParticleBase { myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); } - //double myVal = rhoview(i,j,k).real() * rhoview(i,j,k).real() + - // rhoview(i,j,k).imag() * rhoview(i,j,k).imag(); - //if(Dr != 0.0) { - // myVal /= Dr; - //} - //else { - // myVal = 0.0; - //} valL += myVal; }, Kokkos::Sum(temp)); @@ -472,6 +469,77 @@ class ChargedParticlesPIF : public ippl::ParticleBase { kineticEnergy = globaltemp; + auto rhoPIFhalfview = rhoPIFhalf_m.getView(); + const int nghostHalf = rhoPIFhalf_m.getNghost(); + + const FieldLayout_t& layoutHalf = rhoPIFhalf_m.getLayout(); + const auto& domainHalf = layoutHalf.getDomain(); + + Vector Nhalf; + for (unsigned d=0; d < Dim; ++d) { + Nhalf[d] = domainHalf[d].length(); + } + + Kokkos::parallel_for("Transfer complex rho to half domain", + mdrange_type({0, 0, 0}, + {Nhalf[0], + Nhalf[1], + Nhalf[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + Vector iVec = {i, j, k}; + int shift; + for(size_t d = 0; d < Dim; ++d) { + bool isLessThanHalf = (iVec[d] < (Nhalf[d]/2)); + shift = ((int)isLessThanHalf * 2) - 1; + iVec[d] = (iVec[d] + shift * (Nhalf[d]/2)) + nghostHalf; + } + rhoPIFhalfview(Nhalf[0]-1-i+nghostHalf, iVec[1], iVec[2]) = rhoview(i+nghostHalf,j+nghostHalf,k+nghostHalf); + }); + + + rhoPIFreal_m = 0.0; + fft_mp->transform(-1, rhoPIFreal_m, rhoPIFhalf_m); + + + rhoPIFreal_m = (1.0/(nr_m[0]*nr_m[1]*nr_m[2])) * volume * rhoPIFreal_m; + auto rhoPIFrealview = rhoPIFreal_m.getView(); + temp = 0.0; + Kokkos::parallel_reduce("Rho real sum", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + + valL += rhoPIFrealview(i+nghost, j+nghost, k+nghost); + }, Kokkos::Sum(temp)); + + double charge = temp; + + Vector_t totalMomentum = 0.0; + + Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, Vector_t& valL){ + valL += (-qView(i)) * Pview(i); + }, Kokkos::Sum>(totalMomentum)); + + Vector_t globalMom; + + double magMomentum = 0.0; + for(size_t d = 0; d < Dim; ++d) { + MPI_Allreduce(&totalMomentum[d], &globalMom[d], 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + magMomentum += globalMom[d] * globalMom[d]; + } + + magMomentum = std::sqrt(magMomentum); + if (Ippl::Comm->rank() == 0) { std::stringstream fname; fname << "data/Energy_"; @@ -480,17 +548,19 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(10); + csvout.precision(17); 
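// [Editor's note, not part of the patch] The extra columns written below are conservation
// diagnostics added by this patch: charge is the real-space density, obtained by passing the
// spectral rho through the half-domain r2c FFT above and inverse-transforming it, summed over the
// grid (the volume / number-of-modes factor turns that sum into a total charge, which should stay
// equal to Q_m), and magMomentum is the magnitude of the globally reduced particle momentum,
// reported as a further conservation check.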
csvout.setf(std::ios::scientific, std::ios::floatfield); if(time_m == 0.0) { - csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + csvout << "time, Potential energy, Kinetic energy, Total energy Total charge Total Momentum" << endl; } csvout << time_m << " " << potentialEnergy << " " << kineticEnergy << " " - << potentialEnergy + kineticEnergy << endl; + << potentialEnergy + kineticEnergy << " " + << charge << " " + << magMomentum << endl; } @@ -529,8 +599,6 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Sk = 1.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); double kh = kVec[d] * dx[d]; bool isNotZero = (kh != 0.0); @@ -551,75 +619,15 @@ class ChargedParticlesPIF : public ippl::ParticleBase { } - //void dumpBumponTail() { - - // const int nghostE = E_m.getNghost(); - // auto Eview = E_m.getView(); - // double fieldEnergy, EzAmp; - // using mdrange_type = Kokkos::MDRangePolicy>; - - // double temp = 0.0; - // Kokkos::parallel_reduce("Ex inner product", - // mdrange_type({nghostE, nghostE, nghostE}, - // {Eview.extent(0) - nghostE, - // Eview.extent(1) - nghostE, - // Eview.extent(2) - nghostE}), - // KOKKOS_LAMBDA(const size_t i, const size_t j, - // const size_t k, double& valL) - // { - // double myVal = std::pow(Eview(i, j, k)[2], 2); - // valL += myVal; - // }, Kokkos::Sum(temp)); - // double globaltemp = 0.0; - // MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - // fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; - - // double tempMax = 0.0; - // Kokkos::parallel_reduce("Ex max norm", - // mdrange_type({nghostE, nghostE, nghostE}, - // {Eview.extent(0) - nghostE, - // Eview.extent(1) - nghostE, - // Eview.extent(2) - nghostE}), - // KOKKOS_LAMBDA(const size_t i, const size_t j, - // const size_t k, double& valL) - // { - // double myVal = std::fabs(Eview(i, j, k)[2]); - // if(myVal > valL) valL = myVal; - // }, Kokkos::Max(tempMax)); - // EzAmp = 0.0; - // MPI_Reduce(&tempMax, &EzAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - - - // if (Ippl::Comm->rank() == 0) { - // std::stringstream fname; - // fname << "data/FieldBumponTail_"; - // fname << Ippl::Comm->size(); - // fname << ".csv"; - - - // Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - // csvout.precision(10); - // csvout.setf(std::ios::scientific, std::ios::floatfield); - - // if(time_m == 0.0) { - // csvout << "time, Ez_field_energy, Ez_max_norm" << endl; - // } - - // csvout << time_m << " " - // << fieldEnergy << " " - // << EzAmp << endl; - - // } - // - // Ippl::Comm->barrier(); - //} void dumpFieldData() { typename CxField_t::HostMirror rhoNUFFT_host = rho_m.getHostMirror(); - typename CxField_t::HostMirror rhoNUDFT_host = rhoDFT_m.getHostMirror(); + typename Field_t::HostMirror rhoNUFFT_real = rhoPIFreal_m.getHostMirror(); + //typename CxField_t::HostMirror rhoNUDFT_host = rhoDFT_m.getHostMirror(); Kokkos::deep_copy(rhoNUFFT_host, rho_m.getView()); - Kokkos::deep_copy(rhoNUDFT_host, rhoDFT_m.getView()); + Kokkos::deep_copy(rhoNUFFT_real, rhoPIFreal_m.getView()); + //Kokkos::deep_copy(rhoNUDFT_host, rhoDFT_m.getView()); const int nghost = rho_m.getNghost(); std::stringstream pname; pname << "data/FieldFFT_"; @@ -637,7 +645,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { } } std::stringstream pname2; - pname2 << "data/FieldDFT_"; + pname2 << "data/Fieldreal_"; pname2 
<< Ippl::Comm->rank(); pname2 << ".csv"; Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); @@ -647,7 +655,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { for (int i = 0; i< nr_m[0]; i++) { for (int j = 0; j< nr_m[1]; j++) { for (int k = 0; k< nr_m[2]; k++) { - pcsvout2 << rhoNUDFT_host(i+nghost,j+nghost, k+nghost) << endl; + pcsvout2 << rhoNUFFT_real(i+nghost,j+nghost, k+nghost) << endl; } } } diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index 2373d3f15..fe5e8b68c 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -1,6 +1,6 @@ // Electrostatic Landau damping test with Particle-in-Fourier schemes // Usage: -// srun ./LandauDampingPIF
<nx> <ny> <nz> <Np> <Nt> <dt> <ShapeType> <degree> --info 5 +// srun ./LandauDampingPIF <nx> <ny> <nz> <Np> <Nt> <dt> <ShapeType> <degree> <tol>
--info 5 // nx = No. of Fourier modes in the x-direction // ny = No. of Fourier modes in the y-direction // nz = No. of Fourier modes in the z-direction @@ -9,8 +9,9 @@ // dt = Time stepsize // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) +// tol = tolerance of NUFFT // Example: -// srun ./LandauDampingPIF 32 32 32 655360 20 0.05 B-spline 1 --info 5 +// srun ./LandauDampingPIF 32 32 32 655360 20 0.05 B-spline 1 1e-4 --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -200,10 +201,7 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; - //Vector_t kw = {1.0, 1.0, 1.0}; double alpha = 0.05; - //Vector_t rmin(-pi); - //Vector_t rmax(pi); Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw; Vector_t length = rmax - rmin; @@ -221,7 +219,6 @@ int main(int argc, char *argv[]){ //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; - //double Q = -64.0 * pi * pi * pi; P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,Total_particles); P->nr_m = nr; @@ -230,6 +227,42 @@ int main(int argc, char *argv[]){ P->rhoDFT_m.initialize(mesh, FL); P->Sk_m.initialize(mesh, FL); + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domain[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domain[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + P->rhoPIFreal_m.initialize(mesh, FL); + P->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + P->fft_mp = std::make_shared(FL, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + P->time_m = 0.0; P->shapetype_m = argv[7]; @@ -237,16 +270,10 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(particleCreation); - //typedef ippl::detail::RegionLayout RegionLayout_t; - //const RegionLayout_t& RLayout = PL.getRegionLayout(); - //const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t minU, maxU; - //int myRank = Ippl::Comm->rank(); for (unsigned d = 0; d initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - P->initNUFFT(FL); + double tol = std::atof(argv[9]); + P->initNUFFT(FL,tol); P->scatter(); diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index f70ddaa08..54984352e 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -1,6 +1,6 @@ // Electrostatic Penning trap test with Particle-in-Fourier schemes // Usage: -// srun ./PenningTrapPIF
<nx> <ny> <nz> <Np> <Nt> <dt> <ShapeType> <degree> --info 5 +// srun ./PenningTrapPIF <nx> <ny> <nz> <Np> <Nt> <dt> <ShapeType> <degree> <tol>
--info 5 // nx = No. of Fourier modes in the x-direction // ny = No. of Fourier modes in the y-direction // nz = No. of Fourier modes in the z-direction @@ -9,8 +9,9 @@ // dt = Time stepsize // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) +// tol = tolerance of NUFFT // Example: -// srun ./PenningTrapPIF 32 32 32 655360 20 0.05 B-spline 1 --info 5 +// srun ./PenningTrapPIF 32 32 32 655360 20 0.05 B-spline 1 1e-4 --info 5 // // Copyright (c) 2023, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -191,7 +192,6 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t rmin(0.0); - //Vector_t rmax(20.0); Vector_t rmax(25.0); double dx = rmax[0] / nr[0]; double dy = rmax[1] / nr[1]; @@ -228,6 +228,42 @@ int main(int argc, char *argv[]){ P->rho_m.initialize(mesh, FL); P->Sk_m.initialize(mesh, FL); + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domain[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domain[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + P->rhoPIFreal_m.initialize(mesh, FL); + P->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + P->fft_mp = std::make_shared(FL, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + P->time_m = 0.0; P->shapetype_m = argv[7]; @@ -235,9 +271,6 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(particleCreation); - //typedef ippl::detail::RegionLayout RegionLayout_t; - //const RegionLayout_t& RLayout = PL.getRegionLayout(); - //const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t minU, maxU; for (unsigned d = 0; d initializeShapeFunctionPIF(); IpplTimings::stopTimer(initializeShapeFunctionPIF); - P->initNUFFT(FL); + double tol = std::atof(argv[9]); + P->initNUFFT(FL,tol); P->scatter(); P->gather(); IpplTimings::startTimer(dumpDataTimer); - P->dumpEnergy(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); double alpha = -0.5 * dt; @@ -353,7 +387,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - P->dumpEnergy(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 1d3b81545..8bf8547f9 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -603,7 +603,7 @@ int main(int argc, char *argv[]){ double coarseTol = std::atof(argv[17]); - double fineTol = 1e-12; + double fineTol = 1e-3;//1e-12; Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); std::string coarse = "Coarse"; std::string fine = "Fine"; @@ -676,8 
+676,8 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); - //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -855,8 +855,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - //Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -949,8 +949,8 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 862b097e3..9e6abddb5 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -44,6 +44,8 @@ typedef Field, Dim> CxField_t; typedef Field VField_t; typedef ippl::FFTPeriodicPoissonSolver Solver_t; +typedef ippl::FFT FFT_t; + const double pi = std::acos(-1.0); // Test programs have to define this variable for VTK dump purposes @@ -56,6 +58,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { //using nufft_t = typename ippl::FFT; CxField_t rhoPIF_m; + CxField_t rhoPIFhalf_m; + Field_t rhoPIFreal_m; Field_t Sk_m; Field_t rhoPIC_m; VField_t EfieldPIC_m; @@ -75,6 +79,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { size_type Np_m; std::shared_ptr solver_mp; + std::shared_ptr fft_mp; double time_m; @@ -96,6 +101,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { typename ippl::ParticleBase::particle_position_type RprevIter; // G(R^(k-1)_n) typename ippl::ParticleBase::particle_position_type PprevIter; // G(P^(k-1)_n) + //typename ippl::ParticleBase::particle_position_type Rfine; + //typename ippl::ParticleBase::particle_position_type Pfine; + /* This constructor is mandatory for all derived classes from ParticleBase as the bunch buffer uses this @@ -111,6 +119,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { this->addAttribute(P0); this->addAttribute(RprevIter); this->addAttribute(PprevIter); + //this->addAttribute(Rfine); + //this->addAttribute(Pfine); } ChargedParticlesPinT(PLayout& pl, @@ -135,6 +145,8 @@ class ChargedParticlesPinT : 
public ippl::ParticleBase { this->addAttribute(P0); this->addAttribute(RprevIter); this->addAttribute(PprevIter); + //this->addAttribute(Rfine); + //this->addAttribute(Pfine); setupBCs(); for (unsigned int i = 0; i < Dim; i++) decomp_m[i]=decomp[i]; @@ -624,7 +636,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), KOKKOS_LAMBDA(const int i, double& valL){ double myVal = dot(Pview(i), Pview(i)).apply(); - myVal *= -qView(i); + myVal *= -qView(i); //q/(q/m) where q/m=-1 for us valL += myVal; }, Kokkos::Sum(temp)); @@ -635,6 +647,77 @@ class ChargedParticlesPinT : public ippl::ParticleBase { kineticEnergy = globaltemp; + auto rhoPIFhalfview = rhoPIFhalf_m.getView(); + const int nghostHalf = rhoPIFhalf_m.getNghost(); + + const FieldLayout_t& layoutHalf = rhoPIFhalf_m.getLayout(); + const auto& domainHalf = layoutHalf.getDomain(); + + Vector Nhalf; + for (unsigned d=0; d < Dim; ++d) { + Nhalf[d] = domainHalf[d].length(); + } + + Kokkos::parallel_for("Transfer complex rho to half domain", + mdrange_type({0, 0, 0}, + {Nhalf[0], + Nhalf[1], + Nhalf[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + Vector iVec = {i, j, k}; + int shift; + for(size_t d = 0; d < Dim; ++d) { + bool isLessThanHalf = (iVec[d] < (Nhalf[d]/2)); + shift = ((int)isLessThanHalf * 2) - 1; + iVec[d] = (iVec[d] + shift * (Nhalf[d]/2)) + nghostHalf; + } + rhoPIFhalfview(Nhalf[0]-1-i+nghostHalf, iVec[1], iVec[2]) = + rhoview(i+nghostHalf,j+nghostHalf,k+nghostHalf); + }); + + + rhoPIFreal_m = 0.0; + fft_mp->transform(-1, rhoPIFreal_m, rhoPIFhalf_m); + + rhoPIFreal_m = (1.0/(N[0]*N[1]*N[2])) * volume * rhoPIFreal_m; + auto rhoPIFrealview = rhoPIFreal_m.getView(); + temp = 0.0; + Kokkos::parallel_reduce("Rho real sum", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + valL += rhoPIFrealview(i+nghost, j+nghost, k+nghost); + }, Kokkos::Sum(temp)); + + + double chargeTotal = temp; + + Vector_t totalMomentum = 0.0; + + Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, Vector_t& valL){ + valL += (-qView(i)) * Pview(i); + }, Kokkos::Sum>(totalMomentum)); + + Vector_t globalMom; + + double magMomentum = 0.0; + for(size_t d = 0; d < Dim; ++d) { + MPI_Allreduce(&totalMomentum[d], &globalMom[d], 1, MPI_DOUBLE, MPI_SUM, spaceComm); + magMomentum += globalMom[d] * globalMom[d]; + } + + magMomentum = std::sqrt(magMomentum); + if(rankSpace == 0) { std::stringstream fname; fname << "data/Energy_rank_"; @@ -647,7 +730,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); + csvout.precision(17); csvout.setf(std::ios::scientific, std::ios::floatfield); //csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; @@ -655,7 +738,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { csvout << time_m << " " << potentialEnergy << " " << kineticEnergy << " " - << potentialEnergy + kineticEnergy << endl; + << potentialEnergy + kineticEnergy << " " + << chargeTotal << " " + << magMomentum << endl; } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 994519005..a522f7824 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -39,8 +39,6 @@ #include "ChargedParticlesPinT.hpp" #include "StatesBeginSlice.hpp" #include "StatesEndSlice.hpp" 
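// [Editor's sketch, not part of the patch] The error helpers changed below (computeRL2Error,
// computeRLinfError) all unwrap position differences across the periodic boundary before forming
// norms; as the added comments explain, the hard-coded thresholds (e.g. 10.0) are ad-hoc values
// tied to the domain length. Distilled into a standalone helper (the helper name is illustrative),
// the per-component correction is:
inline double unwrapPeriodicDiff(double diff, double boxLength, double threshold) {
    // A particle that crossed the periodic boundary between two iterates differs by roughly
    // +/- boxLength even when the physical error is small, so shift the difference back into
    // the primary image before measuring it.
    if (diff <= -threshold) { diff += boxLength; }
    if (diff >= threshold)  { diff -= boxLength; }
    return diff;
}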
-//#include "LeapFrogPIC.cpp" -//#include "LeapFrogPIF.cpp" #include #include #include @@ -156,6 +154,11 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + //This is just to undo the effect of periodic BCs during the + //error calculation. Otherwise even though the actual error is + //small the computed error might be very large. + //The values (e.g. 10) mentioned here are just an adhoc + //value depending on the domain length. for (unsigned d = 0; d < 3; ++d) { bool isLeft = (diff[d] <= -10.0); bool isRight = (diff[d] >= 10.0); @@ -211,8 +214,7 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp } double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - Vector_t& length) { + Vector_t& length, MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -223,6 +225,11 @@ double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + //This is just to undo the effect of periodic BCs during the + //error calculation. Otherwise even though the actual error is + //small the computed error might be very large. + //The values (e.g. 10) mentioned here are just an adhoc + //value depending on the domain length. for (unsigned d = 0; d < 3; ++d) { bool isLeft = (diff[d] <= -10.0); bool isRight = (diff[d] >= 10.0); @@ -235,33 +242,30 @@ double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& myValError = std::sqrt(myValError); - //bool isIncluded = (myValError < 10.0); - - //myValError *= isIncluded; - if(myValError > valLError) valLError = myValError; double myValnorm = dot(Qview(i), Qview(i)).apply(); myValnorm = std::sqrt(myValnorm); - //myValnorm *= isIncluded; - if(myValnorm > valLnorm) valLnorm = myValnorm; - //excluded += (!isIncluded); }, Kokkos::Max(localError), Kokkos::Max(localNorm)); Kokkos::fence(); - lError = localError/localNorm; - double relError = lError; + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_MAX, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_MAX, spaceComm); + + double relError = globalError/globalNorm; return relError; } double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { + MPI_Comm& spaceComm) { auto Qview = Q.getView(); auto QprevIterView = QprevIter.getView(); @@ -283,97 +287,17 @@ double computePLinfError(ParticleAttrib& Q, ParticleAttrib& }, Kokkos::Max(localError), Kokkos::Max(localNorm)); Kokkos::fence(); - lError = localError/localNorm; - - double relError = lError; - - return relError; - -} - - -double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { - - auto rhoview = rhoPIF.getView(); - auto rhoprevview = rhoPIFprevIter.getView(); - const int nghost = rhoPIF.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - - const FieldLayout_t& layout = rhoPIF.getLayout(); - const Mesh_t& mesh = rhoPIF.get_mesh(); - const Vector& dx = mesh.getMeshSpacing(); - const auto& domain = layout.getDomain(); - Vector Len; - Vector N; - - for (unsigned d=0; d < Dim; ++d) { - N[d] = domain[d].length(); - Len[d] = dx[d] * N[d]; - } - - double AbsError = 0.0; - double Enorm = 0.0; - Kokkos::complex imag = {0.0, 
1.0}; - double pi = std::acos(-1.0); - Kokkos::parallel_reduce("Ex field error", - mdrange_type({0, 0, 0}, - {N[0], - N[1], - N[2]}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k, - double& errorSum, - double& fieldSum) - { - - Vector iVec = {i, j, k}; - Vector kVec; - double Dr = 0.0; - for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - Dr += kVec[d] * kVec[d]; - } - - double myError = 0.0; - double myField = 0.0; - Kokkos::complex Ek = {0.0, 0.0}; - Kokkos::complex Ekprev = {0.0, 0.0}; - for(size_t d = 0; d < Dim; ++d) { - if(Dr != 0.0) { - Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); - } - Ekprev = Ekprev - Ek; - myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); - myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); - } - errorSum += myError; - fieldSum += myField; - //Kokkos::complex rhok = rhoview(i+nghost,j+nghost,k+nghost); - //Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); - //rhokprev = rhokprev - rhok; - //myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); - //errorSum += myError; - //myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); - //fieldSum += myField; - - }, Kokkos::Sum(AbsError), Kokkos::Sum(Enorm)); - Kokkos::fence(); double globalError = 0.0; - MPI_Allreduce(&AbsError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_MAX, spaceComm); double globalNorm = 0.0; - MPI_Allreduce(&Enorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //fieldEnergy *= volume; - - double relError = std::sqrt(globalError)/std::sqrt(globalNorm); - + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_MAX, spaceComm); + + double relError = globalError/globalNorm; + return relError; -} +} const char* TestName = "LandauDampingPinT"; @@ -436,9 +360,6 @@ int main(int argc, char *argv[]){ const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); - //const double tStartMySlice = Ippl::Comm->rank() * dtSlice; - //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; - using bunch_type = ChargedParticlesPinT; using states_begin_type = StatesBeginSlice; @@ -462,7 +383,6 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; - //double alpha = 0.05; Vector_t alpha = {0.05, 0.05, 0.05}; //Vector_t alpha = {0.5, 0.5, 0.5}; Vector_t rmin(0.0); @@ -493,15 +413,6 @@ int main(int argc, char *argv[]){ size_type Total_particles = 0; - //MPI_Allreduce(&nloc, &Total_particles, 1, - // MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); - - //int rest = (int) (totalP - Total_particles); - - //if ( (rankTime == 0) && (rankSpace < rest) ) { - // ++nloc; - //} - MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); @@ -516,10 +427,44 @@ int main(int argc, char *argv[]){ Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); - //Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); - //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); + + + 
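[Note] The periodic-BC correction in the R error norms above relies on a hand-tuned threshold (the ±10, chosen for this domain length) to detect particles that were wrapped around the box between parareal iterations. A threshold-free sketch of the same idea is the usual minimum-image convention, which folds each component of the difference back into [-L/2, L/2]; length[d] plays the role of L as in the surrounding code, the helper itself is only illustrative:

#include <Kokkos_Core.hpp>

// Fold one component of a position difference back into [-L/2, L/2], so a
// particle wrapped by the periodic BCs does not show up as an O(L) error.
// Assumes |diff| < 1.5*L, which holds when both positions lie inside the box.
KOKKOS_INLINE_FUNCTION double minimumImage(double diff, double L) {
    if (diff >  0.5 * L) diff -= L;
    if (diff < -0.5 * L) diff += L;
    return diff;
}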
//////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + Pcoarse->rhoPIFreal_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + Pcoarse->fft_mp = std::make_shared(FLPIF, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// Pcoarse->initFFTSolver(); @@ -550,7 +495,7 @@ int main(int argc, char *argv[]){ //Pcoarse->initNUFFT(FLPIF); double coarseTol = std::atof(argv[17]); - double fineTol = 1e-12; + double fineTol = std::atof(argv[18]); Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); std::string coarse = "Coarse"; std::string fine = "Fine"; @@ -596,7 +541,7 @@ int main(int argc, char *argv[]){ //IpplTimings::stopTimer(deepCopy); - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(rankTime == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); @@ -621,8 +566,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); //Pcoarse->initNUFFT(FLPIF, coarseTol); - Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); - //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -796,8 +741,8 @@ int main(int argc, char *argv[]){ //double coarseTol = (double)(std::pow(0.1,std::min((int)(it+2),3))); //double fineTol = 1e-6; //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - //Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -881,9 +826,9 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); //Pcoarse->initNUFFT(FLPIF, coarseTol); - //Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, 
tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 7cc1af833..95ae8387a 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -599,22 +599,20 @@ int main(int argc, char *argv[]){ //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - //if(Ippl::Comm->rank() == 0) { - // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + //if(rankTime == 0) { + // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); // Kokkos::parallel_for(nloc, // generate_random, Dim>( // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, // minU, maxU)); - - // Kokkos::fence(); // size_type bufSize = Pbegin->packedSize(nloc); // std::vector requests(0); // int sends = 0; - // for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { + // for(int rank = 1; rank < sizeTime; ++rank) { // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); // requests.resize(requests.size() + 1); - // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); + // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc, timeComm); // buf->resetWritePos(); // ++sends; // } @@ -623,10 +621,14 @@ int main(int argc, char *argv[]){ //else { // size_type bufSize = Pbegin->packedSize(nloc); // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); + // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc, timeComm); // buf->resetReadPos(); //} + //Kokkos::deep_copy(Pcoarse->Rfine.getView(), Pbegin->R.getView()); + //Kokkos::deep_copy(Pcoarse->Pfine.getView(), Pbegin->P.getView()); + + //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); @@ -658,8 +660,8 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(initialCoarse); - Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - //Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); IpplTimings::stopTimer(initialCoarse); IpplTimings::startTimer(deepCopy); @@ -765,6 +767,8 @@ int main(int argc, char *argv[]){ int sign = 1; //coarseTol = 1e-3; //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + //Pcoarse->BorisPIF(Pcoarse->Rfine, Pcoarse->Pfine, (rankTime+1)*ntFine, dtFine, 0, 0, 0, + // Bext, rankTime, rankSpace, fine, spaceComm); for (unsigned int nc=0; nc < nCycles; nc++) { double tStartMySlice; @@ -847,8 +851,8 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(coarsePropagator); //coarseTol = 1e-4;//(double)(std::pow(0.1,std::min((int)(it+2),4))); //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - //Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 
0, coarse, spaceComm); - Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + //Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; @@ -861,6 +865,8 @@ int main(int argc, char *argv[]){ //double localRerror, localPerror; double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); + //double Rerror = computeRL2Error(Pcoarse->Rfine, Pend->R, length, spaceComm); + //double Perror = computePL2Error(Pcoarse->Pfine, Pend->P, spaceComm); IpplTimings::stopTimer(computeErrors); @@ -944,8 +950,8 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - //Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); - Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index acb06d7bd..c8f28c8bf 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -124,8 +124,10 @@ namespace ippl { } } + //heffte_m = std::make_shared> + // (inbox, outbox, Ippl::getComm(), heffteOptions); heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); + (inbox, outbox, MPI_COMM_SELF, heffteOptions); //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); if(workspace_m.size() < heffte_m->size_workspace()) From 380e3e4210101e5c95ad999d3195002fb4d37d5d Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 1 Mar 2024 17:06:08 +0100 Subject: [PATCH 104/117] Unwanted files removed --- alpine/PinT/LeapFrogPIC.cpp | 60 ---------------------------------- alpine/PinT/LeapFrogPIF.cpp | 65 ------------------------------------- 2 files changed, 125 deletions(-) delete mode 100644 alpine/PinT/LeapFrogPIC.cpp delete mode 100644 alpine/PinT/LeapFrogPIF.cpp diff --git a/alpine/PinT/LeapFrogPIC.cpp b/alpine/PinT/LeapFrogPIC.cpp deleted file mode 100644 index d719a423e..000000000 --- a/alpine/PinT/LeapFrogPIC.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, -// Paul Scherrer Institut, Villigen PSI, Switzerland -// All rights reserved -// -// This file is part of IPPL. -// -// IPPL is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// You should have received a copy of the GNU General Public License -// along with IPPL. If not, see . 
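[Note] For reference, the correction assembled above from the fine and coarse sweeps (fine result minus the previous coarse result, plus the freshly recomputed coarse result) is the standard parareal update. With F the fine and G the coarse propagator over one time slice, the iteration reads

    U_{n+1}^{k+1} = G(U_n^{k+1}) + F(U_n^k) - G(U_n^k)

which the code evaluates piecewise across the fine and coarse sweeps before the final update Pend->R = Pend->R + Pcoarse->R.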
-// - -//#include "ChargedParticlesPinT.hpp" - -void LeapFrogPIC(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, - ParticleAttrib& Ptemp, const unsigned int nt, - const double dt) { - - PLayout_t& PL = P.getLayout(); - - const auto& hr = P.hr_m; - const auto& rmax = P.rmax_m; - const auto& rmin = P.rmin_m; - for (unsigned int it=0; itsolve(); - - // gather E field - gather(P.E, P.EfieldPIC_m, Rtemp); - - //kick - Ptemp = Ptemp - 0.5 * dt * P.E; - } - -} diff --git a/alpine/PinT/LeapFrogPIF.cpp b/alpine/PinT/LeapFrogPIF.cpp deleted file mode 100644 index b7473237f..000000000 --- a/alpine/PinT/LeapFrogPIF.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, -// Paul Scherrer Institut, Villigen PSI, Switzerland -// All rights reserved -// -// This file is part of IPPL. -// -// IPPL is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// You should have received a copy of the GNU General Public License -// along with IPPL. If not, see . -// - -//#include "ChargedParticlesPinT.hpp" - -void LeapFrogPIF(ChargedParticlesPinT& P, ParticleAttrib& Rtemp, - ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const bool& isConverged, - const double& tStartMySlice) { - - auto& PL = P.getLayout(); - const auto& rmax = P.rmax_m; - const auto& rmin = P.rmin_m; - - P.time_m = tStartMySlice; - - for (unsigned int it=0; it Date: Fri, 1 Mar 2024 17:11:52 +0100 Subject: [PATCH 105/117] Still in the middle of cleanup --- alpine/PinT/LandauDampingPinT.cpp | 14 ++++----- alpine/PinT/StatesBeginSlice.hpp | 31 ------------------- .../{StatesEndSlice.hpp => StatesSlice.hpp} | 6 ++-- 3 files changed, 9 insertions(+), 42 deletions(-) delete mode 100644 alpine/PinT/StatesBeginSlice.hpp rename alpine/PinT/{StatesEndSlice.hpp => StatesSlice.hpp} (86%) diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index a522f7824..cb1a7a76e 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -37,8 +37,7 @@ // #include "ChargedParticlesPinT.hpp" -#include "StatesBeginSlice.hpp" -#include "StatesEndSlice.hpp" +#include "StatesSlice.hpp" #include #include #include @@ -362,12 +361,11 @@ int main(int argc, char *argv[]){ using bunch_type = ChargedParticlesPinT; - using states_begin_type = StatesBeginSlice; - using states_end_type = StatesEndSlice; + using states_type = StatesSlice; std::unique_ptr Pcoarse; - std::unique_ptr Pbegin; - std::unique_ptr Pend; + std::unique_ptr Pbegin; + std::unique_ptr Pend; ippl::NDIndex domainPIC; ippl::NDIndex domainPIF; @@ -419,8 +417,8 @@ int main(int argc, char *argv[]){ //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); - Pbegin = std::make_unique(PL); - Pend = std::make_unique(PL); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); Pcoarse->nr_m = nrPIC; Pcoarse->nm_m = nmPIF; diff --git a/alpine/PinT/StatesBeginSlice.hpp b/alpine/PinT/StatesBeginSlice.hpp deleted file mode 100644 index 621e88038..000000000 --- a/alpine/PinT/StatesBeginSlice.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland -// All rights reserved -// -// This file is part of IPPL. 
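[Note] The LeapFrogPIC free function removed above (its class-method counterparts remain in ChargedParticlesPinT.hpp) is the usual kick-drift-kick splitting: half velocity kick, full position drift, field solve at the new positions, second half kick. A self-contained single-particle sketch with a user-supplied field evaluation, using the q/m = -1 convention of the surrounding code (the efield callback and all names here are illustrative):

#include <array>
#include <functional>

using Vec3 = std::array<double, 3>;

// One kick-drift-kick leapfrog step of size dt for a single particle.
void leapFrogStep(Vec3& r, Vec3& v, double dt,
                  const std::function<Vec3(const Vec3&)>& efield) {
    Vec3 E = efield(r);
    for (int d = 0; d < 3; ++d) v[d] -= 0.5 * dt * E[d];  // half kick (q/m = -1)
    for (int d = 0; d < 3; ++d) r[d] += dt * v[d];         // drift
    E = efield(r);                                          // field solve + gather
    for (int d = 0; d < 3; ++d) v[d] -= 0.5 * dt * E[d];   // second half kick
}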
-// -// IPPL is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// You should have received a copy of the GNU General Public License -// along with IPPL. If not, see . -// - - -template -class StatesBeginSlice : public ippl::ParticleBase { - -public: - typename ippl::ParticleBase::particle_position_type P; - - StatesBeginSlice(PLayout& pl) - : ippl::ParticleBase(pl) - { - // register the particle attributes - this->addAttribute(P); - } - - ~StatesBeginSlice(){ } - -}; diff --git a/alpine/PinT/StatesEndSlice.hpp b/alpine/PinT/StatesSlice.hpp similarity index 86% rename from alpine/PinT/StatesEndSlice.hpp rename to alpine/PinT/StatesSlice.hpp index 6b69996a1..206f8746c 100644 --- a/alpine/PinT/StatesEndSlice.hpp +++ b/alpine/PinT/StatesSlice.hpp @@ -14,18 +14,18 @@ template -class StatesEndSlice : public ippl::ParticleBase { +class StatesSlice : public ippl::ParticleBase { public: typename ippl::ParticleBase::particle_position_type P; - StatesEndSlice(PLayout& pl) + StatesSlice(PLayout& pl) : ippl::ParticleBase(pl) { // register the particle attributes this->addAttribute(P); } - ~StatesEndSlice(){ } + ~StatesSlice(){ } }; From eb6fccd9224c0cfd258318bd831251fae4f76724 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 1 Mar 2024 17:40:43 +0100 Subject: [PATCH 106/117] Landaudamping cleaned, chargedparticles as well as others need to be cleaned as well --- alpine/PinT/ChargedParticlesPinT.hpp | 2 + alpine/PinT/LandauDampingPinT.cpp | 242 +++++---------------------- 2 files changed, 47 insertions(+), 197 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 9e6abddb5..fe46f837f 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -85,6 +85,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { std::string shapetype_m; + std::string coarsetype_m; + int shapedegree_m; //nufft_t nufftType1Fine_m,nufftType2Fine_m,nufftType1Coarse_m,nufftType2Coarse_m; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index cb1a7a76e..c6bbd3536 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -6,20 +6,27 @@ // European Conference on Parallel Processing. Springer, Cham, 2017. // // Usage: -// srun ./LandauDampingPinT -// --info 5 +// srun ./LandauDampingPinT +// +// --info 5 // nmx = No. of Fourier modes in the x-direction // nmy = No. of Fourier modes in the y-direction // nmz = No. of Fourier modes in the z-direction -// nx = No. of grid points in the x-direction -// ny = No. of grid points in the y-direction -// nz = No. of grid points in the z-direction +// nx = No. of grid points in the x-direction (not used if PIF is also used as coarse propagator) +// ny = No. of grid points in the y-direction (not used if PIF is also used as coarse propagator) +// nz = No. of grid points in the z-direction (not used if PIF is also used as coarse propagator) // Np = Total no. of macro-particles in the simulation +// tolParareal = Parareal tolerance // nCycles = No. of Parareal blocks/cycles // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) +// No. of space procs = Number of MPI ranks to be used in the spatial parallelization +// No. 
of time procs = Number of MPI ranks to be used in the time parallelization +// coarseTol = Coarse tolerance for PIF if we use PIF as a coarse propagator (will not be used when PIC is used) +// fineTol = fine tolerance for PIF +// coarseType = Type of coarse propagator (PIF or PIC) // Example: -// srun ./LandauDampingPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 4 B-spline 1 --info 5 +// srun ./LandauDampingPinT 32 32 32 16 16 16 655360 19.2 0.05 0.05 1e-5 1 B-spline 1 4 16 1e-2 1e-4 PIC --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -212,92 +219,6 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp } -double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - Vector_t& length, MPI_Comm& spaceComm) { - - auto Qview = Q.getView(); - auto QprevIterView = QprevIter.getView(); - double localError = 0.0; - double localNorm = 0.0; - - Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ - Vector_t diff = Qview(i) - QprevIterView(i); - - //This is just to undo the effect of periodic BCs during the - //error calculation. Otherwise even though the actual error is - //small the computed error might be very large. - //The values (e.g. 10) mentioned here are just an adhoc - //value depending on the domain length. - for (unsigned d = 0; d < 3; ++d) { - bool isLeft = (diff[d] <= -10.0); - bool isRight = (diff[d] >= 10.0); - bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); - diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) - +(isRight * (diff[d] - length[d])); - } - - double myValError = dot(diff, diff).apply(); - - myValError = std::sqrt(myValError); - - if(myValError > valLError) valLError = myValError; - - double myValnorm = dot(Qview(i), Qview(i)).apply(); - myValnorm = std::sqrt(myValnorm); - - if(myValnorm > valLnorm) valLnorm = myValnorm; - - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - - Kokkos::fence(); - - double globalError = 0.0; - MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_MAX, spaceComm); - double globalNorm = 0.0; - MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_MAX, spaceComm); - - double relError = globalError/globalNorm; - - return relError; - -} - -double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - MPI_Comm& spaceComm) { - - auto Qview = Q.getView(); - auto QprevIterView = QprevIter.getView(); - double localError = 0.0; - double localNorm = 0.0; - - Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ - Vector_t diff = Qview(i) - QprevIterView(i); - double myValError = dot(diff, diff).apply(); - myValError = std::sqrt(myValError); - - if(myValError > valLError) valLError = myValError; - - double myValnorm = dot(Qview(i), Qview(i)).apply(); - myValnorm = std::sqrt(myValnorm); - - if(myValnorm > valLnorm) valLnorm = myValnorm; - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - - Kokkos::fence(); - - double globalError = 0.0; - MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_MAX, spaceComm); - double globalNorm = 0.0; - MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_MAX, spaceComm); - - double relError = globalError/globalNorm; - - return relError; - -} - const char* TestName = "LandauDampingPinT"; int main(int argc, char *argv[]){ @@ -425,9 +346,14 @@ int main(int argc, char *argv[]){ Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); - Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); - Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); - + + Pcoarse->coarsetype_m = argv[19]; + + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->initFFTSolver(); + } //////////////////////////////////////////////////////////// //Initialize an FFT object for getting rho in real space and @@ -464,7 +390,6 @@ int main(int argc, char *argv[]){ //////////////////////////////////////////////////////////// - Pcoarse->initFFTSolver(); Vector_t minU, maxU; for (unsigned d = 0; d initNUFFT(FLPIF); double coarseTol = std::atof(argv[17]); double fineTol = std::atof(argv[18]); Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); std::string coarse = "Coarse"; std::string fine = "Fine"; - IpplTimings::startTimer(particleCreation); - - #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs - //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - //if(Ippl::Comm->rank() == 0) { - // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); - // Kokkos::parallel_for(nloc, - // generate_random, Dim>( - // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); - - // Kokkos::fence(); - // size_type bufSize = Pbegin->packedSize(nloc); - // std::vector requests(0); - // int sends = 0; - // for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); - // requests.resize(requests.size() + 1); - // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); - // buf->resetWritePos(); - // ++sends; - // } - // MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - //} - //else { - // size_type bufSize = Pbegin->packedSize(nloc); - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); - // buf->resetReadPos(); - //} - //Ippl::Comm->barrier(); - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - //IpplTimings::stopTimer(deepCopy); - + //For some reason using the next_tag with multiple cycles is not + //working so we use static tags here tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(rankTime == 0) { @@ -563,9 +454,14 
@@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - //Pcoarse->initNUFFT(FLPIF, coarseTol); - //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); - Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + } + else { + //PIF with coarse tolerance as coarse propagator + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + } IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -581,6 +477,7 @@ int main(int argc, char *argv[]){ MPI_Wait(&request, MPI_STATUS_IGNORE); } #else + //Note the CPU version has not been tested. Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( @@ -612,57 +509,6 @@ int main(int argc, char *argv[]){ msg << "particles created and initial conditions assigned " << endl; - //Copy initial conditions as they are needed later - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - ////Get initial guess for ranks other than 0 by propagating the coarse solver - //IpplTimings::startTimer(coarsePropagator); - //if (Ippl::Comm->rank() > 0) { - // Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); - //} - // - //Ippl::Comm->barrier(); - // - //IpplTimings::stopTimer(coarsePropagator); - - //msg << "First Leap frog PIC done " << endl; - - // - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - ////Run the coarse integrator to get the values at the end of the time slice - //IpplTimings::startTimer(coarsePropagator); - //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - //IpplTimings::stopTimer(coarsePropagator); - //msg << "Second Leap frog PIC done " << endl; - - - ////The following might not be needed - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - //msg << "Starting parareal iterations ..." 
<< endl; - //bool isConverged = false; - //bool isPreviousDomainConverged; - //if(Ippl::Comm->rank() == 0) { - // isPreviousDomainConverged = true; - //} - //else { - // isPreviousDomainConverged = false; - //} - - int sign = 1; for (unsigned int nc=0; nc < nCycles; nc++) { double tStartMySlice; @@ -695,7 +541,6 @@ int main(int argc, char *argv[]){ while (!isConverged) { //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); - //Pcoarse->initNUFFT(FLPIF, fineTol); Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, fine, spaceComm); IpplTimings::stopTimer(finePropagator); @@ -735,18 +580,19 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - //Pcoarse->initNUFFT(FLPIF, coarseTol); - //double coarseTol = (double)(std::pow(0.1,std::min((int)(it+2),3))); - //double fineTol = 1e-6; - //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); + IpplTimings::startTimer(computeErrors); double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); @@ -777,7 +623,6 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); @@ -824,9 +669,12 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - //Pcoarse->initNUFFT(FLPIF, coarseTol); - Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); From 67d2fd59fd63d933ade7157f42217811148f9732 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 2 Mar 2024 10:07:21 +0100 Subject: [PATCH 107/117] ChargedParticles also cleaned --- alpine/PinT/ChargedParticlesPinT.hpp | 507 +++------------------------ 1 file changed, 42 insertions(+), 465 deletions(-) diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index fe46f837f..31835f9e4 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -55,15 +55,12 @@ template class ChargedParticlesPinT : public ippl::ParticleBase { public: - //using nufft_t = typename ippl::FFT; - CxField_t rhoPIF_m; CxField_t rhoPIFhalf_m; Field_t rhoPIFreal_m; Field_t 
Sk_m; Field_t rhoPIC_m; VField_t EfieldPIC_m; - //VField_t EfieldPICprevIter_m; Vector nr_m; Vector nm_m; @@ -89,8 +86,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { int shapedegree_m; - //nufft_t nufftType1Fine_m,nufftType2Fine_m,nufftType1Coarse_m,nufftType2Coarse_m; - std::shared_ptr> nufftType1Fine_mp,nufftType2Fine_mp,nufftType1Coarse_mp,nufftType2Coarse_mp; + std::shared_ptr> nufftType1Fine_mp,nufftType2Fine_mp, + nufftType1Coarse_mp,nufftType2Coarse_mp; public: ParticleAttrib q; // charge @@ -103,27 +100,24 @@ class ChargedParticlesPinT : public ippl::ParticleBase { typename ippl::ParticleBase::particle_position_type RprevIter; // G(R^(k-1)_n) typename ippl::ParticleBase::particle_position_type PprevIter; // G(P^(k-1)_n) - //typename ippl::ParticleBase::particle_position_type Rfine; - //typename ippl::ParticleBase::particle_position_type Pfine; - - /* - This constructor is mandatory for all derived classes from - ParticleBase as the bunch buffer uses this - */ - ChargedParticlesPinT(PLayout& pl) - : ippl::ParticleBase(pl) - { - // register the particle attributes - this->addAttribute(q); - this->addAttribute(P); - this->addAttribute(E); - this->addAttribute(R0); - this->addAttribute(P0); - this->addAttribute(RprevIter); - this->addAttribute(PprevIter); - //this->addAttribute(Rfine); - //this->addAttribute(Pfine); - } + ///* + // This constructor is mandatory for all derived classes from + // ParticleBase as the bunch buffer uses this + //*/ + //ChargedParticlesPinT(PLayout& pl) + //: ippl::ParticleBase(pl) + //{ + // // register the particle attributes + // this->addAttribute(q); + // this->addAttribute(P); + // this->addAttribute(E); + // this->addAttribute(R0); + // this->addAttribute(P0); + // this->addAttribute(RprevIter); + // this->addAttribute(PprevIter); + // //this->addAttribute(Rfine); + // //this->addAttribute(Pfine); + //} ChargedParticlesPinT(PLayout& pl, Vector_t hr, @@ -147,8 +141,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { this->addAttribute(P0); this->addAttribute(RprevIter); this->addAttribute(PprevIter); - //this->addAttribute(Rfine); - //this->addAttribute(Pfine); setupBCs(); for (unsigned int i = 0; i < Dim; i++) decomp_m[i]=decomp[i]; @@ -181,21 +173,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } - //void initNUFFT(FieldLayout_t& FLPIF, double& tol) { - // ippl::ParameterList fftParams; - - // fftParams.add("gpu_method", 1); - // fftParams.add("gpu_sort", 0); - // fftParams.add("gpu_kerevalmeth", 1); - // //fftParams.add("tolerance", 1e-6); - // fftParams.add("tolerance", tol); - - // fftParams.add("use_cufinufft_defaults", false); - - // q.initializeNUFFT(FLPIF, 1, fftParams); - // E.initializeNUFFT(FLPIF, 2, fftParams); - //} - void initNUFFTs(FieldLayout_t& FLPIF, double& coarseTol, double& fineTol) { @@ -214,11 +191,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { fftFineParams.add("use_cufinufft_defaults", false); fftCoarseParams.add("use_cufinufft_defaults", false); - //nufftType1Fine_m = nufft_t(FLPIF, this->getLocalNum(), 1, fftFineParams); - //nufftType2Fine_m = nufft_t(FLPIF, this->getLocalNum(), 2, fftFineParams); - - //nufftType1Coarse_m = nufft_t(FLPIF, this->getLocalNum(), 1, fftCoarseParams); - //nufftType2Coarse_m = nufft_t(FLPIF, this->getLocalNum(), 2, fftCoarseParams); nufftType1Fine_mp = std::make_shared>(FLPIF, this->getLocalNum(), 1, fftFineParams); nufftType2Fine_mp = std::make_shared>(FLPIF, this->getLocalNum(), 2, fftFineParams); @@ -226,247 +198,8 @@ class 
ChargedParticlesPinT : public ippl::ParticleBase { nufftType2Coarse_mp = std::make_shared>(FLPIF, this->getLocalNum(), 2, fftCoarseParams); } - void initializeParareal(ParticleAttrib& Rbegin, - ParticleAttrib& Pbegin, - ParticleAttrib& Rcoarse, - ParticleAttrib& Pcoarse, - ParticleAttrib& Rtemp, - ParticleAttrib& Ptemp, - bool& isConverged, - bool& isPreviousDomainConverged, - const unsigned int& ntCoarse, - const double& dtCoarse, - const double& tStartMySlice, - const double& Bext, - const int& rankTime, - MPI_Comm& spaceComm) { - - //Copy initial conditions as they are needed later - //Kokkos::deep_copy(R0.getView(), this->R.getView()); - //Kokkos::deep_copy(P0.getView(), P.getView()); - Kokkos::deep_copy(Rtemp.getView(), Rcoarse.getView()); - Kokkos::deep_copy(Ptemp.getView(), Pcoarse.getView()); - - //Get initial guess for ranks other than 0 by propagating the coarse solver - if (rankTime > 0) { - //BorisPIC(this->R, P, rankTime*ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); - BorisPIC(Rcoarse, Pcoarse, rankTime*ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); - } - - //Copy initial conditions as they are needed later - //Kokkos::deep_copy(R0.getView(), this->R.getView()); - //Kokkos::deep_copy(P0.getView(), P.getView()); - - - //Ippl::Comm->barrier(); - - //Kokkos::deep_copy(Rbegin.getView(), this->R.getView()); - //Kokkos::deep_copy(Pbegin.getView(), P.getView()); - Kokkos::deep_copy(Rbegin.getView(), Rcoarse.getView()); - Kokkos::deep_copy(Pbegin.getView(), Pcoarse.getView()); - - - //Run the coarse integrator to get the values at the end of the time slice - //BorisPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); - BorisPIC(Rcoarse, Pcoarse, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); - - isConverged = false; - if(rankTime == 0) { - isPreviousDomainConverged = true; - } - else { - isPreviousDomainConverged = false; - } - } - - void initializeParareal(ParticleAttrib& Rbegin, - ParticleAttrib& Pbegin, - bool& isConverged, - bool& isPreviousDomainConverged, - const unsigned int& ntCoarse, - const double& dtCoarse, - const double& tStartMySlice) { - - //Copy initial conditions as they are needed later - Kokkos::deep_copy(R0.getView(), this->R.getView()); - Kokkos::deep_copy(P0.getView(), P.getView()); - - //Get initial guess for ranks other than 0 by propagating the coarse solver - if (Ippl::Comm->rank() > 0) { - LeapFrogPIC(this->R, P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); - } - - //Ippl::Comm->barrier(); - - Kokkos::deep_copy(Rbegin.getView(), this->R.getView()); - Kokkos::deep_copy(Pbegin.getView(), P.getView()); - - - //Run the coarse integrator to get the values at the end of the time slice - LeapFrogPIC(this->R, P, ntCoarse, dtCoarse, tStartMySlice); - - isConverged = false; - if(Ippl::Comm->rank() == 0) { - isPreviousDomainConverged = true; - } - else { - isPreviousDomainConverged = false; - } - } - - void dumpLandauPIC() { - - const int nghostE = EfieldPIC_m.getNghost(); - auto Eview = EfieldPIC_m.getView(); - double fieldEnergy, ExAmp; - using mdrange_type = Kokkos::MDRangePolicy>; - - double temp = 0.0; - Kokkos::parallel_reduce("Ex inner product", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::pow(Eview(i, j, k)[0], 2); - valL += myVal; - }, Kokkos::Sum(temp)); - double globaltemp = temp; - //MPI_Reduce(&temp, &globaltemp, 1, 
MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; - - double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::fabs(Eview(i, j, k)[0]); - if(myVal > valL) valL = myVal; - }, Kokkos::Max(tempMax)); - ExAmp = tempMax; - //MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - - - if (Ippl::Comm->rank() == 0) { - std::stringstream fname; - fname << "data/FieldLandau_"; - fname << Ippl::Comm->size(); - fname << ".csv"; - - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - if(time_m == 0.0) { - csvout << "time, Ex_field_energy, Ex_max_norm" << endl; - } - - csvout << time_m << " " - << fieldEnergy << " " - << ExAmp << endl; - - } - - //Ippl::Comm->barrier(); - } - - - - void dumpLandau(const unsigned int& iter) { + void dumpFieldEnergy(const unsigned int& nc, const unsigned int& iter, int rankTime, int rankSpace) { - - double fieldEnergy = 0.0; - double ExAmp = 0.0; - - auto rhoview = rhoPIF_m.getView(); - const int nghost = rhoPIF_m.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - - const FieldLayout_t& layout = rhoPIF_m.getLayout(); - const Mesh_t& mesh = rhoPIF_m.get_mesh(); - const Vector& dx = mesh.getMeshSpacing(); - const auto& domain = layout.getDomain(); - Vector Len; - Vector N; - - for (unsigned d=0; d < Dim; ++d) { - N[d] = domain[d].length(); - Len[d] = dx[d] * N[d]; - } - - - Kokkos::complex imag = {0.0, 1.0}; - double pi = std::acos(-1.0); - Kokkos::parallel_reduce("Ex energy and Max", - mdrange_type({0, 0, 0}, - {N[0], - N[1], - N[2]}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k, - double& tlSum, - double& tlMax) - { - - Vector iVec = {i, j, k}; - Vector kVec; - double Dr = 0.0; - for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); - Dr += kVec[d] * kVec[d]; - } - - Kokkos::complex Ek = {0.0, 0.0}; - bool isNotZero = (Dr != 0.0); - double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); - Ek = -(imag * kVec[0] * rhoview(i+nghost,j+nghost,k+nghost) * factor); - double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); - - tlSum += myVal; - - double myValMax = std::sqrt(myVal); - - if(myValMax > tlMax) tlMax = myValMax; - - }, Kokkos::Sum(fieldEnergy), Kokkos::Max(ExAmp)); - - - Kokkos::fence(); - double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - fieldEnergy *= volume; - - - std::stringstream fname; - fname << "data/FieldLandau_"; - fname << Ippl::Comm->rank(); - fname << "_iter_"; - fname << iter; - fname << ".csv"; - - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - - csvout << time_m << " " - << fieldEnergy << " " - << ExAmp << endl; - } - - void dumpBumponTail(const unsigned int& nc, const unsigned int& iter, int rankTime, int rankSpace) { - - double fieldEnergy = 0.0; double EzAmp = 0.0; @@ -505,8 +238,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - //bool 
shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -553,13 +284,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } } - - - - void dumpEnergy(size_type /*totalP*/, const unsigned int& nc, - const unsigned int& iter, ParticleAttrib& Ptemp, + void dumpEnergy(const unsigned int& nc, const unsigned int& iter, ParticleAttrib& Ptemp, int rankTime, int rankSpace, const MPI_Comm& spaceComm = MPI_COMM_WORLD) { - double potentialEnergy, kineticEnergy; double temp = 0.0; @@ -598,10 +324,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); - //kVec[d] = 2 * pi / Len[d] * iVec[d]; Dr += kVec[d] * kVec[d]; } @@ -615,14 +338,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); } - //double myVal = rhoview(i,j,k).real() * rhoview(i,j,k).real() + - // rhoview(i,j,k).imag() * rhoview(i,j,k).imag(); - //if(Dr != 0.0) { - // myVal /= Dr; - //} - //else { - // myVal = 0.0; - //} valL += myVal; }, Kokkos::Sum(temp)); @@ -638,13 +353,12 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), KOKKOS_LAMBDA(const int i, double& valL){ double myVal = dot(Pview(i), Pview(i)).apply(); - myVal *= -qView(i); //q/(q/m) where q/m=-1 for us + myVal *= -qView(i); //q/(q/m) where q/m=-1 valL += myVal; }, Kokkos::Sum(temp)); temp *= 0.5; double globaltemp = 0.0; - //double globaltemp = temp; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, spaceComm); kineticEnergy = globaltemp; @@ -660,6 +374,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Nhalf[d] = domainHalf[d].length(); } + //Heffte needs FFTshifted field whereas the field from cuFINUFFT + //is not shifted. Hence, here we do the shift. 
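[Note] The transfer kernel that follows this comment is, apart from the ghost offsets and the reversal along the r2c direction, an fftshift: cuFINUFFT and heFFTe simply order the Fourier modes differently (zero mode in the centre of the array in one convention, zero mode first in the other), so every index has to move by N/2. A one-dimensional sketch of that remapping (for even N the map is its own inverse, so it converts in either direction; illustrative only):

// Swap between the "zero mode first" and "zero mode centred" orderings of an
// even-length spectrum: indices below N/2 move up by N/2, the rest move down.
inline int fftShiftIndex(int i, int N) {
    return (i + N / 2) % N;
}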
Kokkos::parallel_for("Transfer complex rho to half domain", mdrange_type({0, 0, 0}, {Nhalf[0], @@ -700,7 +416,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { valL += rhoPIFrealview(i+nghost, j+nghost, k+nghost); }, Kokkos::Sum(temp)); - double chargeTotal = temp; Vector_t totalMomentum = 0.0; @@ -746,71 +461,19 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } } - - - void dumpParticleData(const unsigned int& iter, ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const char* fname) { - - typename ParticleAttrib::HostMirror R_host = Rtemp.getHostMirror(); - typename ParticleAttrib::HostMirror P_host = Ptemp.getHostMirror(); - Kokkos::deep_copy(R_host, Rtemp.getView()); - Kokkos::deep_copy(P_host, Ptemp.getView()); - std::stringstream pname; - pname << "data/"; - pname << fname; - pname << "_rank_"; - pname << Ippl::Comm->rank(); - pname << "_iter_"; - pname << iter; - pname << ".csv"; - Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); - pcsvout.precision(10); - pcsvout.setf(std::ios::scientific, std::ios::floatfield); - pcsvout << "R_x, R_y, R_z, V_x, V_y, V_z" << endl; - for (size_type i = 0; i< this->getLocalNum(); i++) { - pcsvout << R_host(i)[0] << " " - << R_host(i)[1] << " " - << R_host(i)[2] << " " - << P_host(i)[0] << " " - << P_host(i)[1] << " " - << P_host(i)[2] << endl; - } - } - - void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter, int rankTime, int rankSpace) { - - //if(Ippl::Comm->rank() == 0) { - if(rankSpace == 0) { - std::stringstream fname; - fname << "data/localError_rank_"; - fname << rankTime; - fname << "_nc_"; - fname << nc; - fname << ".csv"; - - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); - csvout.precision(10); - csvout.setf(std::ios::scientific, std::ios::floatfield); - - if(iter == 1) { - csvout << "Iter, Rerror, Perror" << endl; - } - - csvout << iter << " " - << Rerror << " " - << Perror << endl; - } - - } - - void writeError(double Rerror, double Perror, unsigned int iter) { + void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter, int rankTime, int rankSpace) { - if(Ippl::Comm->rank() == 0) { + if(rankSpace == 0) { std::stringstream fname; - fname << "data/Error_Vs_Iter.csv"; + fname << "data/localError_rank_"; + fname << rankTime; + fname << "_nc_"; + fname << nc; + fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); - csvout.precision(10); + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(17); csvout.setf(std::ios::scientific, std::ios::floatfield); if(iter == 1) { @@ -820,55 +483,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { csvout << iter << " " << Rerror << " " << Perror << endl; - } - - Ippl::Comm->barrier(); - - } - void checkBounds(ParticleAttrib& R) { - - auto Rview = R.getView(); - double xMin = 0.0; - double yMin = 0.0; - double zMin = 0.0; - double xMax = 0.0; - double yMax = 0.0; - double zMax = 0.0; - Kokkos::parallel_reduce("Bounds calculation", R.size(), - KOKKOS_LAMBDA(const int i, - double& xlMin, - double& ylMin, - double& zlMin, - double& xlMax, - double& ylMax, - double& zlMax){ - - if(Rview(i)[0] < xlMin) xlMin = Rview(i)[0]; - if(Rview(i)[1] < ylMin) ylMin = Rview(i)[1]; - if(Rview(i)[2] < zlMin) zlMin = Rview(i)[2]; - - if(Rview(i)[0] > xlMax) xlMax = Rview(i)[0]; - if(Rview(i)[1] > ylMax) ylMax = Rview(i)[1]; - if(Rview(i)[2] > zlMax) zlMax = Rview(i)[2]; - - }, Kokkos::Min(xMin), 
Kokkos::Min(yMin), Kokkos::Min(zMin), - Kokkos::Max(xMax), Kokkos::Max(yMax), Kokkos::Max(zMax)); - - Kokkos::fence(); - - Vector_t Rmin = {xMin, yMin, zMin}; - Vector_t Rmax = {xMax, yMax, zMax}; - - for (unsigned d = 0; d < 3; ++d) { - if(Rmin[d] < rmin_m[d]) { - std::cout << "Invalid particles with min. in rank: " << Ippl::Comm->rank() << " Rmin: " << Rmin << std::endl; - } - if(Rmax[d] > rmax_m[d]) { - std::cout << "Invalid particles with max. in rank: " << Ippl::Comm->rank() << " Rmax: " << Rmax << std::endl; - } - } } void initializeShapeFunctionPIF() { @@ -903,8 +519,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector kVec; double Sk = 1.0; for(size_t d = 0; d < Dim; ++d) { - //bool shift = (iVec[d] > (N[d]/2)); - //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); double kh = kVec[d] * dx[d]; bool isNotZero = (kh != 0.0); @@ -916,8 +530,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { } Skview(i+nghost, j+nghost, k+nghost) = Sk; }); - - } else { throw IpplException("initializeShapeFunctionPIF", @@ -932,8 +544,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); PLayout& PL = this->getLayout(); - //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); - //checkBounds(Rtemp); rhoPIC_m = 0.0; scatter(q, rhoPIC_m, Rtemp, spaceComm); @@ -948,8 +558,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - //dumpLandauPIC(); - for (unsigned int it=0; it { //Apply particle BC PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); - //checkBounds(Rtemp); //scatter the charge onto the underlying grid rhoPIC_m = 0.0; scatter(q, rhoPIC_m, Rtemp, spaceComm); - rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); @@ -982,7 +588,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Ptemp = Ptemp - 0.5 * dt * E; time_m += dt; - //dumpLandauPIC(); } } @@ -992,8 +597,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); PLayout& PL = this->getLayout(); - //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); - //checkBounds(Rtemp); rhoPIC_m = 0.0; scatter(q, rhoPIC_m, Rtemp, spaceComm); @@ -1001,7 +604,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); //Field solve - EfieldPIC_m = 0.0; solver_mp->solve(); // gather E field @@ -1009,12 +611,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { time_m = tStartMySlice; - //dumpLandauPIC(); double alpha = -0.5 * dt; double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); Vector_t rmax = rmax_m; - for (unsigned int it=0; it { //Apply particle BC PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); - //checkBounds(Rtemp); //scatter the charge onto the underlying grid rhoPIC_m = 0.0; scatter(q, rhoPIC_m, Rtemp, spaceComm); - rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); //Field solve IpplTimings::startTimer(fieldSolvePIC); - EfieldPIC_m = 0.0; solver_mp->solve(); IpplTimings::stopTimer(fieldSolvePIC); @@ -1090,13 +687,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { }); time_m += dt; - 
//dumpLandauPIC(); } } - - void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, const double& dt, const double& tStartMySlice, const unsigned& nc, @@ -1105,8 +699,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); - //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); - //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; if(propagator == "Coarse") { scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); @@ -1114,7 +706,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { else if(propagator == "Fine") { scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); } - rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -1125,32 +716,29 @@ class ChargedParticlesPinT : public ippl::ParticleBase { else if(propagator == "Fine") { gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); } - //gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); + //Reset the value of q here as we used it as a temporary object in gather to + //save memory q = Q_m / Np_m; time_m = tStartMySlice; if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - //dumpLandau(iter); - dumpBumponTail(nc, iter, rankTime, rankSpace); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpFieldEnergy(nc, iter, rankTime, rankSpace); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } for (unsigned int it=0; it { else if(propagator == "Fine") { scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); } - //scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, spaceComm); rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); @@ -1171,7 +758,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { else if(propagator == "Fine") { gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); } - //gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, q); q = Q_m / Np_m; @@ -1182,12 +768,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - //dumpLandau(iter); - dumpBumponTail(nc, iter, rankTime, rankSpace); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpFieldEnergy(nc, iter, rankTime, rankSpace); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } - } } @@ -1201,8 +785,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); PLayout& PL = this->getLayout(); - //PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); - //checkBounds(Rtemp); rhoPIF_m = {0.0, 0.0}; if(propagator == "Coarse") { scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); @@ -1227,7 +809,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } double alpha = -0.5 * dt; @@ -1235,8 +817,6 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector_t rmax = rmax_m; for (unsigned int it=0; it { //Apply particle BC 
PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); - //checkBounds(Rtemp); //scatter the charge onto the underlying grid rhoPIF_m = {0.0, 0.0}; @@ -1316,16 +895,14 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - dumpEnergy(this->getLocalNum(), nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } - } } private: void setBCAllPeriodic() { - this->setParticleBC(ippl::BC::PERIODIC); } From c19865559ca902f2d7c3e6ad2748b19082c5a36e Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 2 Mar 2024 10:19:51 +0100 Subject: [PATCH 108/117] FFT files cleaned a bit --- src/FFT/FFT.h | 1 - src/FFT/FFT.hpp | 26 +------------------------- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 816ae8e4b..6807e8ba3 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -39,7 +39,6 @@ #include "Types/IpplTypes.h" #include "FieldLayout/FieldLayout.h" #include "Field/Field.h" -//#include "Particle/ParticleAttrib.h" #include "Utility/ParameterList.h" #include "Utility/IpplException.h" diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index c8f28c8bf..e33552322 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -124,10 +124,8 @@ namespace ippl { } } - //heffte_m = std::make_shared> - // (inbox, outbox, Ippl::getComm(), heffteOptions); heffte_m = std::make_shared> - (inbox, outbox, MPI_COMM_SELF, heffteOptions); + (inbox, outbox, Ippl::getComm(), heffteOptions); //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); if(workspace_m.size() < heffte_m->size_workspace()) @@ -901,28 +899,6 @@ namespace ippl { const double pi = std::acos(-1.0); - /** - * cuFINUFFT's layout is left, hence we allocate the temporary - * Kokkos views with the same layout - */ - //Kokkos::View - // tempField("tempField", fview.extent(0) - 2*nghost, - // fview.extent(1) - 2*nghost, - // fview.extent(2) - 2*nghost); - - - ////Initialize the pointers to NULL and fill only relevant dimensions - ////CUFINUFFT requires the input like this. - //Kokkos::View tempR[3] = {}; - - - //for(size_t d = 0; d < Dim; ++d) { - // Kokkos::realloc(tempR[d], localNp); - //} - - - //Kokkos::View tempQ("tempQ", localNp); - auto tempField = tempField_m; auto tempQ = tempQ_m; Kokkos::View tempR[3] = {}; From 5e571f7ad1d5f9148c33489dd03a5239e62ad1ad Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 2 Mar 2024 11:04:58 +0100 Subject: [PATCH 109/117] Particle Attribute also cleaned a bit --- src/Particle/ParticleAttrib.h | 8 ---- src/Particle/ParticleAttrib.hpp | 72 ++++++++++----------------------- 2 files changed, 21 insertions(+), 59 deletions(-) diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 10b391d69..0053dcca7 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -176,9 +176,6 @@ namespace ippl { const ParticleAttrib, Properties... >& pp); #ifdef KOKKOS_ENABLE_CUDA - //template - //void initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams); - template void scatterPIFNUFFT(Field& f, Field& Sk, @@ -201,11 +198,6 @@ namespace ippl { private: view_type dview_m; -//#ifdef KOKKOS_ENABLE_CUDA -// //TODO: Remove hard-coded dimension by having Dim as template -// //parameter. Does this need to be in CUDA ifdefs? 
-// std::shared_ptr> fftType_mp; -//#endif }; } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 41b11f220..a2e33e334 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -185,12 +185,6 @@ namespace ippl { const int j = index[1] - lDom[1].first() + nghost; const int k = index[2] - lDom[2].first() + nghost; - //if((i < 1) || (i > lDom[0].last() + 2) || (j < 1) || (j > lDom[1].last() + 2) - // || (k < 1) || (k > lDom[0].last() + 2)) { - // std::cout << "i: " << i << ", j: " << j << ", k: " << k << std::endl; - // std::cout << "Invalid particle co-ordinates: " << pp(idx) << std::endl; - //} - // scatter const value_type& val = dview_m(idx); Kokkos::atomic_add(&viewLocal(i-1, j-1, k-1), wlo[0] * wlo[1] * wlo[2] * val); @@ -203,12 +197,10 @@ namespace ippl { Kokkos::atomic_add(&viewLocal(i, j, k ), whi[0] * whi[1] * whi[2] * val); } ); - IpplTimings::stopTimer(scatterPICTimer); - //static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("AccumulateHalo"); - //IpplTimings::startTimer(accumulateHaloTimer); tempField.accumulateHalo(); - //IpplTimings::stopTimer(accumulateHaloTimer); + + IpplTimings::stopTimer(scatterPICTimer); static IpplTimings::TimerRef scatterAllReducePICTimer = IpplTimings::getTimer("scatterAllReducePIC"); IpplTimings::startTimer(scatterAllReducePICTimer); @@ -222,7 +214,8 @@ namespace ippl { template template void ParticleAttrib::scatterPIFNUDFT(Field& f, Field& Sk, - const ParticleAttrib< Vector, Properties... >& pp) + const ParticleAttrib< Vector, Properties... >& pp, + const MPI_Comm& spaceComm) const { @@ -251,11 +244,6 @@ namespace ippl { typedef Kokkos::TeamPolicy<> team_policy; typedef Kokkos::TeamPolicy<>::member_type member_type; - - //using view_type_temp = typename detail::ViewType::view_type; - - //view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); - double pi = std::acos(-1.0); Kokkos::complex imag = {0.0, 1.0}; @@ -304,8 +292,8 @@ namespace ippl { }, Kokkos::Sum(reducedValue)); if(teamMember.team_rank() == 0) { - //viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; - fview(i+nghost,j+nghost,k+nghost) = reducedValue; + viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; + //fview(i+nghost,j+nghost,k+nghost) = reducedValue; } } @@ -313,12 +301,12 @@ namespace ippl { IpplTimings::stopTimer(scatterPIFNUDFTTimer); - //static IpplTimings::TimerRef scatterAllReduceTimer = IpplTimings::getTimer("scatterAllReduce"); - //IpplTimings::startTimer(scatterAllReduceTimer); - //int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); - //MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, - // MPI_C_DOUBLE_COMPLEX, MPI_SUM, Ippl::getComm()); - //IpplTimings::stopTimer(scatterAllReduceTimer); + static IpplTimings::TimerRef scatterAllReducePIFTimer = IpplTimings::getTimer("scatterAllReducePIF"); + IpplTimings::startTimer(scatterAllReducePIFTimer); + int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + MPI_C_DOUBLE_COMPLEX, MPI_SUM, spaceComm); + IpplTimings::stopTimer(scatterAllReducePIFTimer); } @@ -329,13 +317,10 @@ namespace ippl { const ParticleAttrib, Properties...>& pp) { - //static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("FillHalo"); - //IpplTimings::startTimer(fillHaloTimer); - f.fillHalo(); - //IpplTimings::stopTimer(fillHaloTimer); - static IpplTimings::TimerRef gatherPICTimer = IpplTimings::getTimer("GatherPIC"); 
IpplTimings::startTimer(gatherPICTimer); + + f.fillHalo(); const typename Field::view_type view = f.getView(); @@ -408,8 +393,6 @@ namespace ippl { Len[d] = dx[d] * N[d]; } - - typedef Kokkos::TeamPolicy<> team_policy; typedef Kokkos::TeamPolicy<>::member_type member_type; @@ -492,15 +475,6 @@ namespace ippl { } #ifdef KOKKOS_ENABLE_CUDA - - //template - //template - //void ParticleAttrib::initializeNUFFT(FieldLayout& layout, int type, ParameterList& fftParams) { - // - // fftType_mp = std::make_shared>(layout, *(this->localNum_mp), type, fftParams); - //} - - template template @@ -525,10 +499,7 @@ namespace ippl { tempField = 0.0; - //fftType_mp->transform(pp, q, tempField); nufft->transform(pp, q, tempField); - //fftType_mp->transform(pp, q, f); - using view_type = typename Field::view_type; view_type fview = f.getView(); @@ -622,7 +593,6 @@ namespace ippl { double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); - //kVec[d] = (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } @@ -634,7 +604,6 @@ namespace ippl { tempview(i, j, k) *= -Skview(i, j, k) * (imag * kVec[gd] * factor); }); - //fftType_mp->transform(pp, q, tempField); nufft->transform(pp, q, tempField); Kokkos::parallel_for("Assign E gather NUFFT", @@ -644,13 +613,17 @@ namespace ippl { dview_m(i)[gd] = qview(i); }); } - IpplTimings::stopTimer(gatherPIFNUFFTTimer); } #endif + /* + * Non-class functions + * + */ + template inline void scatterPIFNUFFT(const ParticleAttrib& attrib, Field& f, @@ -682,10 +655,6 @@ namespace ippl { #endif } - /* - * Non-class function - * - */ template @@ -700,7 +669,8 @@ namespace ippl { template inline void scatterPIFNUDFT(const ParticleAttrib& attrib, Field& f, - Field& Sk, const ParticleAttrib, Properties...>& pp) + Field& Sk, const ParticleAttrib, Properties...>& pp, + const MPI_Comm& spaceComm = MPI_COMM_WORLD) { attrib.scatterPIFNUDFT(f, Sk, pp); } From 8efb99ee9ef0aded347aae8a26c853eb96a070af Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 2 Mar 2024 12:51:47 +0100 Subject: [PATCH 110/117] Landaudamping results verified after clean up --- src/Particle/ParticleAttrib.h | 3 ++- src/Particle/ParticleAttrib.hpp | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 0053dcca7..bfa089d29 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -163,7 +163,8 @@ namespace ippl { template void scatterPIFNUDFT(Field& f, Field& Sk, - const ParticleAttrib, Properties... >& pp) const; + const ParticleAttrib, Properties... 
>& pp, + const MPI_Comm& spaceComm) const; template void diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index a2e33e334..724bc2913 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -244,6 +244,10 @@ namespace ippl { typedef Kokkos::TeamPolicy<> team_policy; typedef Kokkos::TeamPolicy<>::member_type member_type; + using view_type_temp = typename detail::ViewType::view_type; + + view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); + double pi = std::acos(-1.0); Kokkos::complex imag = {0.0, 1.0}; @@ -672,7 +676,7 @@ namespace ippl { Field& Sk, const ParticleAttrib, Properties...>& pp, const MPI_Comm& spaceComm = MPI_COMM_WORLD) { - attrib.scatterPIFNUDFT(f, Sk, pp); + attrib.scatterPIFNUDFT(f, Sk, pp, spaceComm); } From fc8252939373ecdf2ec74dd8235d84b049582f93 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 2 Mar 2024 13:26:50 +0100 Subject: [PATCH 111/117] TSI and Penning trap also cleaned. Need to test and see if it works --- alpine/PinT/BumponTailInstabilityPinT.cpp | 416 +++++-------------- alpine/PinT/LandauDampingPinT.cpp | 15 +- alpine/PinT/PenningTrapPinT.cpp | 483 +++++----------------- 3 files changed, 218 insertions(+), 696 deletions(-) diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 8bf8547f9..0e405720a 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -6,20 +6,28 @@ // European Conference on Parallel Processing. Springer, Cham, 2017. // // Usage: -// srun ./BumponTailInstabilityPinT -// --info 5 +// Usage: +// srun ./BumponTailInstabilityPinT +// +// --info 5 // nmx = No. of Fourier modes in the x-direction // nmy = No. of Fourier modes in the y-direction // nmz = No. of Fourier modes in the z-direction -// nx = No. of grid points in the x-direction -// ny = No. of grid points in the y-direction -// nz = No. of grid points in the z-direction +// nx = No. of grid points in the x-direction (not used if PIF is also used as coarse propagator) +// ny = No. of grid points in the y-direction (not used if PIF is also used as coarse propagator) +// nz = No. of grid points in the z-direction (not used if PIF is also used as coarse propagator) // Np = Total no. of macro-particles in the simulation +// tolParareal = Parareal tolerance // nCycles = No. of Parareal blocks/cycles // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) +// No. of space procs = Number of MPI ranks to be used in the spatial parallelization +// No. of time procs = Number of MPI ranks to be used in the time parallelization +// coarseTol = Coarse tolerance for PIF if we use PIF as a coarse propagator (will not be used when PIC is used) +// fineTol = fine tolerance for PIF +// coarseType = Type of coarse propagator (PIF or PIC) // Example: -// srun ./BumponTailInstabilityPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 4 B-spline 1 --info 5 +// srun ./BumponTailInstabilityPinT 32 32 32 16 16 16 655360 19.2 0.05 0.05 1e-5 1 B-spline 1 4 16 1e-2 1e-4 PIC --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. 
@@ -39,8 +47,6 @@ #include "ChargedParticlesPinT.hpp" #include "StatesBeginSlice.hpp" #include "StatesEndSlice.hpp" -//#include "LeapFrogPIC.cpp" -//#include "LeapFrogPIF.cpp" #include #include #include @@ -171,6 +177,11 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + //This is just to undo the effect of periodic BCs during the + //error calculation. Otherwise even though the actual error is + //small the computed error might be very large. + //The values (e.g. 10) mentioned here are just an adhoc + //value depending on the domain length. for (unsigned d = 0; d < 3; ++d) { bool isLeft = (diff[d] <= -10.0); bool isRight = (diff[d] >= 10.0); @@ -225,171 +236,6 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp } -double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - Vector_t& length) { - - auto Qview = Q.getView(); - auto QprevIterView = QprevIter.getView(); - double localError = 0.0; - double localNorm = 0.0; - - Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ - Vector_t diff = Qview(i) - QprevIterView(i); - - for (unsigned d = 0; d < 3; ++d) { - bool isLeft = (diff[d] <= -10.0); - bool isRight = (diff[d] >= 10.0); - bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); - diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) - +(isRight * (diff[d] - length[d])); - } - - double myValError = dot(diff, diff).apply(); - - myValError = std::sqrt(myValError); - - //bool isIncluded = (myValError < 10.0); - - //myValError *= isIncluded; - - if(myValError > valLError) valLError = myValError; - - double myValnorm = dot(Qview(i), Qview(i)).apply(); - myValnorm = std::sqrt(myValnorm); - - //myValnorm *= isIncluded; - - if(myValnorm > valLnorm) valLnorm = myValnorm; - - //excluded += (!isIncluded); - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - - Kokkos::fence(); - lError = localError/localNorm; - - double relError = lError; - - return relError; - -} - - -double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { - - auto Qview = Q.getView(); - auto QprevIterView = QprevIter.getView(); - double localError = 0.0; - double localNorm = 0.0; - - Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ - Vector_t diff = Qview(i) - QprevIterView(i); - double myValError = dot(diff, diff).apply(); - myValError = std::sqrt(myValError); - - if(myValError > valLError) valLError = myValError; - - double myValnorm = dot(Qview(i), Qview(i)).apply(); - myValnorm = std::sqrt(myValnorm); - - if(myValnorm > valLnorm) valLnorm = myValnorm; - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - - Kokkos::fence(); - lError = localError/localNorm; - - double relError = lError; - - return relError; - -} - -double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { - - auto rhoview = rhoPIF.getView(); - auto rhoprevview = rhoPIFprevIter.getView(); - const int nghost = rhoPIF.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - - const FieldLayout_t& layout = rhoPIF.getLayout(); - const Mesh_t& mesh = rhoPIF.get_mesh(); - const Vector& dx = mesh.getMeshSpacing(); - const auto& domain = layout.getDomain(); - Vector Len; - Vector N; - - for (unsigned d=0; d < Dim; ++d) { - N[d] = domain[d].length(); - Len[d] = dx[d] * N[d]; - } - - double AbsError = 0.0; - double Enorm = 0.0; - Kokkos::complex imag = {0.0, 1.0}; - double pi = std::acos(-1.0); - Kokkos::parallel_reduce("Ex field error", - mdrange_type({0, 0, 0}, - {N[0], - N[1], - N[2]}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k, - double& errorSum, - double& fieldSum) - { - - Vector iVec = {i, j, k}; - Vector kVec; - double Dr = 0.0; - for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - Dr += kVec[d] * kVec[d]; - } - - double myError = 0.0; - double myField = 0.0; - Kokkos::complex Ek = {0.0, 0.0}; - Kokkos::complex Ekprev = {0.0, 0.0}; - for(size_t d = 0; d < Dim; ++d) { - if(Dr != 0.0) { - Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); - } - Ekprev = Ekprev - Ek; - myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); - myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); - } - errorSum += myError; - fieldSum += myField; - //Kokkos::complex rhok = rhoview(i+nghost,j+nghost,k+nghost); - //Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); - //rhokprev = rhokprev - rhok; - //myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); - //errorSum += myError; - //myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); - //fieldSum += myField; - - }, Kokkos::Sum(AbsError), Kokkos::Sum(Enorm)); - - Kokkos::fence(); - double globalError = 0.0; - MPI_Allreduce(&AbsError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - double globalNorm = 0.0; - MPI_Allreduce(&Enorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //fieldEnergy *= volume; - - double relError = std::sqrt(globalError)/std::sqrt(globalNorm); - - return relError; -} - - const char* TestName = "TwoStreamInstability"; //const char* TestName = "BumponTailInstability"; @@ -452,20 +298,13 @@ int main(int argc, char *argv[]){ const unsigned int ntFine = std::ceil(dtSlice / dtFine); const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); - //const unsigned int maxIter = std::atoi(argv[12]); - - - //const double tStartMySlice = Ippl::Comm->rank() * dtSlice; - //const double 
tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; - using bunch_type = ChargedParticlesPinT; - using states_begin_type = StatesBeginSlice; - using states_end_type = StatesEndSlice; + using states_type = StatesSlice; std::unique_ptr Pcoarse; - std::unique_ptr Pbegin; - std::unique_ptr Pend; + std::unique_ptr Pbegin; + std::unique_ptr Pend; ippl::NDIndex domainPIC; ippl::NDIndex domainPIF; @@ -543,38 +382,62 @@ int main(int argc, char *argv[]){ size_type Total_particles = 0; - //MPI_Allreduce(&nloc, &Total_particles, 1, - // MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); - - //int rest = (int) (totalP - Total_particles); - - //if ( (rankTime == 0) && (rankSpace < rest) ) { - // ++nloc; - //} - MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); - - //Q = -\int\int f dx dv double Q = -length[0] * length[1] * length[2]; Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); - Pbegin = std::make_unique(PL); - Pend = std::make_unique(PL); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); Pcoarse->nr_m = nrPIC; Pcoarse->nm_m = nmPIF; Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); - //Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); - Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); - Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); - //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); - Pcoarse->initFFTSolver(); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->initFFTSolver(); + } + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()); + } + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + Pcoarse->rhoPIFreal_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + Pcoarse->fft_mp = std::make_shared(FLPIF, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + Vector_t minU, maxU; for (unsigned d = 0; d shapetype_m = argv[13]; Pcoarse->shapedegree_m = std::atoi(argv[14]); IpplTimings::startTimer(initializeShapeFunctionPIF); @@ -603,54 +465,22 @@ int main(int argc, char *argv[]){ double coarseTol = std::atof(argv[17]); - double fineTol = 1e-3;//1e-12; + double fineTol = std::atof(argv[18]); Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); std::string coarse = "Coarse"; std::string fine = "Fine"; IpplTimings::startTimer(particleCreation); - - - //Pcoarse->initNUFFT(FLPIF); #ifdef KOKKOS_ENABLE_CUDA + //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs - //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - //if(Ippl::Comm->rank() == 0) { - // Kokkos::Random_XorShift64_Pool<> 
rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); - // Kokkos::parallel_for(nloc, - // generate_random, Dim>( - // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, delta, kw, - // sigma, muBulk, muBeam, nlocBulk, minU, maxU)); - - - // Kokkos::fence(); - // size_type bufSize = Pbegin->packedSize(nloc); - // std::vector requests(0); - // int sends = 0; - // for(int rank = 1; rank < Ippl::Comm->size(); ++rank) { - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); - // requests.resize(requests.size() + 1); - // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc); - // buf->resetWritePos(); - // ++sends; - // } - // MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - //} - //else { - // size_type bufSize = Pbegin->packedSize(nloc); - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc); - // buf->resetReadPos(); - //} - //Ippl::Comm->barrier(); - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); - //Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + IpplTimings::startTimer(timeCommunication); + //For some reason using the next_tag with multiple cycles is not + //working so we use static tags here + tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); if(rankTime == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); @@ -668,6 +498,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } + IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); @@ -676,14 +507,22 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); - Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + } + else { + //PIF with coarse tolerance as coarse propagator + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(timeCommunication); if(rankTime < sizeTime-1) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -692,8 +531,10 @@ int main(int argc, char *argv[]){ buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } + IpplTimings::stopTimer(timeCommunication); #else + //Note the CPU version has not been tested. 
Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( @@ -705,8 +546,6 @@ int main(int argc, char *argv[]){ Ippl::Comm->barrier(); #endif - //Pcoarse->dumpParticleData(0, Pcoarse->R, Pcoarse->P, "Parareal"); - msg << "Parareal " << TestName << endl @@ -728,57 +567,6 @@ int main(int argc, char *argv[]){ msg << "particles created and initial conditions assigned " << endl; - //Copy initial conditions as they are needed later - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - ////Get initial guess for ranks other than 0 by propagating the coarse solver - //IpplTimings::startTimer(coarsePropagator); - //if (Ippl::Comm->rank() > 0) { - // Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice); - //} - // - //Ippl::Comm->barrier(); - // - //IpplTimings::stopTimer(coarsePropagator); - - //msg << "First Leap frog PIC done " << endl; - - // - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - ////Run the coarse integrator to get the values at the end of the time slice - //IpplTimings::startTimer(coarsePropagator); - //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice); - //IpplTimings::stopTimer(coarsePropagator); - //msg << "Second Leap frog PIC done " << endl; - - ////Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - - ////The following might not be needed - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - //msg << "Starting parareal iterations ..." 
<< endl; - //bool isConverged = false; - //bool isPreviousDomainConverged; - //if(Ippl::Comm->rank() == 0) { - // isPreviousDomainConverged = true; - //} - //else { - // isPreviousDomainConverged = false; - //} - - int sign = 1; for (unsigned int nc=0; nc < nCycles; nc++) { @@ -821,9 +609,6 @@ int main(int argc, char *argv[]){ Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); - //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); - IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); @@ -855,30 +640,28 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - //Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); - - PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); - IpplTimings::stopTimer(computeErrors); - //} if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { isConverged = true; } - IpplTimings::startTimer(timeCommunication); if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); @@ -899,11 +682,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); - //if(Ippl::Comm->rank() == Ippl::Comm->size()-1) { - //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); - //} IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); @@ -913,7 +692,6 @@ int main(int argc, char *argv[]){ MPI_Barrier(MPI_COMM_WORLD); if((nCycles > 1) && (nc < (nCycles - 1))) { - IpplTimings::startTimer(timeCommunication); tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); //send, receive criteria and tStartMySlice are reversed at the end of the cycle @@ -935,12 +713,14 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(timeCommunication); if(recvCriteria) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } + IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); @@ -949,15 +729,21 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - //Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); - 
Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); - + IpplTimings::startTimer(timeCommunication); if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index c6bbd3536..947a4ed5d 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -428,6 +428,8 @@ int main(int argc, char *argv[]){ //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs + + IpplTimings::startTimer(timeCommunication); //For some reason using the next_tag with multiple cycles is not //working so we use static tags here tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); @@ -447,6 +449,8 @@ int main(int argc, char *argv[]){ Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } + IpplTimings::stopTimer(timeCommunication); + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); @@ -454,7 +458,7 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - + IpplTimings::startTimer(coarsePropagator); if(Pcoarse->coarsetype_m == "PIC") { Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); } @@ -462,12 +466,14 @@ int main(int argc, char *argv[]){ //PIF with coarse tolerance as coarse propagator Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); } + IpplTimings::stopTimer(coarsePropagator); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(timeCommunication); if(rankTime < sizeTime-1) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -476,6 +482,7 @@ int main(int argc, char *argv[]){ buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } + IpplTimings::stopTimer(timeCommunication); #else //Note the CPU version has not been tested. 
Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); @@ -633,7 +640,6 @@ int main(int argc, char *argv[]){ MPI_Barrier(MPI_COMM_WORLD); if((nCycles > 1) && (nc < (nCycles - 1))) { - IpplTimings::startTimer(timeCommunication); tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); //send, receive criteria and tStartMySlice are reversed at the end of the cycle @@ -655,12 +661,14 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(timeCommunication); if(recvCriteria) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } + IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); @@ -669,12 +677,14 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(coarsePropagator); if(Pcoarse->coarsetype_m == "PIC") { Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); } else { Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); } + IpplTimings::stopTimer(coarsePropagator); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); @@ -682,6 +692,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(timeCommunication); if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 95ae8387a..f5026949c 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -6,20 +6,27 @@ // European Conference on Parallel Processing. Springer, Cham, 2017. // // Usage: -// srun ./PenningTrapPinT -// --info 5 +// srun ./PenningTrapPinT +// +// --info 5 // nmx = No. of Fourier modes in the x-direction // nmy = No. of Fourier modes in the y-direction // nmz = No. of Fourier modes in the z-direction -// nx = No. of grid points in the x-direction -// ny = No. of grid points in the y-direction -// nz = No. of grid points in the z-direction +// nx = No. of grid points in the x-direction (not used if PIF is also used as coarse propagator) +// ny = No. of grid points in the y-direction (not used if PIF is also used as coarse propagator) +// nz = No. of grid points in the z-direction (not used if PIF is also used as coarse propagator) // Np = Total no. of macro-particles in the simulation +// tolParareal = Parareal tolerance // nCycles = No. of Parareal blocks/cycles // ShapeType = Shape function type B-spline only for the moment // degree = B-spline degree (-1 for delta function) +// No. of space procs = Number of MPI ranks to be used in the spatial parallelization +// No. 
of time procs = Number of MPI ranks to be used in the time parallelization +// coarseTol = Coarse tolerance for PIF if we use PIF as a coarse propagator (will not be used when PIC is used) +// fineTol = fine tolerance for PIF +// coarseType = Type of coarse propagator (PIF or PIC) // Example: -// srun ./PenningTrapPinT 32 32 32 32 32 32 655360 20.0 0.05 0.05 1e-5 4 B-spline 1 --info 5 +// srun ./PenningTrapPinT 32 32 32 16 16 16 655360 19.2 0.05 0.05 1e-5 1 B-spline 1 4 16 1e-2 1e-4 PIC --info 5 // // Copyright (c) 2022, Sriramkrishnan Muralikrishnan, // Jülich Supercomputing Centre, Jülich, Germany. @@ -39,8 +46,6 @@ #include "ChargedParticlesPinT.hpp" #include "StatesBeginSlice.hpp" #include "StatesEndSlice.hpp" -//#include "LeapFrogPIC.cpp" -//#include "LeapFrogPIF.cpp" #include #include #include @@ -158,6 +163,11 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ Vector_t diff = Qview(i) - QprevIterView(i); + //This is just to undo the effect of periodic BCs during the + //error calculation. Otherwise even though the actual error is + //small the computed error might be very large. + //The values (e.g. 22) mentioned here are just an adhoc + //value depending on the domain length. for (unsigned d = 0; d < 3; ++d) { bool isLeft = (diff[d] <= -22.0); bool isRight = (diff[d] >= 22.0); @@ -174,12 +184,9 @@ double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& Qp Kokkos::fence(); double globalError = 0.0; - //MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); double globalNorm = 0.0; - //MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); - //lError = std::sqrt(localError)/std::sqrt(localNorm); double relError = std::sqrt(globalError) / std::sqrt(globalNorm); @@ -205,12 +212,9 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp Kokkos::fence(); double globalError = 0.0; - //MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); double globalNorm = 0.0; - //MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); - //lError = std::sqrt(localError)/std::sqrt(localNorm); double relError = std::sqrt(globalError) / std::sqrt(globalNorm); @@ -218,194 +222,19 @@ double computePL2Error(ParticleAttrib& Q, ParticleAttrib& Qp } -double computeRLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError, - Vector_t& length) { - - auto Qview = Q.getView(); - auto QprevIterView = QprevIter.getView(); - double localError = 0.0; - double localNorm = 0.0; - - Kokkos::parallel_reduce("Abs. 
max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ - Vector_t diff = Qview(i) - QprevIterView(i); - - for (unsigned d = 0; d < 3; ++d) { - bool isLeft = (diff[d] <= -22.0); - bool isRight = (diff[d] >= 22.0); - bool isInside = ((diff[d] > -22.0) && (diff[d] < 22.0)); - diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) - +(isRight * (diff[d] - length[d])); - } - - double myValError = dot(diff, diff).apply(); - - myValError = std::sqrt(myValError); - - //bool isIncluded = (myValError < 10.0); - - //myValError *= isIncluded; - - if(myValError > valLError) valLError = myValError; - - double myValnorm = dot(Qview(i), Qview(i)).apply(); - myValnorm = std::sqrt(myValnorm); - - //myValnorm *= isIncluded; - - if(myValnorm > valLnorm) valLnorm = myValnorm; - - //excluded += (!isIncluded); - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - - Kokkos::fence(); - lError = localError/localNorm; - - double relError = lError; - - return relError; - -} - -double computePLinfError(ParticleAttrib& Q, ParticleAttrib& QprevIter, - const unsigned int& /*iter*/, const int& /*myrank*/, double& lError) { - - auto Qview = Q.getView(); - auto QprevIterView = QprevIter.getView(); - double localError = 0.0; - double localNorm = 0.0; - - Kokkos::parallel_reduce("Abs. max error and norm", Q.size(), - KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ - Vector_t diff = Qview(i) - QprevIterView(i); - double myValError = dot(diff, diff).apply(); - myValError = std::sqrt(myValError); - - if(myValError > valLError) valLError = myValError; - - double myValnorm = dot(Qview(i), Qview(i)).apply(); - myValnorm = std::sqrt(myValnorm); - - if(myValnorm > valLnorm) valLnorm = myValnorm; - }, Kokkos::Max(localError), Kokkos::Max(localNorm)); - - Kokkos::fence(); - lError = localError/localNorm; - - double relError = lError; - - return relError; - -} - - -double computeFieldError(CxField_t& rhoPIF, CxField_t& rhoPIFprevIter) { - - auto rhoview = rhoPIF.getView(); - auto rhoprevview = rhoPIFprevIter.getView(); - const int nghost = rhoPIF.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - - const FieldLayout_t& layout = rhoPIF.getLayout(); - const Mesh_t& mesh = rhoPIF.get_mesh(); - const Vector& dx = mesh.getMeshSpacing(); - const auto& domain = layout.getDomain(); - Vector Len; - Vector N; - - for (unsigned d=0; d < Dim; ++d) { - N[d] = domain[d].length(); - Len[d] = dx[d] * N[d]; - } - - double AbsError = 0.0; - double Enorm = 0.0; - Kokkos::complex imag = {0.0, 1.0}; - double pi = std::acos(-1.0); - Kokkos::parallel_reduce("Ex field error", - mdrange_type({0, 0, 0}, - {N[0], - N[1], - N[2]}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k, - double& errorSum, - double& fieldSum) - { - - Vector iVec = {i, j, k}; - Vector kVec; - double Dr = 0.0; - for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); - Dr += kVec[d] * kVec[d]; - } - - double myError = 0.0; - double myField = 0.0; - Kokkos::complex Ek = {0.0, 0.0}; - Kokkos::complex Ekprev = {0.0, 0.0}; - for(size_t d = 0; d < Dim; ++d) { - if(Dr != 0.0) { - Ek = -(imag * kVec[d] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - Ekprev = -(imag * kVec[d] * rhoprevview(i+nghost,j+nghost,k+nghost) / Dr); - } - Ekprev = Ekprev - Ek; - myError += Ekprev.real() * Ekprev.real() + Ekprev.imag() * Ekprev.imag(); - myField += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); - } - errorSum += myError; - fieldSum += myField; - 
//Kokkos::complex rhok = rhoview(i+nghost,j+nghost,k+nghost); - //Kokkos::complex rhokprev = rhoprevview(i+nghost,j+nghost,k+nghost); - //rhokprev = rhokprev - rhok; - //myError = rhokprev.real() * rhokprev.real() + rhokprev.imag() * rhokprev.imag(); - //errorSum += myError; - //myField = rhok.real() * rhok.real() + rhok.imag() * rhok.imag(); - //fieldSum += myField; - - }, Kokkos::Sum(AbsError), Kokkos::Sum(Enorm)); - - Kokkos::fence(); - double globalError = 0.0; - MPI_Allreduce(&AbsError, &globalError, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - double globalNorm = 0.0; - MPI_Allreduce(&Enorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); - //fieldEnergy *= volume; - - double relError = std::sqrt(globalError)/std::sqrt(globalNorm); - - return relError; -} - - const char* TestName = "PenningTrapPinT"; int main(int argc, char *argv[]){ - Ippl ippl(argc, argv); - - //int rankWorld, sizeWorld; - //MPI_Init(&argc, &argv); - //MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); - //MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld); int spaceColor, timeColor; MPI_Comm spaceComm, timeComm; int spaceProcs = std::atoi(argv[15]); int timeProcs = std::atoi(argv[16]); - //spaceColor = rankWorld / spaceProcs; - //timeColor = rankWorld % spaceProcs; spaceColor = Ippl::Comm->rank() / spaceProcs; timeColor = Ippl::Comm->rank() % spaceProcs; - //MPI_Comm_split(MPI_COMM_WORLD, spaceColor, rankWorld, &spaceComm); - //MPI_Comm_split(MPI_COMM_WORLD, timeColor, rankWorld, &timeComm); MPI_Comm_split(Ippl::getComm(), spaceColor, Ippl::Comm->rank(), &spaceComm); MPI_Comm_split(Ippl::getComm(), timeColor, Ippl::Comm->rank(), &timeComm); @@ -416,9 +245,6 @@ int main(int argc, char *argv[]){ MPI_Comm_rank(timeComm, &rankTime); MPI_Comm_size(timeComm, &sizeTime); - //Ippl ippl(argc, argv, spaceComm); - - //Inform msg(TestName, sizeSpace-1); Inform msg(TestName, Ippl::Comm->size()-1); Inform msg2all(TestName,INFORM_ALL_NODES); @@ -443,10 +269,6 @@ int main(int argc, char *argv[]){ static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); - static IpplTimings::TimerRef initializeCycles = IpplTimings::getTimer("initializeCycles"); - static IpplTimings::TimerRef initialComm = IpplTimings::getTimer("initialComm"); - static IpplTimings::TimerRef initialCoarse = IpplTimings::getTimer("initialCoarse"); - static IpplTimings::TimerRef warmupStep = IpplTimings::getTimer("warmupStep"); IpplTimings::startTimer(mainTimer); @@ -460,18 +282,13 @@ int main(int argc, char *argv[]){ const unsigned int ntFine = std::ceil(dtSlice / dtFine); const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); const double tol = std::atof(argv[11]); - //const unsigned int maxIter = std::atoi(argv[12]); - - //const double tEndMySlice = (Ippl::Comm->rank() + 1) * dtSlice; - using bunch_type = ChargedParticlesPinT; - using states_begin_type = StatesBeginSlice; - using states_end_type = StatesEndSlice; + using states_type = StatesSlice; std::unique_ptr Pcoarse; - std::unique_ptr Pbegin; - std::unique_ptr Pend; + std::unique_ptr Pbegin; + std::unique_ptr Pend; ippl::NDIndex domainPIC; ippl::NDIndex domainPIF; @@ -488,26 +305,20 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t rmin(0.0); Vector_t 
rmax(25.0); - //Vector_t rmax(20.0); Vector_t length = rmax - rmin; double dxPIC = length[0] / nrPIC[0]; double dyPIC = length[1] / nrPIC[1]; double dzPIC = length[2] / nrPIC[2]; - Vector_t mu, sd; for (unsigned d = 0; d(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); - Pbegin = std::make_unique(PL); - Pend = std::make_unique(PL); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); Pcoarse->nr_m = nrPIC; Pcoarse->nm_m = nmPIF; Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); - //Pcoarse->rhoPIFprevIter_m.initialize(meshPIF, FLPIF); - Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); - Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); - //Pcoarse->EfieldPICprevIter_m.initialize(meshPIC, FLPIC); - Pcoarse->initFFTSolver(); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->initFFTSolver(); + } + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + Pcoarse->rhoPIFreal_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + Pcoarse->fft_mp = std::make_shared(FLPIF, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + Vector_t minU, maxU; for (unsigned d = 0; d initNUFFTs(FLPIF, coarseTol, fineTol); std::string coarse = "Coarse"; std::string fine = "Fine"; - IpplTimings::startTimer(particleCreation); - - - - //Pcoarse->initNUFFT(FLPIF); - #ifdef KOKKOS_ENABLE_CUDA //If we don't do the following even with the same seed the initial //condition is not the same on different GPUs - //tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - //if(rankTime == 0) { - // Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); - // Kokkos::parallel_for(nloc, - // generate_random, Dim>( - // Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, - // minU, maxU)); - // Kokkos::fence(); - // size_type bufSize = Pbegin->packedSize(nloc); - // std::vector requests(0); - // int sends = 0; - // for(int rank = 1; rank < sizeTime; ++rank) { - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND + sends, bufSize); - // requests.resize(requests.size() + 1); - // Ippl::Comm->isend(rank, tag, *Pbegin, *buf, requests.back(), nloc, timeComm); - // buf->resetWritePos(); - // ++sends; - // } - // MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - //} - //else { - // size_type bufSize = Pbegin->packedSize(nloc); - // buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); - // Ippl::Comm->recv(0, tag, *Pbegin, *buf, bufSize, nloc, timeComm); - // buf->resetReadPos(); - //} - - 
//Kokkos::deep_copy(Pcoarse->Rfine.getView(), Pbegin->R.getView()); - //Kokkos::deep_copy(Pcoarse->Pfine.getView(), Pbegin->P.getView()); - - - //If we don't do the following even with the same seed the initial - //condition is not the same on different GPUs - tag = Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); - - IpplTimings::startTimer(initialComm); + + IpplTimings::startTimer(timeCommunication); + //For some reason using the next_tag with multiple cycles is not + //working so we use static tags here + tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + if(rankTime == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); Kokkos::parallel_for(nloc, @@ -650,7 +451,7 @@ int main(int argc, char *argv[]){ Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } - IpplTimings::stopTimer(initialComm); + IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); @@ -659,17 +460,21 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - IpplTimings::startTimer(initialCoarse); - //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); - Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); - IpplTimings::stopTimer(initialCoarse); - + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + } + else { + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); - IpplTimings::startTimer(initialComm); + IpplTimings::startTimer(timeCommunication); if(rankTime < sizeTime-1) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -678,8 +483,9 @@ int main(int argc, char *argv[]){ buf->resetWritePos(); MPI_Wait(&request, MPI_STATUS_IGNORE); } - IpplTimings::stopTimer(initialComm); + IpplTimings::stopTimer(timeCommunication); #else + //Note the CPU version has not been tested. Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); Kokkos::parallel_for(nloc, generate_random, Dim>( @@ -687,11 +493,11 @@ int main(int argc, char *argv[]){ minU, maxU)); Kokkos::fence(); - //Ippl::Comm->barrier(); + Ippl::Comm->barrier(); #endif - - msg << "Parareal Penning trap" + msg << "Parareal " + << TestName << endl << "Slice dT: " << dtSlice << endl @@ -700,7 +506,6 @@ int main(int argc, char *argv[]){ << "No. of coarse time steps: " << ntCoarse << endl << "Tolerance: " << tol - //<< " Max. iterations: " << maxIter << " No. 
of cycles: " << nCycles << endl << "Np= " << Total_particles @@ -711,66 +516,9 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(particleCreation); msg << "particles created and initial conditions assigned " << endl; - - //Copy initial conditions as they are needed later - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R0.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pcoarse->P0.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - ////Get initial guess for ranks other than 0 by propagating the coarse solver - //IpplTimings::startTimer(coarsePropagator); - //if (Ippl::Comm->rank() > 0) { - // Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, Ippl::Comm->rank()*ntCoarse, dtCoarse, tStartMySlice, Bext); - //} - // - //Ippl::Comm->barrier(); - // - //IpplTimings::stopTimer(coarsePropagator); - - //msg << "First Boris PIC done " << endl; - - // - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - ////Run the coarse integrator to get the values at the end of the time slice - //IpplTimings::startTimer(coarsePropagator); - //Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext); - //IpplTimings::stopTimer(coarsePropagator); - //msg << "Second Boris PIC done " << endl; - - ////Kokkos::deep_copy(Pcoarse->EfieldPICprevIter_m.getView(), Pcoarse->EfieldPIC_m.getView()); - - ////The following might not be needed - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pend->R.getView(), Pcoarse->R.getView()); - //Kokkos::deep_copy(Pend->P.getView(), Pcoarse->P.getView()); - //IpplTimings::stopTimer(deepCopy); - - - //msg << "Starting parareal iterations ..." 
<< endl; - //bool isConverged = false; - //bool isPreviousDomainConverged; - //if(Ippl::Comm->rank() == 0) { - // isPreviousDomainConverged = true; - //} - //else { - // isPreviousDomainConverged = false; - //} - - int sign = 1; - //coarseTol = 1e-3; - //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - //Pcoarse->BorisPIF(Pcoarse->Rfine, Pcoarse->Pfine, (rankTime+1)*ntFine, dtFine, 0, 0, 0, - // Bext, rankTime, rankSpace, fine, spaceComm); for (unsigned int nc=0; nc < nCycles; nc++) { - double tStartMySlice; bool sendCriteria, recvCriteria; bool isConverged = false; @@ -796,13 +544,9 @@ int main(int argc, char *argv[]){ tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); msg.setPrintNode(0); } - //Pcoarse->time_m = tStartMySlice; unsigned int it = 0; while (!isConverged) { - //while ((!isPreviousDomainConverged) || (!isConverged)) { - //for (unsigned int it=0; it < maxIter; it++) { - //Run fine integrator in parallel IpplTimings::startTimer(finePropagator); Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, @@ -814,10 +558,6 @@ int main(int argc, char *argv[]){ Pend->R = Pbegin->R - Pcoarse->R; Pend->P = Pbegin->P - Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gk"); - //Pcoarse->dumpParticleData(it+1, Pbegin->R, Pbegin->P, "Fk"); - - IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); @@ -849,32 +589,27 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(deepCopy); IpplTimings::startTimer(coarsePropagator); - //coarseTol = 1e-4;//(double)(std::pow(0.1,std::min((int)(it+2),4))); - //Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); - Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); - //Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + } + else { + Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + } IpplTimings::stopTimer(coarsePropagator); Pend->R = Pend->R + Pcoarse->R; Pend->P = Pend->P + Pcoarse->P; - //Pcoarse->dumpParticleData(it+1, Pcoarse->R, Pcoarse->P, "Gkp1"); - PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); IpplTimings::startTimer(computeErrors); - //double localRerror, localPerror; double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); - //double Rerror = computeRL2Error(Pcoarse->Rfine, Pend->R, length, spaceComm); - //double Perror = computePL2Error(Pcoarse->Pfine, Pend->P, spaceComm); - IpplTimings::stopTimer(computeErrors); if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { isConverged = true; } - IpplTimings::startTimer(timeCommunication); if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); @@ -895,26 +630,16 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writeError(Rerror, Perror, it+1); Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); - //Pcoarse->dumpParticleData(it+1, Pend->R, Pend->P, "Parareal"); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); - + it += 1; - //if(isConverged && isPreviousDomainConverged) { - // break; - //} } 
- - //std::cout << "Before barrier in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; - //Ippl::Comm->barrier(); + MPI_Barrier(MPI_COMM_WORLD); - //msg << "Communication started in cycle: " << nc+1 << endl; - //std::cout << "Communication started in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; if((nCycles > 1) && (nc < (nCycles - 1))) { - IpplTimings::startTimer(timeCommunication); tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); //send, receive criteria and tStartMySlice are reversed at the end of the cycle @@ -936,12 +661,14 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); + IpplTimings::startTimer(timeCommunication); if(recvCriteria) { size_type bufSize = Pbegin->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); buf->resetReadPos(); } + IpplTimings::stopTimer(timeCommunication); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); @@ -950,15 +677,21 @@ int main(int argc, char *argv[]){ Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); IpplTimings::stopTimer(deepCopy); - Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); - //Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + } + else { + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); IpplTimings::startTimer(deepCopy); Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); IpplTimings::stopTimer(deepCopy); - + IpplTimings::startTimer(timeCommunication); if(sendCriteria) { size_type bufSize = Pend->packedSize(nloc); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); @@ -968,14 +701,6 @@ int main(int argc, char *argv[]){ MPI_Wait(&request, MPI_STATUS_IGNORE); } IpplTimings::stopTimer(timeCommunication); - //std::cout << "Communication finished in cycle: " << nc+1 << "for rank: " << Ippl::Comm->rank() << std::endl; - //Ippl::Comm->barrier(); - - //msg << "Communication finished in cycle: " << nc+1 << endl; - //IpplTimings::startTimer(deepCopy); - //Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); - //Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); - //IpplTimings::stopTimer(deepCopy); sign *= -1; } } From fff184da8275a176c9fd2276255cff8731938766 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Sat, 2 Mar 2024 20:24:26 +0100 Subject: [PATCH 112/117] Momentum difference seems to stem from different initial seed only --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 32 +++++++++++-------- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 4 +-- alpine/PinT/BumponTailInstabilityPinT.cpp | 5 +-- alpine/PinT/ChargedParticlesPinT.hpp | 12 ++++--- alpine/PinT/PenningTrapPinT.cpp | 5 +-- 5 files changed, 34 insertions(+), 24 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index bb762d408..7028cf094 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ 
b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -523,22 +523,26 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double charge = temp; - Vector_t totalMomentum = 0.0; - - Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, Vector_t& valL){ - valL += (-qView(i)) * Pview(i); - }, Kokkos::Sum>(totalMomentum)); - - Vector_t globalMom; + Vector_t totalMomentum = 0.0; + + for(size_t d = 0; d < Dim; ++d) { + double tempD = 0.0; + Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + valL += (-qView(i)) * Pview(i)[d]; + }, Kokkos::Sum(tempD)); + totalMomentum[d] = tempD; + } + + Vector_t globalMom; - double magMomentum = 0.0; - for(size_t d = 0; d < Dim; ++d) { - MPI_Allreduce(&totalMomentum[d], &globalMom[d], 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - magMomentum += globalMom[d] * globalMom[d]; - } + double magMomentum = 0.0; + for(size_t d = 0; d < Dim; ++d) { + MPI_Allreduce(&totalMomentum[d], &globalMom[d], 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + magMomentum += globalMom[d] * globalMom[d]; + } - magMomentum = std::sqrt(magMomentum); + magMomentum = std::sqrt(magMomentum); if (Ippl::Comm->rank() == 0) { std::stringstream fname; diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index 54984352e..fc01f8228 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -308,7 +308,7 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - //P->dumpEnergy(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); double alpha = -0.5 * dt; @@ -387,7 +387,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - //P->dumpEnergy(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 0e405720a..94fd4dd8a 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -45,8 +45,7 @@ // #include "ChargedParticlesPinT.hpp" -#include "StatesBeginSlice.hpp" -#include "StatesEndSlice.hpp" +#include "StatesSlice.hpp" #include #include #include @@ -397,6 +396,8 @@ int main(int argc, char *argv[]){ Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); + Pcoarse->coarsetype_m = argv[19]; + if(Pcoarse->coarsetype_m == "PIC") { Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 31835f9e4..b971bfa33 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -420,10 +420,14 @@ class ChargedParticlesPinT : public ippl::ParticleBase { Vector_t totalMomentum = 0.0; - Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, Vector_t& valL){ - valL += (-qView(i)) * Pview(i); - }, Kokkos::Sum>(totalMomentum)); + for(size_t d = 0; d < Dim; ++d) { + double tempD = 0.0; + Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + valL += (-qView(i)) * Pview(i)[d]; + }, Kokkos::Sum(tempD)); + totalMomentum[d] = tempD; + } Vector_t globalMom; diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 
f5026949c..270fba8c2 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -44,8 +44,7 @@ // #include "ChargedParticlesPinT.hpp" -#include "StatesBeginSlice.hpp" -#include "StatesEndSlice.hpp" +#include "StatesSlice.hpp" #include #include #include @@ -352,6 +351,8 @@ int main(int argc, char *argv[]){ Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); Pcoarse->Sk_m.initialize(meshPIF, FLPIF); + Pcoarse->coarsetype_m = argv[19]; + if(Pcoarse->coarsetype_m == "PIC") { Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); From c4104ed9a9bf678389ead4bf61f02ff12500ef9a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 28 Mar 2024 09:49:42 +0100 Subject: [PATCH 113/117] Uncommitted changes committed --- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 4 ++-- alpine/PinT/BumponTailInstabilityPinT.cpp | 2 +- alpine/PinT/ChargedParticlesPinT.hpp | 22 +++++++++++----------- alpine/PinT/LandauDampingPinT.cpp | 2 +- alpine/PinT/PenningTrapPinT.cpp | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index fc01f8228..54984352e 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -308,7 +308,7 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - P->dumpEnergy(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); double alpha = -0.5 * dt; @@ -387,7 +387,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - P->dumpEnergy(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 94fd4dd8a..6147c543c 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -683,7 +683,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index b971bfa33..31feaeb28 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -697,8 +697,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter, int rankTime, int rankSpace, + const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, + const unsigned int& /*iter*/, int /*rankTime*/, int /*rankSpace*/, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -729,8 +729,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - dumpFieldEnergy(nc, iter, rankTime, rankSpace); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpFieldEnergy(nc, iter, rankTime, rankSpace); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } for (unsigned 
int it=0; it { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - dumpFieldEnergy(nc, iter, rankTime, rankSpace); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpFieldEnergy(nc, iter, rankTime, rankSpace); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } @@ -782,9 +782,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter, const double& Bext, - int rankTime, int rankSpace, + const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, + const unsigned int& /*iter*/, const double& Bext, + int /*rankTime*/, int /*rankSpace*/, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -813,7 +813,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } double alpha = -0.5 * dt; @@ -899,7 +899,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 947a4ed5d..f627ccc7f 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -630,7 +630,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 270fba8c2..a5e90b42f 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -631,7 +631,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); From 7a61baa7edde3a1ba942250a494b8cc79c9e0e38 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 23 Apr 2024 10:42:16 +0200 Subject: [PATCH 114/117] Bug in shape function corrected --- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 2 +- alpine/PinT/BumponTailInstabilityPinT.cpp | 2 +- alpine/PinT/ChargedParticlesPinT.hpp | 24 +-- alpine/PinT/LandauDampingPinT.cpp | 4 +- alpine/PinT/PenningTrapPinT.cpp | 2 +- src/FFT/FFT.hpp | 4 +- test/FFT/TestNUFFT1.cpp | 183 +++++++++--------- test/FFT/TestNUFFT2.cpp | 20 +- 8 files changed, 120 insertions(+), 121 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 7028cf094..959fd119c 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -604,7 +604,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double Sk = 1.0; 
for(size_t d = 0; d < Dim; ++d) { kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); - double kh = kVec[d] * dx[d]; + double kh = kVec[d] * dx[d] / 2; bool isNotZero = (kh != 0.0); double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); double arg = isNotZero * (Kokkos::sin(kh) * factor) + diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 6147c543c..94fd4dd8a 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -683,7 +683,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 31feaeb28..d94309c60 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -524,7 +524,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Sk = 1.0; for(size_t d = 0; d < Dim; ++d) { kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); - double kh = kVec[d] * dx[d]; + double kh = kVec[d] * dx[d] / 2; bool isNotZero = (kh != 0.0); double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); double arg = isNotZero * (Kokkos::sin(kh) * factor) + @@ -697,8 +697,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, - const unsigned int& /*iter*/, int /*rankTime*/, int /*rankSpace*/, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, int rankTime, int rankSpace, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -729,8 +729,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - //dumpFieldEnergy(nc, iter, rankTime, rankSpace); - //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpFieldEnergy(nc, iter, rankTime, rankSpace); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } for (unsigned int it=0; it { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - //dumpFieldEnergy(nc, iter, rankTime, rankSpace); - //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpFieldEnergy(nc, iter, rankTime, rankSpace); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } @@ -782,9 +782,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, - const unsigned int& /*iter*/, const double& Bext, - int /*rankTime*/, int /*rankSpace*/, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, const double& Bext, + int rankTime, int rankSpace, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -813,7 +813,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - //dumpEnergy(nc, iter, 
Ptemp, rankTime, rankSpace, spaceComm); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } double alpha = -0.5 * dt; @@ -899,7 +899,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index f627ccc7f..99f625eb9 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -302,6 +302,7 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; + //Vector_t kw = {1.0, 1.0, 1.0}; Vector_t alpha = {0.05, 0.05, 0.05}; //Vector_t alpha = {0.5, 0.5, 0.5}; Vector_t rmin(0.0); @@ -436,6 +437,7 @@ int main(int argc, char *argv[]){ if(rankTime == 0) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); + //Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(79 + 100*rankSpace)); Kokkos::parallel_for(nloc, generate_random, Dim>( Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); @@ -630,7 +632,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index a5e90b42f..270fba8c2 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -631,7 +631,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index e33552322..07f36cee9 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -929,7 +929,9 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i) { for(size_t d = 0; d < Dim; ++d) { - tempR[d](i) = (Rview(i)[d] - origin[d]) * (2.0 * pi / Len[d]); + //tempR[d](i) = (Rview(i)[d] - (twopiFactor * 2.0 * pi)) * (2.0 * pi / Len[d]); + tempR[d](i) = Rview(i)[d] * (2.0 * pi / Len[d]); + //tempR[d](i) = Rview(i)[d]; } tempQ(i).x = Qview(i); tempQ(i).y = 0.0; diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index 0e261b035..0f6a6ba3e 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -73,7 +73,7 @@ int main(int argc, char *argv[]) { typedef Bunch bunch_type; - ippl::Vector pt = {512, 512, 512}; + ippl::Vector pt = {16, 16, 16}; ippl::Index I(pt[0]); ippl::Index J(pt[1]); ippl::Index K(pt[2]); @@ -85,22 +85,21 @@ int main(int argc, char *argv[]) { ippl::FieldLayout layout(owned, decomp); + typedef ippl::Vector Vector_t; + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + //Vector_t minU = {0.0, 0.0, 0.0}; + //Vector_t maxU = {25.0, 25.0, 25.0}; + std::array dx = { - 2.0 * pi / double(pt[0]), - 2.0 * pi / double(pt[1]), - 2.0 * pi / double(pt[2]), + (maxU[0] - minU[0]) / double(pt[0]), + (maxU[1] - minU[1]) / double(pt[1]), + (maxU[2] - minU[2]) / double(pt[2]), }; - - //std::array dx = { - // 25.0 / double(pt[0]), - // 25.0 / 
double(pt[1]), - // 25.0 / double(pt[2]), - //}; - typedef ippl::Vector Vector_t; + Vector_t hx = {dx[0], dx[1], dx[2]}; - Vector_t origin = {-pi, -pi, -pi}; - //Vector_t origin = {0, 0, 0}; + Vector_t origin = {minU[0], minU[1], minU[2]}; ippl::UniformCartesian mesh(owned, hx, origin); playout_type pl(layout, mesh); @@ -111,7 +110,7 @@ int main(int argc, char *argv[]) { using size_type = ippl::detail::size_type; - size_type Np = std::pow(512,3) * 5; + size_type Np = std::pow(16,3); typedef ippl::Field, dim> field_type; @@ -123,7 +122,7 @@ int main(int argc, char *argv[]) { fftParams.add("gpu_method", 1); fftParams.add("gpu_sort", 0); fftParams.add("gpu_kerevalmeth", 1); - fftParams.add("tolerance", 1e-6); + fftParams.add("tolerance", 1e-12); fftParams.add("use_cufinufft_defaults", false); @@ -134,12 +133,6 @@ int main(int argc, char *argv[]) { int type = 1; - Vector_t minU = {-pi, -pi, -pi}; - Vector_t maxU = {pi, pi, pi}; - //Vector_t minU = {0.0, 0.0, 0.0}; - //Vector_t maxU = {25.0, 25.0, 25.0}; - - size_type nloc = Np/Ippl::Comm->size(); bunch.create(nloc); @@ -175,82 +168,82 @@ int main(int argc, char *argv[]) { auto Qview = bunch.Q.getView(); Kokkos::complex imag = {0.0, 1.0}; - //size_t flatN = pt[0] * pt[1] * pt[2]; - //auto fview = field_dft.getView(); + size_t flatN = pt[0] * pt[1] * pt[2]; + auto fview = field_dft.getView(); - //typedef Kokkos::TeamPolicy<> team_policy; - //typedef Kokkos::TeamPolicy<>::member_type member_type; - - //Kokkos::parallel_for("NUDFT type 1", - // team_policy(flatN, Kokkos::AUTO), - // KOKKOS_LAMBDA(const member_type& teamMember) { - // const size_t flatIndex = teamMember.league_rank(); - // - // const int k = (int)(flatIndex / (pt[0] * pt[1])); - // const int flatIndex2D = flatIndex - (k * pt[0] * pt[1]); - // const int i = flatIndex2D % pt[0]; - // const int j = (int)(flatIndex2D / pt[0]); - // - // Kokkos::complex reducedValue = 0.0; - // ippl::Vector iVec = {i, j, k}; - // ippl::VectorkVec; - // for(size_t d = 0; d < 3; ++d) { - // kVec[d] = (2.0 * pi / (maxU[d] - minU[d])) * (iVec[d] - (pt[d] / 2)); - // } - // Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, nloc), - // [=](const size_t idx, Kokkos::complex& innerReduce) - // { - // double arg = 0.0; - // for(size_t d = 0; d < 3; ++d) { - // arg += kVec[d]*Rview(idx)[d]; - // } - // const double& val = Qview(idx); - - // innerReduce += (Kokkos::cos(arg) - // - imag * Kokkos::sin(arg)) * val; - // }, Kokkos::Sum>(reducedValue)); - - // if(teamMember.team_rank() == 0) { - // fview(i+nghost,j+nghost,k+nghost) = reducedValue; - // } - - // }); - // - //typename field_type::HostMirror rhoNUDFT_host = field_dft.getHostMirror(); - //Kokkos::deep_copy(rhoNUDFT_host, field_dft.getView()); - //std::stringstream pname; - //pname << "data/FieldFFT_"; - //pname << Ippl::Comm->rank(); - //pname << ".csv"; - //Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); - //pcsvout.precision(10); - //pcsvout.setf(std::ios::scientific, std::ios::floatfield); - //pcsvout << "rho" << endl; - //for (int i = 0; i< pt[0]; i++) { - // for (int j = 0; j< pt[1]; j++) { - // for (int k = 0; k< pt[2]; k++) { - // pcsvout << field_result(i+nghost,j+nghost, k+nghost) << endl; - // } - // } - //} - //std::stringstream pname2; - //pname2 << "data/FieldDFT_"; - //pname2 << Ippl::Comm->rank(); - //pname2 << ".csv"; - //Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); - //pcsvout2.precision(10); - //pcsvout2.setf(std::ios::scientific, std::ios::floatfield); - 
//pcsvout2 << "rho" << endl; - //for (int i = 0; i< pt[0]; i++) { - // for (int j = 0; j< pt[1]; j++) { - // for (int k = 0; k< pt[2]; k++) { - // pcsvout2 << rhoNUDFT_host(i+nghost,j+nghost, k+nghost) << endl; - // } - // } - // } - // Ippl::Comm->barrier(); + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + Kokkos::parallel_for("NUDFT type 1", + team_policy(flatN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type& teamMember) { + const size_t flatIndex = teamMember.league_rank(); + + const int k = (int)(flatIndex / (pt[0] * pt[1])); + const int flatIndex2D = flatIndex - (k * pt[0] * pt[1]); + const int i = flatIndex2D % pt[0]; + const int j = (int)(flatIndex2D / pt[0]); + + Kokkos::complex reducedValue = 0.0; + ippl::Vector iVec = {i, j, k}; + ippl::VectorkVec; + for(size_t d = 0; d < 3; ++d) { + kVec[d] = (2.0 * pi / (maxU[d] - minU[d])) * (iVec[d] - (pt[d] / 2)); + } + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, nloc), + [=](const size_t idx, Kokkos::complex& innerReduce) + { + double arg = 0.0; + for(size_t d = 0; d < 3; ++d) { + arg += kVec[d]*Rview(idx)[d]; + } + const double& val = Qview(idx); + + innerReduce += (Kokkos::cos(arg) + - imag * Kokkos::sin(arg)) * val; + }, Kokkos::Sum>(reducedValue)); + + if(teamMember.team_rank() == 0) { + fview(i+nghost,j+nghost,k+nghost) = reducedValue; + } + + }); + + typename field_type::HostMirror rhoNUDFT_host = field_dft.getHostMirror(); + Kokkos::deep_copy(rhoNUDFT_host, field_dft.getView()); + std::stringstream pname; + pname << "data/FieldFFT_"; + pname << Ippl::Comm->rank(); + pname << ".csv"; + Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout.precision(10); + pcsvout.setf(std::ios::scientific, std::ios::floatfield); + pcsvout << "rho" << endl; + for (int i = 0; i< pt[0]; i++) { + for (int j = 0; j< pt[1]; j++) { + for (int k = 0; k< pt[2]; k++) { + pcsvout << field_result(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + std::stringstream pname2; + pname2 << "data/FieldDFT_"; + pname2 << Ippl::Comm->rank(); + pname2 << ".csv"; + Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout2.precision(10); + pcsvout2.setf(std::ios::scientific, std::ios::floatfield); + pcsvout2 << "rho" << endl; + for (int i = 0; i< pt[0]; i++) { + for (int j = 0; j< pt[1]; j++) { + for (int k = 0; k< pt[2]; k++) { + pcsvout2 << rhoNUDFT_host(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + Ippl::Comm->barrier(); @@ -259,7 +252,7 @@ int main(int argc, char *argv[]) { double arg = 0.0; for(size_t d = 0; d < dim; ++d) { - arg += kVec[d]*Rview(idx)[d]; + arg += (2 * pi / (hx[d] * pt[d])) * kVec[d] * Rview(idx)[d]; } valL += (Kokkos::cos(arg) diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp index f55351db7..9d47a9607 100644 --- a/test/FFT/TestNUFFT2.cpp +++ b/test/FFT/TestNUFFT2.cpp @@ -106,17 +106,21 @@ int main(int argc, char *argv[]) { ippl::FieldLayout layout(owned, decomp); + typedef ippl::Vector Vector_t; + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + //Vector_t minU = {0.0, 0.0, 0.0}; + //Vector_t maxU = {25.0, 25.0, 25.0}; + std::array dx = { - 2.0 * pi / double(pt[0]), - 2.0 * pi / double(pt[1]), - 2.0 * pi / double(pt[2]), + (maxU[0] - minU[0]) / double(pt[0]), + (maxU[1] - minU[1]) / double(pt[1]), + (maxU[2] - minU[2]) / double(pt[2]), }; - - typedef ippl::Vector Vector_t; //typedef ippl::Vector, 3> CxVector_t; Vector_t hx = {dx[0], dx[1], dx[2]}; - Vector_t origin = 
{-pi, -pi, -pi}; + Vector_t origin = {minU[0], minU[1], minU[2]}; ippl::UniformCartesian mesh(owned, hx, origin); playout_type pl(layout, mesh); @@ -148,8 +152,6 @@ int main(int argc, char *argv[]) { int type = 2; - Vector_t minU = {-pi, -pi, -pi}; - Vector_t maxU = {pi, pi, pi}; size_type nloc = Np/Ippl::Comm->size(); @@ -203,7 +205,7 @@ int main(int argc, char *argv[]) { ippl::Vector iVec = {i, j, k}; double arg = 0.0; for(size_t d = 0; d < dim; ++d) { - arg += (iVec[d] - (pt[d]/2)) * Rview(idx)[d]; + arg += (2 * pi / (hx[d] * pt[d])) * (iVec[d] - (pt[d]/2)) * Rview(idx)[d]; } valL += (Kokkos::cos(arg) From 3a14e8a6c6d6f1e1f7c0f5c1f6efeec3eef7af06 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 26 Apr 2024 11:10:37 +0200 Subject: [PATCH 115/117] Added a comment --- alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp | 2 ++ alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 959fd119c..486709163 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -480,6 +480,8 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Nhalf[d] = domainHalf[d].length(); } + //Heffte needs FFTshifted field whereas the field from cuFINUFFT + //is not shifted. Hence, here we do the shift. Kokkos::parallel_for("Transfer complex rho to half domain", mdrange_type({0, 0, 0}, {Nhalf[0], diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index 54984352e..fc01f8228 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -308,7 +308,7 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - //P->dumpEnergy(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); double alpha = -0.5 * dt; @@ -387,7 +387,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - //P->dumpEnergy(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } From 2bae52889a3f6982be242721ea282c5bc969b89b Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 30 Apr 2024 08:14:55 +0200 Subject: [PATCH 116/117] Variable name changed related to the bugfix and dumping commented for speedup benchmarks --- .../BumponTailInstabilityPIF.cpp | 8 ++--- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 8 ++--- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 8 ++--- alpine/ElectrostaticPIF/PenningTrapPIF.cpp | 4 +-- alpine/PinT/BumponTailInstabilityPinT.cpp | 2 +- alpine/PinT/ChargedParticlesPinT.hpp | 30 +++++++++---------- alpine/PinT/LandauDampingPinT.cpp | 2 +- alpine/PinT/PenningTrapPinT.cpp | 2 +- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp index 3ef320c57..b716a4ab9 100644 --- a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -345,8 +345,8 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - P->dumpBumponTail(); - P->dumpEnergy(); + //P->dumpBumponTail(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); // begin main timestep loop @@ -386,8 +386,8 @@ int main(int argc, char *argv[]){ P->time_m 
+= dt; IpplTimings::startTimer(dumpDataTimer); - P->dumpBumponTail(); - P->dumpEnergy(); + //P->dumpBumponTail(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 486709163..39129695b 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -606,10 +606,10 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double Sk = 1.0; for(size_t d = 0; d < Dim; ++d) { kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); - double kh = kVec[d] * dx[d] / 2; - bool isNotZero = (kh != 0.0); - double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); - double arg = isNotZero * (Kokkos::sin(kh) * factor) + + double khbytwo = kVec[d] * dx[d] / 2; + bool isNotZero = (khbytwo != 0.0); + double factor = (1.0 / (khbytwo + ((!isNotZero) * 1.0))); + double arg = isNotZero * (Kokkos::sin(khbytwo) * factor) + (!isNotZero) * 1.0; //Fourier transform of CIC Sk *= std::pow(arg, order); diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index fe5e8b68c..d360eff59 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -307,8 +307,8 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); - P->dumpEnergy(); + //P->dumpLandau(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); // begin main timestep loop @@ -348,8 +348,8 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); - P->dumpEnergy(); + //P->dumpLandau(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp index fc01f8228..54984352e 100644 --- a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -308,7 +308,7 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - P->dumpEnergy(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); double alpha = -0.5 * dt; @@ -387,7 +387,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - P->dumpEnergy(); + //P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 94fd4dd8a..6147c543c 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -683,7 +683,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index d94309c60..538e8f56d 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -524,10 +524,10 @@ class ChargedParticlesPinT : public ippl::ParticleBase { double Sk = 1.0; for(size_t d = 0; d < Dim; ++d) { kVec[d] = 2 * pi / Len[d] * 
(iVec[d] - (N[d] / 2)); - double kh = kVec[d] * dx[d] / 2; - bool isNotZero = (kh != 0.0); - double factor = (1.0 / (kh + ((!isNotZero) * 1.0))); - double arg = isNotZero * (Kokkos::sin(kh) * factor) + + double khbytwo = kVec[d] * dx[d] / 2; + bool isNotZero = (khbytwo != 0.0); + double factor = (1.0 / (khbytwo + ((!isNotZero) * 1.0))); + double arg = isNotZero * (Kokkos::sin(khbytwo) * factor) + (!isNotZero) * 1.0; //Fourier transform of CIC Sk *= std::pow(arg, order); @@ -697,8 +697,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter, int rankTime, int rankSpace, + const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, + const unsigned int& /*iter*/, int /*rankTime*/, int /*rankSpace*/, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -729,8 +729,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - dumpFieldEnergy(nc, iter, rankTime, rankSpace); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpFieldEnergy(nc, iter, rankTime, rankSpace); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } for (unsigned int it=0; it { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - dumpFieldEnergy(nc, iter, rankTime, rankSpace); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpFieldEnergy(nc, iter, rankTime, rankSpace); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } @@ -782,9 +782,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& nc, - const unsigned int& iter, const double& Bext, - int rankTime, int rankSpace, + const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, + const unsigned int& /*iter*/, const double& Bext, + int /*rankTime*/, int /*rankSpace*/, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -813,7 +813,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } double alpha = -0.5 * dt; @@ -899,7 +899,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 99f625eb9..6f7959ed9 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -632,7 +632,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); 
IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index 270fba8c2..a5e90b42f 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -631,7 +631,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); From bf5582b22801aa414c0cb5791e743b5ad898871a Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Wed, 26 Jun 2024 07:38:37 +0200 Subject: [PATCH 117/117] State prior to submission with file writings enabled --- .../BumponTailInstabilityPIF.cpp | 8 +++---- .../ElectrostaticPIF/ChargedParticlesPIF.hpp | 18 +++++++-------- alpine/ElectrostaticPIF/LandauDampingPIF.cpp | 8 +++---- alpine/PinT/BumponTailInstabilityPinT.cpp | 2 +- alpine/PinT/ChargedParticlesPinT.hpp | 22 +++++++++---------- alpine/PinT/LandauDampingPinT.cpp | 2 +- alpine/PinT/PenningTrapPinT.cpp | 2 +- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp index b716a4ab9..3ef320c57 100644 --- a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -345,8 +345,8 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - //P->dumpBumponTail(); - //P->dumpEnergy(); + P->dumpBumponTail(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); // begin main timestep loop @@ -386,8 +386,8 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - //P->dumpBumponTail(); - //P->dumpEnergy(); + P->dumpBumponTail(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp index 39129695b..ecc061798 100644 --- a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -303,7 +303,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { double fieldEnergy = 0.0; - double ExAmp = 0.0; + double EzAmp = 0.0; auto rhoview = rho_m.getView(); const int nghost = rho_m.getNghost(); @@ -324,7 +324,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Kokkos::complex imag = {0.0, 1.0}; double pi = std::acos(-1.0); - Kokkos::parallel_reduce("Ex energy and Max", + Kokkos::parallel_reduce("Ez energy and Max", mdrange_type({0, 0, 0}, {N[0], N[1], @@ -340,15 +340,15 @@ class ChargedParticlesPIF : public ippl::ParticleBase { Vector kVec; double Dr = 0.0; for(size_t d = 0; d < Dim; ++d) { - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); Dr += kVec[d] * kVec[d]; } Kokkos::complex Ek = {0.0, 0.0}; - if(Dr != 0.0) { - Ek = -(imag * kVec[2] * rhoview(i+nghost,j+nghost,k+nghost) / Dr); - } + auto rho = rhoview(i+nghost,j+nghost,k+nghost); + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[2] * rho * factor); double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); tlSum += myVal; @@ -357,7 +357,7 @@ class ChargedParticlesPIF : public 
ippl::ParticleBase { if(myValMax > tlMax) tlMax = myValMax; - }, Kokkos::Sum(fieldEnergy), Kokkos::Max(ExAmp)); + }, Kokkos::Sum(fieldEnergy), Kokkos::Max(EzAmp)); Kokkos::fence(); @@ -381,7 +381,7 @@ class ChargedParticlesPIF : public ippl::ParticleBase { csvout << time_m << " " << fieldEnergy << " " - << ExAmp << endl; + << EzAmp << endl; } diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp index d360eff59..e07e249fc 100644 --- a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -307,8 +307,8 @@ int main(int argc, char *argv[]){ P->gather(); IpplTimings::startTimer(dumpDataTimer); - //P->dumpLandau(); - //P->dumpEnergy(); + P->dumpBumponTail(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); // begin main timestep loop @@ -348,8 +348,8 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - //P->dumpLandau(); - //P->dumpEnergy(); + P->dumpBumponTail(); + P->dumpEnergy(); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; } diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp index 6147c543c..94fd4dd8a 100644 --- a/alpine/PinT/BumponTailInstabilityPinT.cpp +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -683,7 +683,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp index 538e8f56d..7790c8f26 100644 --- a/alpine/PinT/ChargedParticlesPinT.hpp +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -697,8 +697,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void LeapFrogPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, - const unsigned int& /*iter*/, int /*rankTime*/, int /*rankSpace*/, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, int rankTime, int rankSpace, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -729,8 +729,8 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - //dumpFieldEnergy(nc, iter, rankTime, rankSpace); - //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpFieldEnergy(nc, iter, rankTime, rankSpace); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } for (unsigned int it=0; it { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - //dumpFieldEnergy(nc, iter, rankTime, rankSpace); - //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpFieldEnergy(nc, iter, rankTime, rankSpace); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } @@ -782,9 +782,9 @@ class ChargedParticlesPinT : public ippl::ParticleBase { void BorisPIF(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int& nt, - const double& dt, const double& tStartMySlice, const unsigned& /*nc*/, - const unsigned int& /*iter*/, const double& Bext, - int /*rankTime*/, 
int /*rankSpace*/, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, const double& Bext, + int rankTime, int rankSpace, const std::string& propagator, MPI_Comm& spaceComm) { static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); @@ -813,7 +813,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if((time_m == 0.0) && (propagator == "Fine")) { IpplTimings::startTimer(dumpData); - //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } double alpha = -0.5 * dt; @@ -899,7 +899,7 @@ class ChargedParticlesPinT : public ippl::ParticleBase { if(propagator == "Fine") { IpplTimings::startTimer(dumpData); - //dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); IpplTimings::stopTimer(dumpData); } } diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp index 6f7959ed9..99f625eb9 100644 --- a/alpine/PinT/LandauDampingPinT.cpp +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -632,7 +632,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm); diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp index a5e90b42f..270fba8c2 100644 --- a/alpine/PinT/PenningTrapPinT.cpp +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -631,7 +631,7 @@ int main(int argc, char *argv[]){ << endl; IpplTimings::startTimer(dumpData); - //Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); IpplTimings::stopTimer(dumpData); MPI_Barrier(spaceComm);
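
The shape-function fix in PATCH 114 (variable renamed in PATCH 116) evaluates the Fourier-space CIC factor at k*h/2 instead of k*h: the Fourier transform of the cloud-in-cell (triangle) kernel per dimension is sinc(k*h/2)^order with order = 2, so the earlier code sampled the wrong argument. A minimal standalone sketch of the corrected factor is given below; the function name (shapeFactorCIC) and the free parameters (order, mesh spacing h) are illustrative assumptions for this sketch, not IPPL API.

    #include <array>
    #include <cmath>
    #include <cstddef>

    // Corrected CIC shape factor as in PATCH 114/116:
    //   S(k) = prod_d sinc(k_d * h_d / 2)^order, with sinc(0) = 1.
    // The "/ 2" in khbytwo is the bugfix; order = 2 corresponds to CIC.
    double shapeFactorCIC(const std::array<double, 3>& kVec,
                          const std::array<double, 3>& h,
                          int order) {
        double Sk = 1.0;
        for (std::size_t d = 0; d < 3; ++d) {
            const double khbytwo = kVec[d] * h[d] / 2.0;
            const double arg = (khbytwo != 0.0) ? std::sin(khbytwo) / khbytwo : 1.0;
            Sk *= std::pow(arg, order);
        }
        return Sk;
    }

This mirrors the branch-free form used in ChargedParticlesPIF.hpp and ChargedParticlesPinT.hpp, written here with an ordinary conditional for readability.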