From 6c5c1869ed7c1cdab60f901e72d860c4d53b4d41 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Thu, 22 Apr 2021 15:32:09 -0400 Subject: [PATCH 01/24] Adding set num threads --- time_integrators.f90 | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/time_integrators.f90 b/time_integrators.f90 index 3f1b908..8460bad 100644 --- a/time_integrators.f90 +++ b/time_integrators.f90 @@ -37,8 +37,11 @@ subroutine imex_rk(vtk_print, save_nusselt) integer :: nthreads, myid integer, EXTERNAL :: OMP_GET_THREAD_NUM, OMP_GET_NUM_THREADS real(dp), EXTERNAL :: OMP_GET_WTIME +EXTERNAL :: OMP_SET_NUM_THREADS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +call OMP_SET_NUM_THREADS(8) + if (present(vtk_print)) then wvtk = .true. nprint = vtk_print @@ -97,7 +100,7 @@ subroutine imex_rk(vtk_print, save_nusselt) finish = OMP_GET_WTIME() write(*,*) " - calc_explicit(1) timing: ", finish-start, "(s)" start = OMP_GET_WTIME() - !$OMP PARALLEL DO num_threads(16) private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) do it = 1,Nx ! kx loop ! Compute phi1 and T1 call calc_vari_mod(tmp_phi, tmp_T, acoeffs(1,1), 1,& @@ -141,7 +144,7 @@ subroutine imex_rk(vtk_print, save_nusselt) ! STAGE 2 :: !::::::::::: start = OMP_GET_WTIME() - !$OMP PARALLEL DO num_threads(16) private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) do it = 1,Nx ! kx loop ! Compute phi2 and T2 call calc_vari_mod(tmp_phi, tmp_T, acoeffs(2,2), 2,& @@ -185,7 +188,7 @@ subroutine imex_rk(vtk_print, save_nusselt) ! STAGE 3 :: !::::::::::: start = OMP_GET_WTIME() - !$OMP PARALLEL DO num_threads(16) private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) do it = 1,Nx ! kx loop ! Compute phi3 and T3 call calc_vari_mod(tmp_phi, tmp_T, acoeffs(3,3), 3,& @@ -240,7 +243,7 @@ subroutine imex_rk(vtk_print, save_nusselt) & b(3)*(K3_T(2:Ny-1,:) + K4hat_T(2:Ny-1,:))) ! Get ux and uy - !$OMP PARALLEL DO num_threads(16) private(tmp_uy, it) schedule(dynamic) + !$OMP PARALLEL DO private(tmp_uy, it) schedule(dynamic) do it = 1,Nx ! 
Solve for v call calc_vi_mod(tmp_uy, phi(:,it), kx(it)) @@ -607,25 +610,25 @@ subroutine calc_explicit(stage) start = OMP_GET_WTIME() select case(stage) case (1) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K1hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do !$OMP END PARALLEL DO case (2) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K2hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do !$OMP END PARALLEL DO case (3) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K3hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do !$OMP END PARALLEL DO case (4) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K4hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do @@ -635,7 +638,7 @@ subroutine calc_explicit(stage) write(*,*) " - - l1 timing: ", finish-start, "(s)" start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) schedule(dynamic) +!$OMP PARALLEL DO schedule(dynamic) do i=1,Nx ! Compute dx(T) in Fourier space nlT (:,i) = kx(i)*Ti(:,i) @@ -650,7 +653,7 @@ subroutine calc_explicit(stage) nlT = CI*nlT start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) private(tnlT, tnlphi, tT, tux, tuy, tphi) schedule(dynamic) +!$OMP PARALLEL DO private(tnlT, tnlphi, tT, tux, tuy, tphi) schedule(dynamic) do j = 1,Ny ! Bring everything to physical space tnlT = nlT(j,:) @@ -678,7 +681,7 @@ subroutine calc_explicit(stage) ! Calculate nonlinear term start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) private(tmp_T) schedule(dynamic) +!$OMP PARALLEL DO private(tmp_T) schedule(dynamic) do i = 1,Nx ! Temperature tmp_T = Ti(:,i) @@ -692,7 +695,7 @@ subroutine calc_explicit(stage) ! 
Bring nonlinear terms back to Fourier space start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) private(tnlT, tnlphi) schedule(dynamic) +!$OMP PARALLEL DO private(tnlT, tnlphi) schedule(dynamic) do j = 1,Ny tnlT = nlT(j,:) tnlphi = nlphi(j,:) @@ -718,7 +721,7 @@ subroutine calc_explicit(stage) start = OMP_GET_WTIME() select case (stage) case (1) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K1hat_phi(:,i) = K1hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K1hat_phi(:,i) = K1hat_phi(:,i) - CI*kx(i)*nlphi(:,i) @@ -726,7 +729,7 @@ subroutine calc_explicit(stage) !$OMP END PARALLEL DO K1hat_T = -nlT case (2) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K2hat_phi(:,i) = K2hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K2hat_phi(:,i) = K2hat_phi(:,i) - CI*kx(i)*nlphi(:,i) @@ -734,7 +737,7 @@ subroutine calc_explicit(stage) !$OMP END PARALLEL DO K2hat_T = -nlT case (3) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K3hat_phi(:,i) = K3hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K3hat_phi(:,i) = K3hat_phi(:,i) - CI*kx(i)*nlphi(:,i) @@ -742,7 +745,7 @@ subroutine calc_explicit(stage) !$OMP END PARALLEL DO K3hat_T = -nlT case (4) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K4hat_phi(:,i) = K4hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K4hat_phi(:,i) = K4hat_phi(:,i) - CI*kx(i)*nlphi(:,i) From dfe7fc791bebd3ce0b916cd69fa3637174dac7c0 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Thu, 22 Apr 2021 17:44:30 -0400 Subject: [PATCH 02/24] Starting MPI implementation --- Makefile | 9 +- input.data | 4 +- time_integrators.f90 | 2 +- time_loop_MPI.f90 | 197 +++++++++++++++++++++++++++++++++++++++++++ write_pack.f90 | 33 +++++--- 5 files changed, 225 insertions(+), 20 deletions(-) create mode 100644 time_loop_MPI.f90 diff --git a/Makefile b/Makefile index 684c2f9..d9d79e7 100755 --- a/Makefile +++ b/Makefile @@ -1,12 +1,13 @@ -FC=gfortran +FC=mpif90 FFLAGS= -c -O3 LIBFLAGS2 = -L/usr/local/fftw/lib LDFLAGS = -I/usr/local/fftw/include -#MAIN = Ra_loop -#MAIN = Ra_loop_no_opt -MAIN = time_loop +# MAIN = Ra_loop +# MAIN = Ra_loop_no_opt +# MAIN = time_loop +MAIN = time_loop_MPI OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o PROGRAMS = $(MAIN).exe diff --git a/input.data b/input.data index ca77d7c..893b815 100755 --- a/input.data +++ b/input.data @@ -2,6 +2,6 @@ 5.0e+03, 1, 1.1 5.0, 0.1 -1.0, 1.0 -1600, 1275, 1 -0, 0, 0 +256, 204, 1 0, 0, 0 +1, 0, 0 diff --git a/time_integrators.f90 b/time_integrators.f90 index 8460bad..846696f 100644 --- a/time_integrators.f90 +++ b/time_integrators.f90 @@ -40,7 +40,7 @@ subroutine imex_rk(vtk_print, save_nusselt) EXTERNAL :: OMP_SET_NUM_THREADS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -call OMP_SET_NUM_THREADS(8) +call OMP_SET_NUM_THREADS(1) if (present(vtk_print)) then wvtk = .true. diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90 new file mode 100644 index 0000000..1c828ca --- /dev/null +++ b/time_loop_MPI.f90 @@ -0,0 +1,197 @@ +program time_loop + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
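+!! Driver for the MPI port of the solver: every rank initializes MPI, reads
+!! input.data, takes ownership of a horizontal slab of the Ny grid rows, and
+!! builds its own FFT plans and mesh.
+!! Assumed launch command (the Makefile builds time_loop_MPI.exe with mpif90,
+!! and the row split below is currently hard-coded to four ranks, Ny = Ny / 4):
+!!     mpirun -np 4 ./time_loop_MPI.exe
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!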
+
+use global
+use strings
+use write_pack
+use interpolation_pack
+use allocate_vars
+use statistics
+use mesh_pack
+use time_integrators
+
+implicit none
+include 'mpif.h'
+
+integer :: ntokens, ios
+integer :: Nxc, Nyc, Nzc
+integer :: nRa, ii, jj
+integer :: refine_flag_x, refine_flag_y, refine_flag_z
+integer :: vtk_flag, rstrt_flag, opt_flag
+logical :: wvtk, save_restart, calc_opt
+logical :: refine_x, refine_y, refine_z, refine_xy
+logical :: fTexist, fuyexist
+logical :: ex_Tptrb
+real(dp) :: temp
+real(dp) :: dxc
+real(dp) :: Ra, dRa, Nu, dalpha
+real(dp), allocatable, dimension(:) :: xpc, ypc, zpc
+complex(C_DOUBLE_COMPLEX), allocatable, dimension(:,:) :: uyc, Tc
+character(10) :: citer
+character(9) :: fiter
+character(80) :: line
+character(80) :: tokens(80)
+
+! MPI specific variables
+integer :: mpierror, num_procs
+integer :: proc_id
+character(4) :: proc_id_str
+integer status(MPI_STATUS_SIZE)
+
+
+! Initialize MPI.
+call MPI_Init(mpierror)
+call MPI_Comm_size(MPI_COMM_WORLD, num_procs, mpierror)
+call MPI_Comm_rank(MPI_COMM_WORLD, proc_id, mpierror)
+
+! Read file.
+open(2,file="input.data", status="old")
+do ii = 1,7
+read(2,'(A)') line
+call parse(line,' ,', tokens, ntokens)
+if (ntokens > 0) then
+   select case(ii)
+   case (1)
+      call value(tokens(1), Pr, ios)
+      call value(tokens(2), alpha, ios)
+      call value(tokens(3), dalpha, ios)
+   case (2)
+      call value(tokens(1), Ra, ios)
+      call value(tokens(2), nRa, ios)
+      call value(tokens(3), dRa, ios)
+   case (3)
+      call value(tokens(1), t_final, ios)
+      call value(tokens(2), dt, ios)
+      dt_init = dt
+   case (4)
+      call value(tokens(1), ybot, ios)
+      call value(tokens(2), ytop, ios)
+   case (5)
+      call value(tokens(1), Nx, ios)
+      call value(tokens(2), Ny, ios)
+      call value(tokens(3), Nz, ios)
+   case (6)
+      call value(tokens(1), refine_flag_x, ios)
+      call value(tokens(2), refine_flag_y, ios)
+      call value(tokens(3), refine_flag_z, ios)
+      if (refine_flag_x == 1) then
+         if (refine_flag_y == 1) then
+            refine_xy = .true.
+            refine_x = .false.
+            refine_y = .false.
+         else
+            refine_x = .true.
+            refine_y = .false.
+            refine_xy = .false.
+         end if
+      else if (refine_flag_y == 1) then
+         refine_x = .false.
+         refine_y = .true.
+         refine_xy = .false.
+      else if (refine_flag_z == 1) then
+         write(*,*) "Refinement in z-direction not available yet!"
+         stop
+      else
+         refine_x = .false.
+         refine_y = .false.
+         refine_z = .false.
+         refine_xy = .false.
+      end if
+   case (7)
+      call value(tokens(1), vtk_flag, ios)
+      call value(tokens(2), rstrt_flag, ios)
+      call value(tokens(3), opt_flag, ios)
+      if (vtk_flag == 1) then
+         wvtk = .true.
+      else
+         wvtk = .false.
+      end if
+      if (rstrt_flag == 1) then
+         save_restart = .true.
+      else
+         save_restart = .false.
+      end if
+      if (opt_flag == 1) then
+         calc_opt = .true.
+      else
+         calc_opt = .false.
+      end if
+   end select
+end if
+end do
+close(unit=2)
+
+! 
Create FFT plans +planuy = fftw_plan_dft_1d(Nx,tuy,tuy, FFTW_FORWARD,FFTW_ESTIMATE) +iplanuy = fftw_plan_dft_1d(Nx,tuy,tuy, FFTW_BACKWARD,FFTW_ESTIMATE) + +planux = fftw_plan_dft_1d(Nx,tux,tux, FFTW_FORWARD,FFTW_ESTIMATE) +iplanux = fftw_plan_dft_1d(Nx,tux,tux, FFTW_BACKWARD,FFTW_ESTIMATE) + +planphi = fftw_plan_dft_1d(Nx,tphi,tphi, FFTW_FORWARD,FFTW_ESTIMATE) +iplanphi = fftw_plan_dft_1d(Nx,tphi,tphi, FFTW_BACKWARD,FFTW_ESTIMATE) + +planT = fftw_plan_dft_1d(Nx,tT,tT, FFTW_FORWARD,FFTW_ESTIMATE) +iplanT = fftw_plan_dft_1d(Nx,tT,tT, FFTW_BACKWARD,FFTW_ESTIMATE) + +plannlT = fftw_plan_dft_1d(Nx,tnlT,tnlT, FFTW_FORWARD,FFTW_ESTIMATE) +iplannlT = fftw_plan_dft_1d(Nx,tnlT,tnlT, FFTW_BACKWARD,FFTW_ESTIMATE) + +plannlphi = fftw_plan_dft_1d(Nx,tnlphi,tnlphi, FFTW_FORWARD,FFTW_ESTIMATE) +iplannlphi = fftw_plan_dft_1d(Nx,tnlphi,tnlphi, FFTW_BACKWARD,FFTW_ESTIMATE) + +call global_params +call global_allocations + +xR = pi / alpha +xL = -pi / alpha +Lx = xR - xL + +if (Nx == 1) then + dx = 1.0_dp +else + dx = Lx / (real(Nx,kind=dp)) +end if + +if (proc_id == 0) then + write(*,*) "Ny = ", Ny, " divided among ", num_procs, " processors -> ", & + Ny / num_procs, " rows per processor." +end if + +Ny = Ny / 4 + +call cosine_mesh(xp,yp,zp, Nx,Ny,Nz) ! get coordinates +call dxdydz(dynu, xp,yp,zp) ! get mesh spacing for nonuniform grids +call y_mesh_params ! get metric coefficients for nonuniform grids +dymin = minval(dynu) +dxmin = sqrt(dx**2.0_dp + dymin**2.0_dp) + +! Fourier modes +do ii = 1,Nx/2 + kx_modes(ii) = real(ii,kind=dp) - 1.0_dp +end do +do ii = Nx/2+1, Nx + kx_modes(ii) = real(ii-Nx,kind=dp) - 1.0_dp +end do +kx = alpha*kx_modes + +! Write initial field to vtk +if (wvtk) then + write(proc_id_str, "(I3.3)") proc_id + write(*,*) proc_id_str + call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space + end if + +write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows." + + + + + +call MPI_Finalize(mpierror) + +end program time_loop + + \ No newline at end of file diff --git a/write_pack.f90 b/write_pack.f90 index 08f53d4..7d7b857 100755 --- a/write_pack.f90 +++ b/write_pack.f90 @@ -88,7 +88,7 @@ end subroutine write_mat_cmplx !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine write_vtk_structured_grid(iter) +subroutine write_vtk_structured_grid(iter, process_id) use global @@ -99,13 +99,17 @@ subroutine write_vtk_structured_grid(iter) character(9) :: fiter integer :: n integer :: ii, jj, kk +character(3), intent(in), optional :: process_id write(citer, "(I10)") 1000000000 + iter fiter = citer(2:10) -n = (Nx+1)*Ny*Nz - -open(unit=2, file=vtkloc//vtkname//fiter//gtype, action="write", status="replace") +n = (Nx+1)*Ny*Nx +if (PRESENT(process_id)) then + open(unit=2, file=vtkloc//vtkname//"_P"//process_id//"_"//fiter//gtype, action="write", status="replace") +else + open(unit=2, file=vtkloc//vtkname//fiter//gtype, action="write", status="replace") +endif write(2, "(a)") "# vtk DataFile Version 3.1" write(2, "(a)") "Thermal Convection Data" write(2, "(a)") "ASCII" @@ -180,18 +184,22 @@ subroutine write_vtk_structured_grid(iter) end subroutine write_vtk_structured_grid !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
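+! MPI note: the optional process_id argument added below lets each rank write
+! its own output file, named vtkname_P<process_id>_<iter>, so that per-rank
+! output does not clobber the files written by the other ranks.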
-subroutine write_to_vtk(step, physical) +subroutine write_to_vtk(step, physical, process_id) -integer, intent(in) :: step -logical, intent(in) :: physical -integer :: j +integer, intent(in) :: step +logical, intent(in) :: physical +character(3), intent(in), optional :: process_id +integer :: j -if (physical) then - - call write_vtk_structured_grid(step) +if (physical) then + if (PRESENT(process_id)) then + write(*,*) "writing file with ", process_id + call write_vtk_structured_grid(step, process_id) + else + call write_vtk_structured_grid(step) + endif else - do j = 1,Ny ! Bring everything to physical space tT = T(j,:) @@ -220,7 +228,6 @@ subroutine write_to_vtk(step, physical) T = T / real(Nx, kind=dp) ux = ux / real(Nx, kind=dp) uy = uy / real(Nx, kind=dp) - end if end subroutine write_to_vtk From 769e8cbc5ab8d71f09f8a2349170e4548ab7e661 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Thu, 22 Apr 2021 21:33:02 -0400 Subject: [PATCH 03/24] Adding multi-node vtk output --- time_loop_MPI.f90 | 20 +++++++++++++------- write_pack.f90 | 1 - 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90 index 1c828ca..ee40286 100644 --- a/time_loop_MPI.f90 +++ b/time_loop_MPI.f90 @@ -38,6 +38,7 @@ program time_loop integer :: mpierror, num_procs integer :: proc_id character(4) :: proc_id_str +real(dp) :: mpi_spacing_y integer status(MPI_STATUS_SIZE) @@ -123,6 +124,18 @@ program time_loop end do close(unit=2) +! Print MPI division. +if (proc_id == 0) then + write(*,*) "Ny = ", Ny, " divided among ", num_procs, " processors -> ", & + Ny / num_procs, " rows per processor." +end if + +! Initialize MPI variables. +Ny = Ny / 4 +mpi_spacing_y = (ytop - ybot) / num_procs +ybot = ybot + proc_id * mpi_spacing_y +ytop = ybot + mpi_spacing_y + ! Create FFT plans planuy = fftw_plan_dft_1d(Nx,tuy,tuy, FFTW_FORWARD,FFTW_ESTIMATE) iplanuy = fftw_plan_dft_1d(Nx,tuy,tuy, FFTW_BACKWARD,FFTW_ESTIMATE) @@ -155,12 +168,6 @@ program time_loop dx = Lx / (real(Nx,kind=dp)) end if -if (proc_id == 0) then - write(*,*) "Ny = ", Ny, " divided among ", num_procs, " processors -> ", & - Ny / num_procs, " rows per processor." -end if - -Ny = Ny / 4 call cosine_mesh(xp,yp,zp, Nx,Ny,Nz) ! get coordinates call dxdydz(dynu, xp,yp,zp) ! get mesh spacing for nonuniform grids @@ -180,7 +187,6 @@ program time_loop ! Write initial field to vtk if (wvtk) then write(proc_id_str, "(I3.3)") proc_id - write(*,*) proc_id_str call write_to_vtk(int(Ra), .true., proc_id_str) ! 
true = already in physical space
     end if
 
diff --git a/write_pack.f90 b/write_pack.f90
index 7d7b857..a450353 100755
--- a/write_pack.f90
+++ b/write_pack.f90
@@ -194,7 +194,6 @@ subroutine write_to_vtk(step, physical, process_id)
 
 if (physical) then
     if (PRESENT(process_id)) then
-        write(*,*) "writing file with ", process_id
         call write_vtk_structured_grid(step, process_id)
     else
         call write_vtk_structured_grid(step)

From 08952774d27ab8ff876a9599c3f8145fe1a75ed1 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Thu, 22 Apr 2021 22:14:24 -0400
Subject: [PATCH 04/24] Wrap up initialization

---
 time_integrators.f90 |   4 +-
 time_loop_MPI.f90    | 147 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 139 insertions(+), 12 deletions(-)

diff --git a/time_integrators.f90 b/time_integrators.f90
index 846696f..a8365c7 100644
--- a/time_integrators.f90
+++ b/time_integrators.f90
@@ -100,7 +100,7 @@ subroutine imex_rk(vtk_print, save_nusselt)
     finish = OMP_GET_WTIME()
     write(*,*) " - calc_explicit(1) timing: ", finish-start, "(s)"
     start = OMP_GET_WTIME()
-    !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) 
+    !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic)
     do it = 1,Nx ! kx loop
         ! Compute phi1 and T1
         call calc_vari_mod(tmp_phi, tmp_T, acoeffs(1,1), 1,&
@@ -144,7 +144,7 @@ subroutine imex_rk(vtk_print, save_nusselt)
     ! STAGE 2 ::
     !:::::::::::
     start = OMP_GET_WTIME()
-    !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) 
+    !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic)
     do it = 1,Nx ! kx loop
         ! Compute phi2 and T2
         call calc_vari_mod(tmp_phi, tmp_T, acoeffs(2,2), 2,&
diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90
index ee40286..222a632 100644
--- a/time_loop_MPI.f90
+++ b/time_loop_MPI.f90
@@ -124,12 +124,6 @@ program time_loop
 end do
 close(unit=2)
 
-! Print MPI division.
-if (proc_id == 0) then
-    write(*,*) "Ny = ", Ny, " divided among ", num_procs, " processors -> ", &
-        Ny / num_procs, " rows per processor."
-end if
-
 ! Initialize MPI variables.
 Ny = Ny / 4
 mpi_spacing_y = (ytop - ybot) / num_procs
 ybot = ybot + proc_id * mpi_spacing_y
 ytop = ybot + mpi_spacing_y
@@ -168,7 +162,6 @@ program time_loop
     dx = Lx / (real(Nx,kind=dp))
 end if
 
-
 call cosine_mesh(xp,yp,zp, Nx,Ny,Nz) ! get coordinates
 call dxdydz(dynu, xp,yp,zp) ! get mesh spacing for nonuniform grids
 call y_mesh_params ! get metric coefficients for nonuniform grids
@@ -188,16 +181,150 @@ program time_loop
 ! Write initial field to vtk
 if (wvtk) then
     write(proc_id_str, "(I3.3)") proc_id
     call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space
-    end if
+end if
 
-write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows."
+! Initialize fields.
+call init_fields(ex_Tptrb, Ra)
+call init_to_fourier(ex_Tptrb)
 
+! 
Get nu0 and kappa0 +call global_params_Ra(Ra) +if (proc_id == 0) then + write(*,'(A70)') ' ' + write(*,'(A70)') '*********************************************************************|' + write(*,'(A70)') ' TWO-DIMENSIONAL THERMAL CONVECTION |' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,47X,A)') 'COMPUTATION SIZE:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(20X,A23,I5,21X,A)') 'Nx = ', Nx, '|' + write(*,'(20X,A23,I5,21X,A)') 'Ny = ', Ny, '|' + write(*,'(20X,A23,I5,21X,A)') 'Nz = ', Nz, '|' + write(*,'(20X,A23,I5,21X,A)') 'Number of time steps = ', nt, '|' + write(*,'(20X,A23,I5,21X,A)') 'Number of Fourier modes = ', Nf, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,45X,A)') 'PROBLEM PARAMETERS:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Prandtl number (Pr) = ', Pr, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Rayleigh number (Ra) = ', Ra, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Reynolds number (Re) = ', sqrt(Ra/(16.0_dp*Pr)), '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,42X,A)') 'PHYSICAL PROBLEM SIZE:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'alpha = ', alpha, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Left coordinate of box = ', xL, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Right coordinate of box = ', xR, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of bottom wall = ', ybot, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of top wall = ', ytop, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Box width = ', Lx, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Box height = ', Ly, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Aspect Ratio = ', Lx/Ly, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Integration time (T) = ', t_final, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Time step size (Delta t) = ', dt, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(A70)') ' ' + + flush(6) + ! Print MPI division. + write(*,*) "Ny = ", Ny*num_procs, " divided among ", num_procs, " processors -> ", & + Ny, " rows per processor." +end if +call MPI_BARRIER(MPI_COMM_WORLD, mpierror) +write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows." call MPI_Finalize(mpierror) end program time_loop - \ No newline at end of file +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +subroutine init_fields(ex_Tptrb,Ra) + +use global + +implicit none + +logical, intent(out) :: ex_Tptrb +real(dp), intent(in) :: Ra +integer :: ii + +! Initialize fields. +if (Ra < 1710.0_dp) then + ex_Tptrb = .true. + do ii = 1,Nx + Tptrb(:,ii) = 0.5_dp*2.0_dp*cos(alpha*xp(ii))*cos(pi*yp/2.0_dp) + T(:,ii) = -yp + Tptrb(:,ii) + end do +else + ex_Tptrb = .false. + do ii = 1,Nx + T(:,ii) = -yp + 0.5_dp*2.0_dp*cos(alpha*xp(ii))*cos(pi*yp/2.0_dp) + end do +end if + +end subroutine init_fields + +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +subroutine init_to_fourier(ex_Tptrb) + +use global + +implicit none + +logical, intent(in) :: ex_Tptrb +integer :: ii, jj + +! Bring temperature and velocity to Fourier space. 
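+! Each row is transformed with the 1-D FFTW plans; Fourier modes with
+! abs(kx)/alpha >= Nf/2 are zeroed out (truncation of unresolved modes), and
+! the fields are rescaled by 1/Nx afterwards, since FFTW transforms are
+! unnormalized.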
+do jj = 1,Ny + tT = T(jj,:) + tuy = uy(jj,:) + call fftw_execute_dft(planT, tT, tT) + call fftw_execute_dft(planuy, tuy, tuy) + ! Truncate modes + do ii = 1,Nx + if (abs(kx(ii))/alpha >= Nf/2) then + tT(ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + tuy(ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + end if + end do + T(jj,:) = tT + uy(jj,:) = tuy + ! If temperature perturbation needed. + if (ex_Tptrb) then + tT = Tptrb(jj,:) + call fftw_execute_dft(planT, tT, tT) + ! Truncate modes + do ii = 1,Nx + if (abs(kx(ii))/alpha >= Nf/2) then + tT(ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + end if + end do + Tptrb(jj,:) = tT + end if +end do +T = T / real(Nx, kind=dp) +uy = uy / real(Nx, kind=dp) + +if (ex_Tptrb) then + Tptrb = Tptrb / real(Nx, kind=dp) +end if + +! Calculate phi and ux from uy +do ii = 1,Nx + if (kx(ii) /= 0.0_dp) then + tmp_uy = uy(:,ii) + !ux(:,ii) = -CI*d1y(tmp_uy)/kx(ii) + ux(:,ii) = CI*d1y(tmp_uy)/kx(ii) + else if (kx(ii) == 0.0_dp) then + ux(:,ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + phi(:,ii) = -kx(ii)**2.0_dp*uy(:,ii) + d2y(uy(:,ii)) +end do + +end subroutine init_to_fourier \ No newline at end of file From 62886a80a653b40a8d61f7954ed6de5ebd700236 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Fri, 23 Apr 2021 11:06:33 -0400 Subject: [PATCH 05/24] Vtk experiments --- Makefile | 4 ++-- time_integrators.f90 | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d9d79e7..0861c74 100755 --- a/Makefile +++ b/Makefile @@ -6,8 +6,8 @@ LDFLAGS = -I/usr/local/fftw/include # MAIN = Ra_loop # MAIN = Ra_loop_no_opt -# MAIN = time_loop -MAIN = time_loop_MPI +MAIN = time_loop +#MAIN = time_loop_MPI OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o PROGRAMS = $(MAIN).exe diff --git a/time_integrators.f90 b/time_integrators.f90 index a8365c7..6c49184 100644 --- a/time_integrators.f90 +++ b/time_integrators.f90 @@ -266,8 +266,6 @@ subroutine imex_rk(vtk_print, save_nusselt) end if !call update_dt - ! Don't write vtk for now. - wvtk = .false. if (wvtk) then if (mod(nti,vtk_print) == 0) then call write_to_vtk(nti, .false.) ! 
false = Fourier space From 91c30a06ccee937df14bf1b8970cf0a3c6dd43ce Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Fri, 23 Apr 2021 11:13:19 -0400 Subject: [PATCH 06/24] Vtk experiments --- input.data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/input.data b/input.data index 893b815..894b925 100755 --- a/input.data +++ b/input.data @@ -2,6 +2,6 @@ 5.0e+03, 1, 1.1 5.0, 0.1 -1.0, 1.0 -256, 204, 1 +64, 51, 1 0, 0, 0 1, 0, 0 From 5527a75f088cc8fb2e134c7ee1a9e73723e323b8 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Sat, 24 Apr 2021 10:55:56 -0400 Subject: [PATCH 07/24] Initialization complete --- Makefile | 9 +- input.data | 2 +- mesh_pack.f90 | 45 ++++++++ time_integrators.f90 | 230 --------------------------------------- time_integrators_MPI.f90 | 64 +++++++++++ time_loop_MPI.f90 | 9 +- write_pack.f90 | 8 +- 7 files changed, 128 insertions(+), 239 deletions(-) create mode 100644 time_integrators_MPI.f90 diff --git a/Makefile b/Makefile index 0861c74..6db4fb2 100755 --- a/Makefile +++ b/Makefile @@ -6,10 +6,10 @@ LDFLAGS = -I/usr/local/fftw/include # MAIN = Ra_loop # MAIN = Ra_loop_no_opt -MAIN = time_loop -#MAIN = time_loop_MPI +# MAIN = time_loop +MAIN = time_loop_MPI -OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o +OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o time_integrators_MPI.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o PROGRAMS = $(MAIN).exe all: $(PROGRAMS) @@ -53,6 +53,9 @@ statistics.o : statistics.f90 time_integrators.o : time_integrators.f90 $(FC) -fopenmp $(FFLAGS) time_integrators.f90 +time_integrators_MPI.o : time_integrators_MPI.f90 + $(FC) -fopenmp $(FFLAGS) time_integrators_MPI.f90 + jacobians.o : jacobians.f90 $(FC) $(FFLAGS) jacobians.f90 diff --git a/input.data b/input.data index 894b925..cc633a4 100755 --- a/input.data +++ b/input.data @@ -2,6 +2,6 @@ 5.0e+03, 1, 1.1 5.0, 0.1 -1.0, 1.0 -64, 51, 1 +64, 48, 1 0, 0, 0 1, 0, 0 diff --git a/mesh_pack.f90 b/mesh_pack.f90 index b7239b2..034c0cf 100755 --- a/mesh_pack.f90 +++ b/mesh_pack.f90 @@ -42,6 +42,51 @@ subroutine cosine_mesh(xcoor,ycoor,zcoor, numx,numy,numz,dxin) end subroutine cosine_mesh ! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +subroutine cosine_mesh_MPI(xcoor,ycoor,zcoor, numx,numy,numz, proc_id,num_procs,dxin) + +implicit none + +integer, intent(in) :: numx, numy, numz +integer, intent(in) :: proc_id, num_procs +integer :: mpierror +integer :: ii, jj, kk +real(dp) :: c +real(dp), optional, intent(in) :: dxin +real(dp) :: dxj +real(dp), allocatable, dimension(:), intent(out) :: xcoor, ycoor, zcoor +integer :: total_ny, start_ny + +if (present(dxin)) then + dxj = dxin +else + dxj = dx +end if + +allocate(xcoor(numx), stat=alloc_err) +allocate(ycoor(numy), stat=alloc_err) +allocate(zcoor(numz), stat=alloc_err) +call check_alloc_err(alloc_err) + +! Create the grid. +xcoor(1) = xL +zcoor(1) = 0.0_dp +total_ny = numy * num_procs +start_ny = proc_id * numy +do jj = 1,numy + c = (real(jj+start_ny,kind=dp) - 1.0_dp) / (real(total_ny,kind=dp) - 1.0_dp) + ycoor(jj) = -cos(c*pi) +end do +do ii = 2,numx + xcoor(ii) = xcoor(ii-1) + dxj +end do +do kk = 2,numz + zcoor(kk) = 0.0_dp +end do + +end subroutine cosine_mesh_MPI +! 
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + subroutine dxdydz(dyj, xcoor,ycoor,zcoor) implicit none diff --git a/time_integrators.f90 b/time_integrators.f90 index 6c49184..6536e92 100644 --- a/time_integrators.f90 +++ b/time_integrators.f90 @@ -54,236 +54,6 @@ subroutine imex_rk(vtk_print, save_nusselt) call write_to_vtk(0, .false.) ! false = Fourier space end if -dt = dt_init - -call init_bc(acoeffs(1,1)) - -time = 0.0_dp - -dtmax = 0.5_dp -dtmin = 1.0e-4_dp - -dt_ramp = 1.1_dp - -dt_old = dt - -nti = 0 - -! Format for writing out single values. -1000 format(E25.16E3) - -! Time integration -do ! while (time < t_final) - start_overall = OMP_GET_WTIME() - - dt_final = t_final - time - - if (dt_final <= dt) then - time = t_final - else - time = time + dt - end if - - write(*,*) "time = ", time, "dt = ", dt - - nti = nti + 1 - - !::::::::::: - ! STAGE 1 :: - !::::::::::: - phii = phi - Ti = T - uxi = ux - uyi = uy - start = OMP_GET_WTIME() - call calc_explicit(1) - finish = OMP_GET_WTIME() - write(*,*) " - calc_explicit(1) timing: ", finish-start, "(s)" - start = OMP_GET_WTIME() - !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) - do it = 1,Nx ! kx loop - ! Compute phi1 and T1 - call calc_vari_mod(tmp_phi, tmp_T, acoeffs(1,1), 1,& - kx(it), phi(2:Ny-1,it),& - K1hat_phi(2:Ny-1,it),K2hat_phi(2:Ny-1,it),K3hat_phi(2:Ny-1,it),& - K1hat_T(2:Ny-1,it),K2hat_T(2:Ny-1,it),K3hat_T(2:Ny-1,it),& - K1_phi(2:Ny-1,it), K2_phi(2:Ny-1,it), K1_T(2:Ny-1,it), K2_T(2:Ny-1,it),& - T(:,it)) - ! Compute v1 from phi1 - call calc_vi_mod(tmp_uy, tmp_phi, kx(it)) - ! BOUNDAY CONDITIONS! - call update_bcs_mod(tmp_phi1,tmp_uy1, tmp_phi,tmp_uy,dyv1_T(it),dyv2_T(it),& - dyv1_B(it),dyv2_B(it),& - V1(:,it),V2(:,it),phi1(:,it),phi2(:,it)) - tmp_phi = tmp_phi1 - tmp_uy = tmp_uy1 - call calc_implicit_mod(tmp_K_phi,tmp_K_T, tmp_phi,tmp_T, kx(it)) - K1_phi(:,it) = tmp_K_phi - K1_T(:,it) = tmp_K_T - ! Compute u1 from v1 - if (kx(it) /= 0.0_dp) then - !uxi(:,it) = -CI*d1y(tmp_uy)/kx(it) - uxi(:,it) = CI*d1y(tmp_uy)/kx(it) - else if (kx(it) == 0.0_dp) then - uxi(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! - end if - phii(:,it) = tmp_phi - Ti (:,it) = tmp_T - uyi (:,it) = tmp_uy - end do - !$OMP END PARALLEL DO - finish = OMP_GET_WTIME() - write(*,*) " - stage 1 mid timing: ", finish-start, "(s)" - ! Compute K2hat - start = OMP_GET_WTIME() - call calc_explicit(2) - finish = OMP_GET_WTIME() - write(*,*) " - calc_explicit(2) timing: ", finish-start, "(s)" - - !::::::::::: - ! STAGE 2 :: - !::::::::::: - start = OMP_GET_WTIME() - !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) - do it = 1,Nx ! kx loop - ! Compute phi2 and T2 - call calc_vari_mod(tmp_phi, tmp_T, acoeffs(2,2), 2,& - kx(it), phi(2:Ny-1,it),& - K1hat_phi(2:Ny-1,it),K2hat_phi(2:Ny-1,it),K3hat_phi(2:Ny-1,it),& - K1hat_T(2:Ny-1,it),K2hat_T(2:Ny-1,it),K3hat_T(2:Ny-1,it),& - K1_phi(2:Ny-1,it), K2_phi(2:Ny-1,it), K1_T(2:Ny-1,it), K2_T(2:Ny-1,it),& - T(:,it)) - ! Compute v1 from phi1 - call calc_vi_mod(tmp_uy, tmp_phi, kx(it)) - ! BOUNDAY CONDITIONS! - call update_bcs_mod(tmp_phi1,tmp_uy1, tmp_phi,tmp_uy,dyv1_T(it),dyv2_T(it),& - dyv1_B(it),dyv2_B(it),& - V1(:,it),V2(:,it),phi1(:,it),phi2(:,it)) - tmp_phi = tmp_phi1 - tmp_uy = tmp_uy1 - ! Compute K2_T and K2_phi - call calc_implicit_mod(tmp_K_phi,tmp_K_T, tmp_phi,tmp_T, kx(it)) - K2_phi(:,it) = tmp_K_phi - K2_T(:,it) = tmp_K_T - ! 
Compute u1 from v1 - if (kx(it) /= 0.0_dp) then - !uxi(:,it) = -CI*d1y(tmp_uy)/kx(it) - uxi(:,it) = CI*d1y(tmp_uy)/kx(it) - else if (kx(it) == 0.0_dp) then - uxi(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! - end if - phii(:,it) = tmp_phi - Ti (:,it) = tmp_T - uyi (:,it) = tmp_uy - end do - finish = OMP_GET_WTIME() - write(*,*) " - stage 2 mid timing: ", finish-start, "(s)" - ! Compute K3hat - start = OMP_GET_WTIME() - call calc_explicit(3) - finish = OMP_GET_WTIME() - write(*,*) " - calc_explicit(3) timing: ", finish-start, "(s)" - - !::::::::::: - ! STAGE 3 :: - !::::::::::: - start = OMP_GET_WTIME() - !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) - do it = 1,Nx ! kx loop - ! Compute phi3 and T3 - call calc_vari_mod(tmp_phi, tmp_T, acoeffs(3,3), 3,& - kx(it), phi(2:Ny-1,it),& - K1hat_phi(2:Ny-1,it),K2hat_phi(2:Ny-1,it),K3hat_phi(2:Ny-1,it),& - K1hat_T(2:Ny-1,it),K2hat_T(2:Ny-1,it),K3hat_T(2:Ny-1,it),& - K1_phi(2:Ny-1,it), K2_phi(2:Ny-1,it), K1_T(2:Ny-1,it), K2_T(2:Ny-1,it),& - T(:,it)) - ! Compute v1 from phi1 - call calc_vi_mod(tmp_uy, tmp_phi, kx(it)) - ! BOUNDAY CONDITIONS! - call update_bcs_mod(tmp_phi1,tmp_uy1, tmp_phi,tmp_uy,dyv1_T(it),dyv2_T(it),& - dyv1_B(it),dyv2_B(it),& - V1(:,it),V2(:,it),phi1(:,it),phi2(:,it)) - tmp_phi = tmp_phi1 - tmp_uy = tmp_uy1 - ! Compute K3_T and K3_phi - call calc_implicit_mod(tmp_K_phi,tmp_K_T, tmp_phi,tmp_T, kx(it)) - K3_phi(:,it) = tmp_K_phi - K3_T(:,it) = tmp_K_T - ! Compute u1 from v1 - if (kx(it) /= 0.0_dp) then - !uxi(:,it) = -CI*d1y(tmp_uy)/kx(it) - uxi(:,it) = CI*d1y(tmp_uy)/kx(it) - else if (kx(it) == 0.0_dp) then - uxi(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! - end if - phii(:,it) = tmp_phi - Ti (:,it) = tmp_T - uyi (:,it) = tmp_uy - end do - !$OMP END PARALLEL DO - finish = OMP_GET_WTIME() - write(*,*) " - stage 3 mid timing: ", finish-start, "(s)" - ! Compute K4hat - start = OMP_GET_WTIME() - call calc_explicit(4) - finish = OMP_GET_WTIME() - write(*,*) " - calc_explicit(4) timing: ", finish-start, "(s)" - - ! UPDATE SOLUTIONS - - start = OMP_GET_WTIME() - ! Get phi - phi(2:Ny-1,:) = phi(2:Ny-1,:) + dt*(b(1)*(K1_phi(2:Ny-1,:) + K2hat_phi(2:Ny-1,:)) + & - & b(2)*(K2_phi(2:Ny-1,:) + K3hat_phi(2:Ny-1,:)) + & - & b(3)*(K3_phi(2:Ny-1,:) + K4hat_phi(2:Ny-1,:))) - - ! Get temperature - T(2:Ny-1,:) = T(2:Ny-1,:) + dt*(b(1)*(K1_T(2:Ny-1,:) + K2hat_T(2:Ny-1,:)) + & - & b(2)*(K2_T(2:Ny-1,:) + K3hat_T(2:Ny-1,:)) + & - & b(3)*(K3_T(2:Ny-1,:) + K4hat_T(2:Ny-1,:))) - - ! Get ux and uy - !$OMP PARALLEL DO private(tmp_uy, it) schedule(dynamic) - do it = 1,Nx - ! Solve for v - call calc_vi_mod(tmp_uy, phi(:,it), kx(it)) - uy(:,it) = tmp_uy - ! Solve for u - if (kx(it) /= 0.0_dp) then - !ux(:,it) = -CI*d1y(tmp_uy)/kx(it) - ux(:,it) = CI*d1y(tmp_uy)/kx(it) - else if (kx(it) == 0.0_dp) then - ux(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! - end if - end do - !$OMP END PARALLEL DO - finish = OMP_GET_WTIME() - write(*,*) " - update sols timing: ", finish-start, "(s)" - - - if (time == t_final) then - exit - end if - - !call update_dt - if (wvtk) then - if (mod(nti,vtk_print) == 0) then - call write_to_vtk(nti, .false.) ! false = Fourier space - end if - end if - - ! Calculate nusselt number. - if (save_nusselt) then - call nusselt(nusselt_num, .true.) ! 
true = Fourier space
-        write(8000, fmt=1000) nusselt_num
-        flush(8000)
-    end if
-
-    finish_overall = OMP_GET_WTIME()
-    write(*,*) "overall timing: ", finish_overall-start_overall, "(s)"
-
-end do ! time loop
-
 end subroutine imex_rk
 
 !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
new file mode 100644
index 0000000..8cf7fa8
--- /dev/null
+++ b/time_integrators_MPI.f90
@@ -0,0 +1,64 @@
+module time_integrators_MPI
+
+use fftw
+use global
+use write_pack
+use allocate_vars
+use bc_setup
+use statistics
+use omp_lib
+
+integer :: it, jt, kkt
+integer :: info
+real(dp) :: time, dtmax, dtmin, dt_old, dt_ramp, dt_final
+
+contains
+
+subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt)
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!! This program solves the equations of thermal convection using a Fourier
+!! spectral method in the x-direction and a 2nd order finite difference scheme in
+!! the y-direction.
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+implicit none
+
+character(3), intent(in) :: proc_id_str
+integer, optional, intent(in) :: vtk_print
+logical, optional, intent(in) :: save_nusselt
+
+integer :: nti
+integer :: nprint
+logical :: wvtk
+real(dp) :: nusselt_num
+real(dp) :: start, finish
+real(dp) :: start_overall, finish_overall
+integer :: nthreads, myid
+integer, EXTERNAL :: OMP_GET_THREAD_NUM, OMP_GET_NUM_THREADS
+real(dp), EXTERNAL :: OMP_GET_WTIME
+EXTERNAL :: OMP_SET_NUM_THREADS
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+call OMP_SET_NUM_THREADS(1)
+
+if (present(vtk_print)) then
+    wvtk = .true.
+    nprint = vtk_print
+else
+    wvtk = .false.
+    nprint = 100
+end if
+write(*,*) "imex_rk_MPI from proc ", proc_id_str
+
+if (wvtk) then
+    call write_to_vtk(0, .false., proc_id_str) ! false = Fourier space
+end if
+
+
+end subroutine imex_rk_MPI
+
+
+end module time_integrators_MPI
+ 
\ No newline at end of file
diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90
index 222a632..380d93c 100644
--- a/time_loop_MPI.f90
+++ b/time_loop_MPI.f90
@@ -10,7 +10,7 @@ program time_loop
 use allocate_vars
 use statistics
 use mesh_pack
-use time_integrators
+use time_integrators_MPI
 
 implicit none
 
@@ -38,7 +38,7 @@ program time_loop
 integer :: mpierror, num_procs
 integer :: proc_id
 character(4) :: proc_id_str
-real(dp) :: mpi_spacing_y 
+real(dp) :: mpi_spacing_y
 integer status(MPI_STATUS_SIZE)
 
@@ -239,6 +239,9 @@ program time_loop
 
 write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows."
 
+! Get solution with time integration
+call imex_rk_MPI(proc_id_str, 1, .true.) ! true causes writing of nusselt number. 
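+! NOTE: imex_rk_MPI is, at this point in the series, a stub that only writes
+! the initial field for each rank; the IMEX-RK stepping itself has not yet
+! been ported over from time_integrators.f90.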
+ call MPI_Finalize(mpierror) end program time_loop diff --git a/write_pack.f90 b/write_pack.f90 index a450353..dd1311d 100755 --- a/write_pack.f90 +++ b/write_pack.f90 @@ -104,7 +104,7 @@ subroutine write_vtk_structured_grid(iter, process_id) write(citer, "(I10)") 1000000000 + iter fiter = citer(2:10) -n = (Nx+1)*Ny*Nx +n = (Nx+1)*Ny*Nz if (PRESENT(process_id)) then open(unit=2, file=vtkloc//vtkname//"_P"//process_id//"_"//fiter//gtype, action="write", status="replace") else @@ -211,7 +211,11 @@ subroutine write_to_vtk(step, physical, process_id) ux(j,:) = tux uy(j,:) = tuy end do - call write_vtk_structured_grid(step) + if (PRESENT(process_id)) then + call write_vtk_structured_grid(step, process_id) + else + call write_vtk_structured_grid(step) + endif do j = 1,Ny ! Bring everything to Fourier space tT = T(j,:) From 86844b7262b6b1668ad9560d39d233ee22f8b524 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Sat, 24 Apr 2021 10:58:24 -0400 Subject: [PATCH 08/24] reset time_integrators --- time_integrators.f90 | 233 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) diff --git a/time_integrators.f90 b/time_integrators.f90 index 6536e92..7cd60c9 100644 --- a/time_integrators.f90 +++ b/time_integrators.f90 @@ -54,6 +54,239 @@ subroutine imex_rk(vtk_print, save_nusselt) call write_to_vtk(0, .false.) ! false = Fourier space end if +dt = dt_init + +call init_bc(acoeffs(1,1)) + +time = 0.0_dp + +dtmax = 0.5_dp +dtmin = 1.0e-4_dp + +dt_ramp = 1.1_dp + +dt_old = dt + +nti = 0 + +! Format for writing out single values. +1000 format(E25.16E3) + +! Time integration +do ! while (time < t_final) + start_overall = OMP_GET_WTIME() + + dt_final = t_final - time + + if (dt_final <= dt) then + time = t_final + else + time = time + dt + end if + + write(*,*) "time = ", time, "dt = ", dt + + nti = nti + 1 + + !::::::::::: + ! STAGE 1 :: + !::::::::::: + phii = phi + Ti = T + uxi = ux + uyi = uy + start = OMP_GET_WTIME() + call calc_explicit(1) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(1) timing: ", finish-start, "(s)" + start = OMP_GET_WTIME() + !$OMP PARALLEL DO num_threads(16) private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + do it = 1,Nx ! kx loop + ! Compute phi1 and T1 + call calc_vari_mod(tmp_phi, tmp_T, acoeffs(1,1), 1,& + kx(it), phi(2:Ny-1,it),& + K1hat_phi(2:Ny-1,it),K2hat_phi(2:Ny-1,it),K3hat_phi(2:Ny-1,it),& + K1hat_T(2:Ny-1,it),K2hat_T(2:Ny-1,it),K3hat_T(2:Ny-1,it),& + K1_phi(2:Ny-1,it), K2_phi(2:Ny-1,it), K1_T(2:Ny-1,it), K2_T(2:Ny-1,it),& + T(:,it)) + ! Compute v1 from phi1 + call calc_vi_mod(tmp_uy, tmp_phi, kx(it)) + ! BOUNDAY CONDITIONS! + call update_bcs_mod(tmp_phi1,tmp_uy1, tmp_phi,tmp_uy,dyv1_T(it),dyv2_T(it),& + dyv1_B(it),dyv2_B(it),& + V1(:,it),V2(:,it),phi1(:,it),phi2(:,it)) + tmp_phi = tmp_phi1 + tmp_uy = tmp_uy1 + call calc_implicit_mod(tmp_K_phi,tmp_K_T, tmp_phi,tmp_T, kx(it)) + K1_phi(:,it) = tmp_K_phi + K1_T(:,it) = tmp_K_T + ! Compute u1 from v1 + if (kx(it) /= 0.0_dp) then + !uxi(:,it) = -CI*d1y(tmp_uy)/kx(it) + uxi(:,it) = CI*d1y(tmp_uy)/kx(it) + else if (kx(it) == 0.0_dp) then + uxi(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + phii(:,it) = tmp_phi + Ti (:,it) = tmp_T + uyi (:,it) = tmp_uy + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 1 mid timing: ", finish-start, "(s)" + ! 
Compute K2hat + start = OMP_GET_WTIME() + call calc_explicit(2) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(2) timing: ", finish-start, "(s)" + + !::::::::::: + ! STAGE 2 :: + !::::::::::: + start = OMP_GET_WTIME() + !$OMP PARALLEL DO num_threads(16) private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + do it = 1,Nx ! kx loop + ! Compute phi2 and T2 + call calc_vari_mod(tmp_phi, tmp_T, acoeffs(2,2), 2,& + kx(it), phi(2:Ny-1,it),& + K1hat_phi(2:Ny-1,it),K2hat_phi(2:Ny-1,it),K3hat_phi(2:Ny-1,it),& + K1hat_T(2:Ny-1,it),K2hat_T(2:Ny-1,it),K3hat_T(2:Ny-1,it),& + K1_phi(2:Ny-1,it), K2_phi(2:Ny-1,it), K1_T(2:Ny-1,it), K2_T(2:Ny-1,it),& + T(:,it)) + ! Compute v1 from phi1 + call calc_vi_mod(tmp_uy, tmp_phi, kx(it)) + ! BOUNDAY CONDITIONS! + call update_bcs_mod(tmp_phi1,tmp_uy1, tmp_phi,tmp_uy,dyv1_T(it),dyv2_T(it),& + dyv1_B(it),dyv2_B(it),& + V1(:,it),V2(:,it),phi1(:,it),phi2(:,it)) + tmp_phi = tmp_phi1 + tmp_uy = tmp_uy1 + ! Compute K2_T and K2_phi + call calc_implicit_mod(tmp_K_phi,tmp_K_T, tmp_phi,tmp_T, kx(it)) + K2_phi(:,it) = tmp_K_phi + K2_T(:,it) = tmp_K_T + ! Compute u1 from v1 + if (kx(it) /= 0.0_dp) then + !uxi(:,it) = -CI*d1y(tmp_uy)/kx(it) + uxi(:,it) = CI*d1y(tmp_uy)/kx(it) + else if (kx(it) == 0.0_dp) then + uxi(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + phii(:,it) = tmp_phi + Ti (:,it) = tmp_T + uyi (:,it) = tmp_uy + end do + finish = OMP_GET_WTIME() + write(*,*) " - stage 2 mid timing: ", finish-start, "(s)" + ! Compute K3hat + start = OMP_GET_WTIME() + call calc_explicit(3) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(3) timing: ", finish-start, "(s)" + + !::::::::::: + ! STAGE 3 :: + !::::::::::: + start = OMP_GET_WTIME() + !$OMP PARALLEL DO num_threads(16) private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + do it = 1,Nx ! kx loop + ! Compute phi3 and T3 + call calc_vari_mod(tmp_phi, tmp_T, acoeffs(3,3), 3,& + kx(it), phi(2:Ny-1,it),& + K1hat_phi(2:Ny-1,it),K2hat_phi(2:Ny-1,it),K3hat_phi(2:Ny-1,it),& + K1hat_T(2:Ny-1,it),K2hat_T(2:Ny-1,it),K3hat_T(2:Ny-1,it),& + K1_phi(2:Ny-1,it), K2_phi(2:Ny-1,it), K1_T(2:Ny-1,it), K2_T(2:Ny-1,it),& + T(:,it)) + ! Compute v1 from phi1 + call calc_vi_mod(tmp_uy, tmp_phi, kx(it)) + ! BOUNDAY CONDITIONS! + call update_bcs_mod(tmp_phi1,tmp_uy1, tmp_phi,tmp_uy,dyv1_T(it),dyv2_T(it),& + dyv1_B(it),dyv2_B(it),& + V1(:,it),V2(:,it),phi1(:,it),phi2(:,it)) + tmp_phi = tmp_phi1 + tmp_uy = tmp_uy1 + ! Compute K3_T and K3_phi + call calc_implicit_mod(tmp_K_phi,tmp_K_T, tmp_phi,tmp_T, kx(it)) + K3_phi(:,it) = tmp_K_phi + K3_T(:,it) = tmp_K_T + ! Compute u1 from v1 + if (kx(it) /= 0.0_dp) then + !uxi(:,it) = -CI*d1y(tmp_uy)/kx(it) + uxi(:,it) = CI*d1y(tmp_uy)/kx(it) + else if (kx(it) == 0.0_dp) then + uxi(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + phii(:,it) = tmp_phi + Ti (:,it) = tmp_T + uyi (:,it) = tmp_uy + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 3 mid timing: ", finish-start, "(s)" + ! Compute K4hat + start = OMP_GET_WTIME() + call calc_explicit(4) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(4) timing: ", finish-start, "(s)" + + ! UPDATE SOLUTIONS + + start = OMP_GET_WTIME() + ! Get phi + phi(2:Ny-1,:) = phi(2:Ny-1,:) + dt*(b(1)*(K1_phi(2:Ny-1,:) + K2hat_phi(2:Ny-1,:)) + & + & b(2)*(K2_phi(2:Ny-1,:) + K3hat_phi(2:Ny-1,:)) + & + & b(3)*(K3_phi(2:Ny-1,:) + K4hat_phi(2:Ny-1,:))) + + ! 
Get temperature
+    T(2:Ny-1,:) = T(2:Ny-1,:) + dt*(b(1)*(K1_T(2:Ny-1,:) + K2hat_T(2:Ny-1,:)) + &
+                                  & b(2)*(K2_T(2:Ny-1,:) + K3hat_T(2:Ny-1,:)) + &
+                                  & b(3)*(K3_T(2:Ny-1,:) + K4hat_T(2:Ny-1,:)))
+
+    ! Get ux and uy
+    !$OMP PARALLEL DO num_threads(16) private(tmp_uy, it) schedule(dynamic)
+    do it = 1,Nx
+        ! Solve for v
+        call calc_vi_mod(tmp_uy, phi(:,it), kx(it))
+        uy(:,it) = tmp_uy
+        ! Solve for u
+        if (kx(it) /= 0.0_dp) then
+            !ux(:,it) = -CI*d1y(tmp_uy)/kx(it)
+            ux(:,it) = CI*d1y(tmp_uy)/kx(it)
+        else if (kx(it) == 0.0_dp) then
+            ux(:,it) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow!
+        end if
+    end do
+    !$OMP END PARALLEL DO
+    finish = OMP_GET_WTIME()
+    write(*,*) " - update sols timing: ", finish-start, "(s)"
+
+
+    if (time == t_final) then
+        exit
+    end if
+
+    !call update_dt
+    ! Don't write vtk for now.
+    wvtk = .false.
+    if (wvtk) then
+        if (mod(nti,vtk_print) == 0) then
+            call write_to_vtk(nti, .false.) ! false = Fourier space
+        end if
+    end if
+
+    ! Calculate nusselt number.
+    if (save_nusselt) then
+        call nusselt(nusselt_num, .true.) ! true = Fourier space
+        write(8000, fmt=1000) nusselt_num
+        flush(8000)
+    end if
+
+    finish_overall = OMP_GET_WTIME()
+    write(*,*) "overall timing: ", finish_overall-start_overall, "(s)"
+
+end do ! time loop
+
+
 end subroutine imex_rk
 
 !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!

From 9f0e205494576dc19f5f23c7efcd1452fec25254 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Mon, 26 Apr 2021 14:29:35 -0400
Subject: [PATCH 09/24] fixing spacing

---
 mesh_pack.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mesh_pack.f90 b/mesh_pack.f90
index 034c0cf..8d80ef2 100755
--- a/mesh_pack.f90
+++ b/mesh_pack.f90
@@ -49,7 +49,7 @@ subroutine cosine_mesh_MPI(xcoor,ycoor,zcoor, numx,numy,numz, proc_id,num_procs,
 
 integer, intent(in) :: numx, numy, numz
 integer, intent(in) :: proc_id, num_procs
-integer :: mpierror 
+integer :: mpierror
 integer :: ii, jj, kk
 real(dp) :: c

From 7afdcc56253eb44a158f8d6be9a4e6e01f3b19 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Mon, 26 Apr 2021 18:57:22 -0400
Subject: [PATCH 10/24] supporting MPI dynu calculations

---
 mesh_pack.f90     | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 time_loop_MPI.f90 |  7 +++----
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/mesh_pack.f90 b/mesh_pack.f90
index 8d80ef2..8d937f5 100755
--- a/mesh_pack.f90
+++ b/mesh_pack.f90
@@ -1,6 +1,7 @@
 module mesh_pack
 
 use global
+include 'mpif.h'
 
 contains
@@ -107,6 +108,57 @@ subroutine dxdydz(dyj, xcoor,ycoor,zcoor)
 end do
 
 end subroutine dxdydz
+
+! :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+
+subroutine dxdydz_MPI(dyj, xcoor,ycoor,zcoor, proc_id, num_procs)
+
+implicit none
+
+real(dp), dimension(:), intent(in) :: xcoor, ycoor, zcoor
+integer, intent(in) :: proc_id, num_procs
+real(dp), allocatable, dimension(:), intent(out) :: dyj
+integer :: alloc_err, mpierror
+integer :: numy, jj
+real(dp) :: prev_element
+
+numy = size(ycoor)
+
+if (proc_id == 0) then
+    allocate(dyj(numy-1), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    ! Calculate grid spacing
+    do jj = 2,numy
+        dyj(jj-1) = ycoor(jj) - ycoor(jj-1)
+    end do
+    ! Send last element to next process to the right.
+    call MPI_SEND(ycoor(numy), 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, mpierror)
+else if (proc_id == num_procs - 1) then
+    ! Receive first element from next process to the left. 
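+    ! (Halo exchange: every rank except the last sends its final ycoor value to
+    ! the right, and every rank except rank 0 receives one ghost value from the
+    ! left, so that the spacing dyj(1) across the rank boundary can be formed.)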
+    call MPI_RECV(prev_element, 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+    allocate(dyj(numy), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    ! Calculate grid spacing
+    dyj(1) = ycoor(1) - prev_element
+    do jj = 2,numy
+        dyj(jj) = ycoor(jj) - ycoor(jj-1)
+    end do
+else
+    ! Receive first element from next process to the left.
+    call MPI_RECV(prev_element, 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+    ! Send last element to next process to the right.
+    call MPI_SEND(ycoor(numy), 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, mpierror)
+    allocate(dyj(numy), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    ! Calculate grid spacing
+    dyj(1) = ycoor(1) - prev_element
+    do jj = 2,numy
+        dyj(jj) = ycoor(jj) - ycoor(jj-1)
+    end do
+end if
+
+
+end subroutine dxdydz_MPI
 ! :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 
 subroutine y_mesh_params
diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90
index 380d93c..c2fffd9 100644
--- a/time_loop_MPI.f90
+++ b/time_loop_MPI.f90
@@ -13,9 +13,8 @@ program time_loop
 use time_integrators_MPI
 
 implicit none
-include 'mpif.h'
 
-integer :: ntokens, ios
+integer :: ntokens, ios, i
 integer :: Nxc, Nyc, Nzc
 integer :: nRa, ii, jj
@@ -129,6 +128,7 @@ program time_loop
 mpi_spacing_y = (ytop - ybot) / num_procs
 ybot = ybot + proc_id * mpi_spacing_y
 ytop = ybot + mpi_spacing_y
+write(proc_id_str, "(I3.3)") proc_id
 
 ! Create FFT plans
 planuy = fftw_plan_dft_1d(Nx,tuy,tuy, FFTW_FORWARD,FFTW_ESTIMATE)
@@ -163,7 +163,7 @@ program time_loop
 
 call cosine_mesh_MPI(xp,yp,zp, Nx,Ny,Nz, proc_id, num_procs) ! get coordinates
-call dxdydz(dynu, xp,yp,zp) ! get mesh spacing for nonuniform grids
+call dxdydz_MPI(dynu, xp,yp,zp, proc_id, num_procs) ! get mesh spacing for nonuniform grids
 call y_mesh_params ! get metric coefficients for nonuniform grids
 dymin = minval(dynu)
 dxmin = sqrt(dx**2.0_dp + dymin**2.0_dp)
@@ -179,7 +179,6 @@ program time_loop
 
 ! Write initial field to vtk
 if (wvtk) then
-    write(proc_id_str, "(I3.3)") proc_id
     call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space
 end if

From fdc01beb3bc6c81573d3b521e131b9c6d1f612ae Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Mon, 26 Apr 2021 22:14:42 -0400
Subject: [PATCH 11/24] supporting MPI construction of g1,g2,g3,h1,h2,h3

---
 mesh_pack.f90     |  89 +++++++++++++++++++++
 time_loop_MPI.f90 | 164 +++++++++++++++++++++++++++------------------
 2 files changed, 184 insertions(+), 69 deletions(-)

diff --git a/mesh_pack.f90 b/mesh_pack.f90
index 8d937f5..cec5b2d 100755
--- a/mesh_pack.f90
+++ b/mesh_pack.f90
@@ -192,6 +192,95 @@ subroutine y_mesh_params
 
 end subroutine y_mesh_params
 ! :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 
+subroutine y_mesh_params_MPI (proc_id, num_procs)
+
+implicit none
+
+integer, intent(in) :: proc_id, num_procs
+real(dp) :: recv_val
+integer :: mpierror, jj
+
+! Calculate metric coefficients
+! g is for second derivatives
+! h is for first derivatives
+
+! First element
+if (proc_id == 0) then
+    call MPI_RECV(recv_val, 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+    ! 
First elems + g1(1) = 2.0_dp / (dynu(1)*(dynu(1)+dynu(2))) + g2(1) = -2.0_dp / (dynu(1)*dynu(2)) + g3(1) = 2.0_dp / (dynu(2)*(dynu(1)+dynu(2))) + + h1(1) = -(2.0_dp*dynu(1)+dynu(2)) / (dynu(1)*(dynu(1)+dynu(2))) + h2(1) = (dynu(1) + dynu(2)) / (dynu(1)*dynu(2)) + h3(1) = -dynu(1) / (dynu(2)*(dynu(1)+dynu(2))) + + ! Middle Ny-2 elements. + do jj = 2,Ny-1 + g1(jj) = 2.0_dp / (dynu(jj-1)*(dynu(jj)+dynu(jj-1))) + g2(jj) = -2.0_dp / (dynu(jj-1)*dynu(jj)) + g3(jj) = 2.0_dp / (dynu( jj )*(dynu(jj)+dynu(jj-1))) + + h1(jj) = -dynu(jj) / (dynu(jj-1)*(dynu(jj-1) + dynu(jj))) + h2(jj) = (dynu(jj) - dynu(jj-1)) / (dynu(jj-1)*dynu(jj)) + h3(jj) = dynu(jj-1) / (dynu(jj)*(dynu(jj-1) + dynu(jj))) + end do + + ! Last elems + g1(Ny) = 2.0_dp / (dynu(Ny-1)*(recv_val+dynu(Ny-1))) + g2(Ny) = -2.0_dp / (dynu(Ny-1)*recv_val) + g3(Ny) = 2.0_dp / (recv_val*(recv_val+dynu(Ny-1))) + + h1(Ny) = -recv_val / (dynu(Ny-1)*(dynu(Ny-1) + recv_val)) + h2(Ny) = (recv_val - dynu(Ny-1)) / (dynu(Ny-1)*recv_val) + h3(Ny) = dynu(Ny-1) / (recv_val*(dynu(Ny-1) + recv_val)) +else if (proc_id == num_procs - 1) then + call MPI_SEND(dynu(1), 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, mpierror) + do jj = 1,Ny-1 + g1(jj) = 2.0_dp / (dynu(jj)*(dynu(jj+1)+dynu(jj))) + g2(jj) = -2.0_dp / (dynu(jj)*dynu(jj+1)) + g3(jj) = 2.0_dp / (dynu(jj+1)*(dynu(jj+1)+dynu(jj))) + + h1(jj) = -dynu(jj+1) / (dynu(jj)*(dynu(jj) + dynu(jj+1))) + h2(jj) = (dynu(jj+1) - dynu(jj)) / (dynu(jj)*dynu(jj+1)) + h3(jj) = dynu(jj) / (dynu(jj+1)*(dynu(jj) + dynu(jj+1))) + end do + h1(Ny) = dynu(Ny) / (dynu(Ny-1)*(dynu(Ny-1)+dynu(Ny))) + h2(Ny) = -(dynu(Ny-1) + dynu(Ny)) / (dynu(Ny-1)*dynu(Ny)) + h3(Ny) = (2.0_dp*dynu(Ny) + dynu(Ny-1)) / (dynu(Ny)*(dynu(Ny-1) + dynu(Ny))) + + g1(Ny) = 2.0_dp / (dynu(Ny-1)*(dynu(Ny-1) + dynu(Ny))) + g2(Ny) = -2.0_dp / (dynu(Ny)*dynu(Ny-1)) + g3(Ny) = 2.0_dp / (dynu(Ny)*(dynu(Ny) + dynu(Ny-1))) + +else + call MPI_SEND(dynu(1), 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, mpierror) + call MPI_RECV(recv_val, 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! All but last elems. + do jj = 1,Ny-1 + g1(jj) = 2.0_dp / (dynu(jj)*(dynu(jj+1)+dynu(jj))) + g2(jj) = -2.0_dp / (dynu(jj)*dynu(jj+1)) + g3(jj) = 2.0_dp / (dynu(jj+1)*(dynu(jj+1)+dynu(jj))) + + h1(jj) = -dynu(jj+1) / (dynu(jj)*(dynu(jj) + dynu(jj+1))) + h2(jj) = (dynu(jj+1) - dynu(jj)) / (dynu(jj)*dynu(jj+1)) + h3(jj) = dynu(jj) / (dynu(jj+1)*(dynu(jj) + dynu(jj+1))) + end do + ! Last elems + g1(Ny) = 2.0_dp / (dynu(Ny)*(recv_val+dynu(Ny))) + g2(Ny) = -2.0_dp / (dynu(Ny)*recv_val) + g3(Ny) = 2.0_dp / (recv_val*(recv_val+dynu(Ny))) + + h1(Ny) = -recv_val / (dynu(Ny)*(dynu(Ny) + recv_val)) + h2(Ny) = (recv_val - dynu(Ny)) / (dynu(Ny)*recv_val) + h3(Ny) = dynu(Ny) / (recv_val*(dynu(Ny) + recv_val)) +end if + +end subroutine y_mesh_params_MPI +! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + subroutine uniform_mesh(xcoor,ycoor,zcoor, numx,numy,numz) implicit none diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90 index c2fffd9..e01673b 100644 --- a/time_loop_MPI.f90 +++ b/time_loop_MPI.f90 @@ -164,82 +164,108 @@ program time_loop call cosine_mesh_MPI(xp,yp,zp, Nx,Ny,Nz, proc_id, num_procs) ! get coordinates call dxdydz_MPI(dynu, xp,yp,zp, proc_id, num_procs) ! get mesh spacing for nonuniform grids -call y_mesh_params ! get metric coefficients for nonuniform grids -dymin = minval(dynu) -dxmin = sqrt(dx**2.0_dp + dymin**2.0_dp) +call y_mesh_params_MPI(proc_id, num_procs) ! get metric coefficients for nonuniform grids -! 
Fourier modes -do ii = 1,Nx/2 - kx_modes(ii) = real(ii,kind=dp) - 1.0_dp -end do -do ii = Nx/2+1, Nx - kx_modes(ii) = real(ii-Nx,kind=dp) - 1.0_dp -end do -kx = alpha*kx_modes +open(unit=9000, file=proc_id_str//"g1.txt", action="write", status="unknown") +open(unit=9001, file=proc_id_str//"g2.txt", action="write", status="unknown") +open(unit=9002, file=proc_id_str//"g3.txt", action="write", status="unknown") +open(unit=9003, file=proc_id_str//"h1.txt", action="write", status="unknown") +open(unit=9004, file=proc_id_str//"h2.txt", action="write", status="unknown") +open(unit=9005, file=proc_id_str//"h3.txt", action="write", status="unknown") -! Write initial field to vtk -if (wvtk) then - call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space -end if +call MPI_BARRIER(MPI_COMM_WORLD, mpierror) -! Initialize fields. -call init_fields(ex_Tptrb, Ra) -call init_to_fourier(ex_Tptrb) - -! Get nu0 and kappa0 -call global_params_Ra(Ra) - -if (proc_id == 0) then - write(*,'(A70)') ' ' - write(*,'(A70)') '*********************************************************************|' - write(*,'(A70)') ' TWO-DIMENSIONAL THERMAL CONVECTION |' - write(*,'(A70)') '*********************************************************************|' - write(*,'(5X,A,47X,A)') 'COMPUTATION SIZE:', '|' - write(*,'(A69,A)') ' ','|' - write(*,'(20X,A23,I5,21X,A)') 'Nx = ', Nx, '|' - write(*,'(20X,A23,I5,21X,A)') 'Ny = ', Ny, '|' - write(*,'(20X,A23,I5,21X,A)') 'Nz = ', Nz, '|' - write(*,'(20X,A23,I5,21X,A)') 'Number of time steps = ', nt, '|' - write(*,'(20X,A23,I5,21X,A)') 'Number of Fourier modes = ', Nf, '|' - write(*,'(A69,A)') ' ','|' - write(*,'(A70)') '*********************************************************************|' - write(*,'(5X,A,45X,A)') 'PROBLEM PARAMETERS:', '|' - write(*,'(A69,A)') ' ','|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Prandtl number (Pr) = ', Pr, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Rayleigh number (Ra) = ', Ra, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Reynolds number (Re) = ', sqrt(Ra/(16.0_dp*Pr)), '|' - write(*,'(A69,A)') ' ','|' - write(*,'(A70)') '*********************************************************************|' - write(*,'(5X,A,42X,A)') 'PHYSICAL PROBLEM SIZE:', '|' - write(*,'(A69,A)') ' ','|' - write(*,'(10X,A32,ES16.8,11X,A)') 'alpha = ', alpha, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Left coordinate of box = ', xL, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Right coordinate of box = ', xR, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of bottom wall = ', ybot, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of top wall = ', ytop, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Box width = ', Lx, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Box height = ', Ly, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Aspect Ratio = ', Lx/Ly, '|' - write(*,'(A69,A)') ' ','|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Integration time (T) = ', t_final, '|' - write(*,'(10X,A32,ES16.8,11X,A)') 'Time step size (Delta t) = ', dt, '|' - write(*,'(A69,A)') ' ','|' - write(*,'(A70)') '*********************************************************************|' - write(*,'(A70)') '*********************************************************************|' - write(*,'(A70)') ' ' +do i=1,Ny + write (9000,*) g1(i) + write (9001,*) g2(i) + write (9002,*) g3(i) + write (9003,*) h1(i) + write (9004,*) h2(i) + write (9005,*) h3(i) +end do + +close(unit=9000) +close(unit=9001) +close(unit=9002) +close(unit=9003) +close(unit=9004) +close(unit=9005) + +! dymin = minval(dynu) +! 
dxmin = sqrt(dx**2.0_dp + dymin**2.0_dp) + +! ! Fourier modes +! do ii = 1,Nx/2 +! kx_modes(ii) = real(ii,kind=dp) - 1.0_dp +! end do +! do ii = Nx/2+1, Nx +! kx_modes(ii) = real(ii-Nx,kind=dp) - 1.0_dp +! end do +! kx = alpha*kx_modes + +! ! Write initial field to vtk +! if (wvtk) then +! call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space +! end if + +! ! Initialize fields. +! call init_fields(ex_Tptrb, Ra) +! call init_to_fourier(ex_Tptrb) + +! ! Get nu0 and kappa0 +! call global_params_Ra(Ra) + +! if (proc_id == 0) then +! write(*,'(A70)') ' ' +! write(*,'(A70)') '*********************************************************************|' +! write(*,'(A70)') ' TWO-DIMENSIONAL THERMAL CONVECTION |' +! write(*,'(A70)') '*********************************************************************|' +! write(*,'(5X,A,47X,A)') 'COMPUTATION SIZE:', '|' +! write(*,'(A69,A)') ' ','|' +! write(*,'(20X,A23,I5,21X,A)') 'Nx = ', Nx, '|' +! write(*,'(20X,A23,I5,21X,A)') 'Ny = ', Ny, '|' +! write(*,'(20X,A23,I5,21X,A)') 'Nz = ', Nz, '|' +! write(*,'(20X,A23,I5,21X,A)') 'Number of time steps = ', nt, '|' +! write(*,'(20X,A23,I5,21X,A)') 'Number of Fourier modes = ', Nf, '|' +! write(*,'(A69,A)') ' ','|' +! write(*,'(A70)') '*********************************************************************|' +! write(*,'(5X,A,45X,A)') 'PROBLEM PARAMETERS:', '|' +! write(*,'(A69,A)') ' ','|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Prandtl number (Pr) = ', Pr, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Rayleigh number (Ra) = ', Ra, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Reynolds number (Re) = ', sqrt(Ra/(16.0_dp*Pr)), '|' +! write(*,'(A69,A)') ' ','|' +! write(*,'(A70)') '*********************************************************************|' +! write(*,'(5X,A,42X,A)') 'PHYSICAL PROBLEM SIZE:', '|' +! write(*,'(A69,A)') ' ','|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'alpha = ', alpha, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Left coordinate of box = ', xL, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Right coordinate of box = ', xR, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of bottom wall = ', ybot, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of top wall = ', ytop, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Box width = ', Lx, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Box height = ', Ly, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Aspect Ratio = ', Lx/Ly, '|' +! write(*,'(A69,A)') ' ','|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Integration time (T) = ', t_final, '|' +! write(*,'(10X,A32,ES16.8,11X,A)') 'Time step size (Delta t) = ', dt, '|' +! write(*,'(A69,A)') ' ','|' +! write(*,'(A70)') '*********************************************************************|' +! write(*,'(A70)') '*********************************************************************|' +! write(*,'(A70)') ' ' - flush(6) - ! Print MPI division. - write(*,*) "Ny = ", Ny*num_procs, " divided among ", num_procs, " processors -> ", & - Ny, " rows per processor." -end if +! flush(6) +! ! Print MPI division. +! write(*,*) "Ny = ", Ny*num_procs, " divided among ", num_procs, " processors -> ", & +! Ny, " rows per processor." +! end if -call MPI_BARRIER(MPI_COMM_WORLD, mpierror) +! call MPI_BARRIER(MPI_COMM_WORLD, mpierror) -write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows." +! write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows." -! Get solution with time integration -call imex_rk_MPI(proc_id_str, 1, .true.) ! true causes writing of nusselt number. +! ! 
Get solution with time integration
! call imex_rk_MPI(proc_id_str, 1, .true.) ! true causes writing of nusselt number.

call MPI_Finalize(mpierror)


From b2864f5ec43e189f3ec4f7b04dc1687de4641733 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Tue, 27 Apr 2021 12:16:43 -0400
Subject: [PATCH 12/24] support boundary condition initialization in MPI

---
 bc_setup.f90             | 165 +++++++++++++++++++++++++++++++++++++++
 time_integrators_MPI.f90 |  22 +++++-
 time_loop_MPI.f90        | 161 ++++++++++++++++----------------------
 3 files changed, 253 insertions(+), 95 deletions(-)

diff --git a/bc_setup.f90 b/bc_setup.f90
index 10e412d..893aca1 100755
--- a/bc_setup.f90
+++ b/bc_setup.f90
@@ -4,6 +4,7 @@ module bc_setup
 use write_pack
 
 implicit none
+include 'mpif.h'
 
 contains
 
@@ -138,4 +139,168 @@ subroutine init_bc(aii)
 
 end subroutine init_bc
 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str)
+
+implicit none
+
+real(dp), intent(in) :: aii
+integer, intent(in) :: proc_id, num_procs
+character(3), optional, intent(in) :: proc_id_str
+real(dp) :: pnu, qnu
+real(dp) :: wavex
+
+real(dp), allocatable, dimension(:) :: d, dl, du
+real(dp), allocatable, dimension(:) :: phi1_b, phi1_t
+real(dp), allocatable, dimension(:) :: g1_total, g2_total, g3_total
+real(dp), allocatable, dimension(:) :: recv_col, recv_col2, last_row_recv
+real(dp), allocatable, dimension(:) :: phi2_b, phi2_t
+real(dp), allocatable, dimension(:,:) :: Fphi12, FV12
+
+
+integer :: ii, jj, i, total_ny, otherproc, start
+integer :: info, mpierror
+
+! Only do allocation on main node.
+if(proc_id == 0) then
+    total_ny = Ny * num_procs
+
+    ! Allocate local variables
+    allocate(Fphi12(total_ny-2,2), FV12(total_ny-2,2), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(d(total_ny-2), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(dl(total_ny-3), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(du(total_ny-3), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(phi1_b(Nx), phi1_t(Nx), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(phi2_b(Nx), phi2_t(Nx), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+
+    ! Need to pull all of g1, g2, g3 onto main node for calculations.
+    allocate(g1_total(total_ny), g2_total(total_ny), g3_total(total_ny), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+
+    ! Receive g1,g2,g3 from all other nodes.
+    do otherproc = 1,num_procs-1
+        start = otherproc * Ny + 1
+        call MPI_RECV(g1_total(start), Ny, MPI_DOUBLE, otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+        call MPI_RECV(g2_total(start), Ny, MPI_DOUBLE, otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+        call MPI_RECV(g3_total(start), Ny, MPI_DOUBLE, otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+    end do
+    g1_total(1:Ny) = g1
+    g2_total(1:Ny) = g2
+    g3_total(1:Ny) = g3
+
+    Fphi12 = 0.0_dp
+    FV12 = 0.0_dp
+    d = 0.0_dp
+    dl = 0.0_dp
+    du = 0.0_dp
+    phi1_b = 1.0_dp
+    phi1_t = 0.0_dp
+    phi2_b = 0.0_dp
+    phi2_t = 1.0_dp
+
+    phi1_b(1) = 0.0_dp
+    phi2_t(1) = 0.0_dp
+
+    ! Compute phi1 and phi2 from D2 phi_i = 0 where
+    ! D2 = -(alpha*kx)^2 + dy^2 and phi1 satisfies the BCs
+    ! phi1 = 1 at the bottom wall and phi1 = 0 at the top wall.
+    ! phi2 satisfies phi2 = 0 at the bottom wall and phi2 = 1
+    ! at the top wall.
+
+    ! A comment on how these arrays are formed. Since we are only
+    ! dealing with Dirichlet conditions at the walls, we only need arrays of size
+    ! Ny-2, i.e.
we don't need the values at the walls. Thus, in the calculations + ! below, F(1) would correspond to point number 2 and F(Ny-2) would correspond + ! to point Ny-1. + + ! Some parameters + pnu = nu0*dt*aii + + do ii = 1,Nx + + Fphi12 = 0.0_dp + + if (abs(kx(ii)/alpha) > Nf/2) then + phi1_b(ii) = 0.0_dp + phi2_t(ii) = 0.0_dp + end if + + wavex = kx(ii) + + qnu = 1.0_dp + pnu*wavex**2.0_dp + + do jj = 2,total_ny-1 + d(jj-1) = qnu - pnu*g2_total(jj) + end do + + do jj = 2,total_ny-2 + du(jj-1) = -pnu*g3_total(jj) + end do + + do jj = 3,total_ny-1 + dl(jj-2) = -pnu*g1_total(jj) + end do + + Fphi12(1,1) = pnu*g1_total(2)*phi1_b(ii) + Fphi12(total_ny-2,2) = pnu*g3_total(total_ny-1)*phi2_t(ii) + + ! Solve the system Aphi phi = Fphi + call dgtsv(total_ny-2, 2, dl, d, du, Fphi12, total_ny-2, info) + + ! Put phi1 and phi2 together in main node. + phi1(1,ii) = phi1_b(ii) + phi1(2:Ny,ii) = Fphi12(1:Ny-1,1) + phi2(1,ii) = 0.0_dp + phi2(2:Ny,ii) = Fphi12(1:Ny-1,2) + + ! Send phi1 and phi2 columns to each node. + do otherproc = 1,num_procs-1 + start = otherproc * Ny + ! write(*,*) start, Fphi12 + call MPI_SEND(Fphi12(start,1), Ny, MPI_DOUBLE, otherproc, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(Fphi12(start,2), Ny, MPI_DOUBLE, otherproc, 46, MPI_COMM_WORLD, mpierror) + end do + end do + + ! Send last row to final proc. + call MPI_SEND(phi2_t(1), Nx, MPI_DOUBLE, num_procs-1, 47, MPI_COMM_WORLD, mpierror) + +else + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), 12, MPI_DOUBLE, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), 12, MPI_DOUBLE, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), 12, MPI_DOUBLE, 0, 44, MPI_COMM_WORLD, mpierror) + + allocate(recv_col(Ny), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(recv_col2(Ny), stat=alloc_err) + call check_alloc_err(alloc_err) + + ! Receive rows of phi1 and phi2. + do ii = 1,Nx + call MPI_RECV(recv_col, Ny, MPI_DOUBLE, 0, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(recv_col2, Ny, MPI_DOUBLE, 0, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + phi1(1:Ny, ii) = recv_col + phi2(1:Ny, ii) = recv_col2 + end do + + ! Handle boundary of last processor. + if (proc_id == num_procs - 1) then + allocate(last_row_recv(Nx), stat=alloc_err) + call check_alloc_err(alloc_err) + call MPI_RECV(last_row_recv, Nx, MPI_DOUBLE, 0, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + phi1(Ny,1:Nx) = 0.0_dp + phi2(Ny,1:Nx) = last_row_recv + end if +end if + +end subroutine init_bc_MPI + end module bc_setup diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90 index 8cf7fa8..0e27029 100644 --- a/time_integrators_MPI.f90 +++ b/time_integrators_MPI.f90 @@ -14,7 +14,7 @@ module time_integrators_MPI contains -subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt) +subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! This progam solves the equations of thermal convection using a Fourier @@ -28,8 +28,9 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt) character(3), intent(in) :: proc_id_str integer, optional, intent(in) :: vtk_print logical, optional, intent(in) :: save_nusselt +integer, intent(in) :: proc_id, num_procs -integer :: nti +integer :: nti, i, j integer :: nprint logical :: wvtk real(dp) :: nusselt_num @@ -50,12 +51,29 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt) wvtk = .false. 
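!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
! On the dgtsv calls in init_bc_MPI above: LAPACK's dgtsv solves a general
! tridiagonal system A*x = b and overwrites both the three diagonals and the
! right-hand side (the solution is returned in b), which is why dl, d, du are
! rebuilt before every solve. A minimal usage sketch (assumes LAPACK is
! linked; all names are local to the sketch):
!
!   program dgtsv_demo
!     implicit none
!     integer, parameter :: rk = kind(1.0d0)
!     real(rk) :: dl(2), d(3), du(2), b(3,1)
!     integer  :: info
!     d = 2.0_rk; dl = 1.0_rk; du = 1.0_rk     ! A = tridiag(1, 2, 1)
!     b(:,1) = [3.0_rk, 4.0_rk, 3.0_rk]        ! = A*[1,1,1]
!     call dgtsv(3, 1, dl, d, du, b, 3, info)
!     write(*,*) info, b(:,1)                  ! expect info = 0, x = 1 1 1
!   end program dgtsv_demo
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!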
nprint = 100 end if + write(*,*) "imex_rk_MPI from proc ", proc_id_str if (wvtk) then call write_to_vtk(0, .false., proc_id_str) ! false = Fourier space end if +dt = dt_init + +call init_bc_MPI(acoeffs(1,1), proc_id, num_procs, proc_id_str) + +open(unit=9000, file="P"//proc_id_str//"phi1.txt", action="write", status="unknown") +open(unit=9001, file="P"//proc_id_str//"phi2.txt", action="write", status="unknown") + +do i=1,Ny + do j=1,Nx + write (9000,*) phi1(i,j) + write (9001,*) phi2(i,j) + end do +end do + +close(unit=9000) +close(unit=9001) end subroutine imex_rk_MPI diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90 index e01673b..0e2956b 100644 --- a/time_loop_MPI.f90 +++ b/time_loop_MPI.f90 @@ -166,106 +166,81 @@ program time_loop call dxdydz_MPI(dynu, xp,yp,zp, proc_id, num_procs) ! get mesh spacing for nonuniform grids call y_mesh_params_MPI(proc_id, num_procs) ! get metric coefficients for nonuniform grids -open(unit=9000, file=proc_id_str//"g1.txt", action="write", status="unknown") -open(unit=9001, file=proc_id_str//"g2.txt", action="write", status="unknown") -open(unit=9002, file=proc_id_str//"g3.txt", action="write", status="unknown") -open(unit=9003, file=proc_id_str//"h1.txt", action="write", status="unknown") -open(unit=9004, file=proc_id_str//"h2.txt", action="write", status="unknown") -open(unit=9005, file=proc_id_str//"h3.txt", action="write", status="unknown") +dymin = minval(dynu) +dxmin = sqrt(dx**2.0_dp + dymin**2.0_dp) -call MPI_BARRIER(MPI_COMM_WORLD, mpierror) - -do i=1,Ny - write (9000,*) g1(i) - write (9001,*) g2(i) - write (9002,*) g3(i) - write (9003,*) h1(i) - write (9004,*) h2(i) - write (9005,*) h3(i) +! Fourier modes +do ii = 1,Nx/2 + kx_modes(ii) = real(ii,kind=dp) - 1.0_dp end do +do ii = Nx/2+1, Nx + kx_modes(ii) = real(ii-Nx,kind=dp) - 1.0_dp +end do +kx = alpha*kx_modes + +! Write initial field to vtk +if (wvtk) then + call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space +end if -close(unit=9000) -close(unit=9001) -close(unit=9002) -close(unit=9003) -close(unit=9004) -close(unit=9005) - -! dymin = minval(dynu) -! dxmin = sqrt(dx**2.0_dp + dymin**2.0_dp) - -! ! Fourier modes -! do ii = 1,Nx/2 -! kx_modes(ii) = real(ii,kind=dp) - 1.0_dp -! end do -! do ii = Nx/2+1, Nx -! kx_modes(ii) = real(ii-Nx,kind=dp) - 1.0_dp -! end do -! kx = alpha*kx_modes - -! ! Write initial field to vtk -! if (wvtk) then -! call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space -! end if - -! ! Initialize fields. -! call init_fields(ex_Tptrb, Ra) -! call init_to_fourier(ex_Tptrb) - -! ! Get nu0 and kappa0 -! call global_params_Ra(Ra) - -! if (proc_id == 0) then -! write(*,'(A70)') ' ' -! write(*,'(A70)') '*********************************************************************|' -! write(*,'(A70)') ' TWO-DIMENSIONAL THERMAL CONVECTION |' -! write(*,'(A70)') '*********************************************************************|' -! write(*,'(5X,A,47X,A)') 'COMPUTATION SIZE:', '|' -! write(*,'(A69,A)') ' ','|' -! write(*,'(20X,A23,I5,21X,A)') 'Nx = ', Nx, '|' -! write(*,'(20X,A23,I5,21X,A)') 'Ny = ', Ny, '|' -! write(*,'(20X,A23,I5,21X,A)') 'Nz = ', Nz, '|' -! write(*,'(20X,A23,I5,21X,A)') 'Number of time steps = ', nt, '|' -! write(*,'(20X,A23,I5,21X,A)') 'Number of Fourier modes = ', Nf, '|' -! write(*,'(A69,A)') ' ','|' -! write(*,'(A70)') '*********************************************************************|' -! write(*,'(5X,A,45X,A)') 'PROBLEM PARAMETERS:', '|' -! write(*,'(A69,A)') ' ','|' -! 
write(*,'(10X,A32,ES16.8,11X,A)') 'Prandtl number (Pr) = ', Pr, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Rayleigh number (Ra) = ', Ra, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Reynolds number (Re) = ', sqrt(Ra/(16.0_dp*Pr)), '|' -! write(*,'(A69,A)') ' ','|' -! write(*,'(A70)') '*********************************************************************|' -! write(*,'(5X,A,42X,A)') 'PHYSICAL PROBLEM SIZE:', '|' -! write(*,'(A69,A)') ' ','|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'alpha = ', alpha, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Left coordinate of box = ', xL, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Right coordinate of box = ', xR, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of bottom wall = ', ybot, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of top wall = ', ytop, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Box width = ', Lx, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Box height = ', Ly, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Aspect Ratio = ', Lx/Ly, '|' -! write(*,'(A69,A)') ' ','|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Integration time (T) = ', t_final, '|' -! write(*,'(10X,A32,ES16.8,11X,A)') 'Time step size (Delta t) = ', dt, '|' -! write(*,'(A69,A)') ' ','|' -! write(*,'(A70)') '*********************************************************************|' -! write(*,'(A70)') '*********************************************************************|' -! write(*,'(A70)') ' ' +! Initialize fields. +call init_fields(ex_Tptrb, Ra) +call init_to_fourier(ex_Tptrb) + +! Get nu0 and kappa0 +call global_params_Ra(Ra) + +if (proc_id == 0) then + write(*,'(A70)') ' ' + write(*,'(A70)') '*********************************************************************|' + write(*,'(A70)') ' TWO-DIMENSIONAL THERMAL CONVECTION |' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,47X,A)') 'COMPUTATION SIZE:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(20X,A23,I5,21X,A)') 'Nx = ', Nx, '|' + write(*,'(20X,A23,I5,21X,A)') 'Ny = ', Ny, '|' + write(*,'(20X,A23,I5,21X,A)') 'Nz = ', Nz, '|' + write(*,'(20X,A23,I5,21X,A)') 'Number of time steps = ', nt, '|' + write(*,'(20X,A23,I5,21X,A)') 'Number of Fourier modes = ', Nf, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,45X,A)') 'PROBLEM PARAMETERS:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Prandtl number (Pr) = ', Pr, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Rayleigh number (Ra) = ', Ra, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Reynolds number (Re) = ', sqrt(Ra/(16.0_dp*Pr)), '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,42X,A)') 'PHYSICAL PROBLEM SIZE:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'alpha = ', alpha, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Left coordinate of box = ', xL, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Right coordinate of box = ', xR, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of bottom wall = ', ybot, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of top wall = ', ytop, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Box width = ', Lx, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Box height = ', Ly, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Aspect Ratio = ', Lx/Ly, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Integration time (T) = ', t_final, '|' + 
write(*,'(10X,A32,ES16.8,11X,A)') 'Time step size (Delta t) = ', dt, '|'
+    write(*,'(A69,A)') ' ','|'
+    write(*,'(A70)') '*********************************************************************|'
+    write(*,'(A70)') '*********************************************************************|'
+    write(*,'(A70)') ' '
 
-!    flush(6)
-!    ! Print MPI division.
-!    write(*,*) "Ny = ", Ny*num_procs, " divided among ", num_procs, " processors -> ", &
-!        Ny, " rows per processor."
-! end if
+    flush(6)
+    ! Print MPI division.
+    write(*,*) "Ny = ", Ny*num_procs, " divided among ", num_procs, " processors -> ", &
+        Ny, " rows per processor."
+end if
 
-! call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
+call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
-! write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows."
+write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows."
 
-! ! Get solution with time integration
-! call imex_rk_MPI(proc_id_str, 1, .true.) ! true causes writing of nusselt number.
+! Get solution with time integration
+call imex_rk_MPI(proc_id_str, 1, .true., proc_id, num_procs) ! true causes writing of nusselt number.
 
 call MPI_Finalize(mpierror)
 

From c1f1f7e6fb6de24bf141505917c0c6fa50b8ffbf Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Tue, 27 Apr 2021 17:51:58 -0400
Subject: [PATCH 13/24] support V1 V2 instantiation with MPI

---
 bc_setup.f90             | 43 ++++++++++++++++++++++++++++++++++++++--
 time_integrators_MPI.f90 |  8 ++++----
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/bc_setup.f90 b/bc_setup.f90
index 893aca1..03346df 100755
--- a/bc_setup.f90
+++ b/bc_setup.f90
@@ -263,10 +263,39 @@ subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str)
         ! Send phi1 and phi2 columns to each node.
         do otherproc = 1,num_procs-1
             start = otherproc * Ny
-            ! write(*,*) start, Fphi12
             call MPI_SEND(Fphi12(start,1), Ny, MPI_DOUBLE, otherproc, 45, MPI_COMM_WORLD, mpierror)
             call MPI_SEND(Fphi12(start,2), Ny, MPI_DOUBLE, otherproc, 46, MPI_COMM_WORLD, mpierror)
         end do
+
+        ! Initialize V1 and V2.
+        FV12(:,1) = Fphi12(:,1)
+        FV12(:,2) = Fphi12(:,2)
+
+        do jj = 2,total_ny-1
+            d(jj-1) = -wavex**2.0_dp + g2_total(jj)
+        end do
+
+        do jj = 2,total_ny-2
+            du(jj-1) = g3_total(jj)
+        end do
+
+        do jj = 3,total_ny-1
+            dl(jj-2) = g1_total(jj)
+        end do
+
+        call dgtsv(total_ny-2, 2, dl, d, du, FV12, total_ny-2, info)
+
+        ! Place V1, V2 in main node memory.
+        V1(2:Ny,ii) = FV12(1:Ny-1,1)
+        V2(2:Ny,ii) = FV12(1:Ny-1,2)
+
+        ! Send V1 and V2 columns to each node.
+        do otherproc = 1,num_procs-1
+            start = otherproc * Ny
+            call MPI_SEND(FV12(start,1), Ny, MPI_DOUBLE, otherproc, 48, MPI_COMM_WORLD, mpierror)
+            call MPI_SEND(FV12(start,2), Ny, MPI_DOUBLE, otherproc, 49, MPI_COMM_WORLD, mpierror)
+        end do
+
     end do
 
     ! Send last row to final proc.
@@ -283,7 +312,7 @@ subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str)
     allocate(recv_col2(Ny), stat=alloc_err)
     call check_alloc_err(alloc_err)
 
-    ! Receive rows of phi1 and phi2.
+    ! Receive cols of phi1 and phi2.
     do ii = 1,Nx
         call MPI_RECV(recv_col, Ny, MPI_DOUBLE, 0, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
         call MPI_RECV(recv_col2, Ny, MPI_DOUBLE, 0, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
@@ -291,6 +320,14 @@ subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str)
         phi2(1:Ny, ii) = recv_col2
     end do
 
+    ! Receive cols of V1 and V2.
+ do ii = 1,Nx + call MPI_RECV(recv_col, Ny, MPI_DOUBLE, 0, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(recv_col2, Ny, MPI_DOUBLE, 0, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + V1(1:Ny, ii) = recv_col + V2(1:Ny, ii) = recv_col2 + end do + ! Handle boundary of last processor. if (proc_id == num_procs - 1) then allocate(last_row_recv(Nx), stat=alloc_err) @@ -298,6 +335,8 @@ subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str) call MPI_RECV(last_row_recv, Nx, MPI_DOUBLE, 0, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) phi1(Ny,1:Nx) = 0.0_dp phi2(Ny,1:Nx) = last_row_recv + V1(Ny, 1:Nx) = 0.0_dp + V2(Ny, 1:Nx) = 0.0_dp end if end if diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90 index 0e27029..9b6abec 100644 --- a/time_integrators_MPI.f90 +++ b/time_integrators_MPI.f90 @@ -62,13 +62,13 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) call init_bc_MPI(acoeffs(1,1), proc_id, num_procs, proc_id_str) -open(unit=9000, file="P"//proc_id_str//"phi1.txt", action="write", status="unknown") -open(unit=9001, file="P"//proc_id_str//"phi2.txt", action="write", status="unknown") +open(unit=9000, file="P"//proc_id_str//"V1.txt", action="write", status="unknown") +open(unit=9001, file="P"//proc_id_str//"V2.txt", action="write", status="unknown") do i=1,Ny do j=1,Nx - write (9000,*) phi1(i,j) - write (9001,*) phi2(i,j) + write (9000,*) V1(i,j) + write (9001,*) V2(i,j) end do end do From c37b311138a52671eca72faa9d599647ad052872 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Tue, 27 Apr 2021 18:24:50 -0400 Subject: [PATCH 14/24] calculate wall derivatives with MPI --- bc_setup.f90 | 13 +++++++++++++ time_integrators_MPI.f90 | 39 +++++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/bc_setup.f90 b/bc_setup.f90 index 03346df..31f5f56 100755 --- a/bc_setup.f90 +++ b/bc_setup.f90 @@ -340,6 +340,19 @@ subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str) end if end if +! Calculate wall derivative on first and last processor. 
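!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
! The sums below use the one-sided rows of h1, h2, h3 from y_mesh_params_MPI:
! row 1 differences points (1,2,3) and row Ny differences (Ny-2,Ny-1,Ny), so
! they give second-order wall values of dV1/dy and dV2/dy. Only rank 0 owns
! the bottom wall and rank num_procs-1 the top wall, hence the two branches.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!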
+if (proc_id == 0) then + do ii = 1,Nx + dyv1_B(ii) = h1(1)*V1(1,ii) + h2(1)*V1(2,ii) + h3(1)*V1(3,ii) + dyv2_B(ii) = h1(1)*V2(1,ii) + h2(1)*V2(2,ii) + h3(1)*V2(3,ii) + end do +else if (proc_id == num_procs - 1) then + do ii = 1,Nx + dyv1_T(ii) = h1(Ny)*V1(Ny-2,ii) + h2(Ny)*V1(Ny-1,ii) + h3(Ny)*V1(Ny,ii) + dyv2_T(ii) = h1(Ny)*V2(Ny-2,ii) + h2(Ny)*V2(Ny-1,ii) + h3(Ny)*V2(Ny,ii) + end do +end if + end subroutine init_bc_MPI end module bc_setup diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90 index 9b6abec..0498468 100644 --- a/time_integrators_MPI.f90 +++ b/time_integrators_MPI.f90 @@ -62,18 +62,33 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) call init_bc_MPI(acoeffs(1,1), proc_id, num_procs, proc_id_str) -open(unit=9000, file="P"//proc_id_str//"V1.txt", action="write", status="unknown") -open(unit=9001, file="P"//proc_id_str//"V2.txt", action="write", status="unknown") - -do i=1,Ny - do j=1,Nx - write (9000,*) V1(i,j) - write (9001,*) V2(i,j) - end do -end do - -close(unit=9000) -close(unit=9001) +if (proc_id == 0) then + open(unit=9000, file="P000dyv1_b.txt", action="write", status="unknown") + open(unit=9001, file="P000dyv2_b.txt", action="write", status="unknown") + + do i=1,Nx + write (9000,*) dyv1_B(i) + write (9001,*) dyv2_B(i) + end do + + close(unit=9000) + close(unit=9001) +end if + +if (proc_id == num_procs-1) then + open(unit=9000, file="P003dyv1_t.txt", action="write", status="unknown") + open(unit=9001, file="P003dyv2_t.txt", action="write", status="unknown") + + do i=1,Nx + write (9000,*) dyv1_T(i) + write (9001,*) dyv2_T(i) + end do + + close(unit=9000) + close(unit=9001) +end if + + end subroutine imex_rk_MPI From 5fa988a792fd9c419ae47712e8c8e151b5d268c8 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Thu, 29 Apr 2021 00:03:11 -0400 Subject: [PATCH 15/24] working on time loop --- bc_setup.f90 | 9 +- time_integrators_MPI.f90 | 575 +++++++++++++++++++++++++++++++++++++-- time_loop_MPI.f90 | 1 + 3 files changed, 556 insertions(+), 29 deletions(-) diff --git a/bc_setup.f90 b/bc_setup.f90 index 31f5f56..0c127eb 100755 --- a/bc_setup.f90 +++ b/bc_setup.f90 @@ -2,9 +2,10 @@ module bc_setup use global use write_pack +use mesh_pack implicit none -include 'mpif.h' +! include 'mpif.h' contains @@ -303,9 +304,9 @@ subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str) else ! Send g1, g2, g3 to first node. 
- call MPI_SEND(g1(1), 12, MPI_DOUBLE, 0, 42, MPI_COMM_WORLD, mpierror) - call MPI_SEND(g2(1), 12, MPI_DOUBLE, 0, 43, MPI_COMM_WORLD, mpierror) - call MPI_SEND(g3(1), 12, MPI_DOUBLE, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 44, MPI_COMM_WORLD, mpierror) allocate(recv_col(Ny), stat=alloc_err) call check_alloc_err(alloc_err) diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90 index 0498468..00da549 100644 --- a/time_integrators_MPI.f90 +++ b/time_integrators_MPI.f90 @@ -30,13 +30,20 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) logical, optional, intent(in) :: save_nusselt integer, intent(in) :: proc_id, num_procs -integer :: nti, i, j -integer :: nprint +integer :: nti, i, j, mpierror, total_ny +integer :: nprint, otherproc logical :: wvtk real(dp) :: nusselt_num real(dp) :: start, finish real(dp) :: start_overall, finish_overall -integer :: nthreads, myid +integer :: nthreads, myid, stind +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_phi_MPI, tmp_T_MPI, phi_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_phi_MPI, K2hat_phi_MPI, K3hat_phi_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_T_MPI, K2hat_T_MPI, K3hat_T_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: T_MPI, demo_T_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:,:) :: phiout, Tout +real(dp), allocatable, dimension(:) :: g1_total, g2_total, g3_total integer, EXTERNAL :: OMP_GET_THREAD_NUM, OMP_GET_NUM_THREADS real(dp), EXTERNAL :: OMP_GET_WTIME EXTERNAL :: OMP_SET_NUM_THREADS @@ -62,36 +69,554 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) call init_bc_MPI(acoeffs(1,1), proc_id, num_procs, proc_id_str) -if (proc_id == 0) then - open(unit=9000, file="P000dyv1_b.txt", action="write", status="unknown") - open(unit=9001, file="P000dyv2_b.txt", action="write", status="unknown") - - do i=1,Nx - write (9000,*) dyv1_B(i) - write (9001,*) dyv2_B(i) - end do - - close(unit=9000) - close(unit=9001) +time = 0.0_dp + +dtmax = 0.5_dp +dtmin = 1.0e-4_dp + +dt_ramp = 1.1_dp + +dt_old = dt + +nti = 0 + +! 
Format for writing out single values.
+1000 format(E25.16E3)
+
+if (proc_id == 0) then
+    total_ny = Ny * num_procs
+    allocate(tmp_phi_MPI(total_ny), tmp_T_MPI(total_ny), phi_MPI(total_ny-2), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(K1hat_phi_MPI(total_ny-2), K2hat_phi_MPI(total_ny-2), K3hat_phi_MPI(total_ny-2), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(K1hat_T_MPI(total_ny-2), K2hat_T_MPI(total_ny-2), K3hat_T_MPI(total_ny-2), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(K1_phi_MPI(total_ny-2), K2_phi_MPI(total_ny-2), K1_T_MPI(total_ny-2), K2_T_MPI(total_ny-2), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(T_MPI(total_ny), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(g1_total(total_ny), g2_total(total_ny), g3_total(total_ny), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+end if
+
+allocate(phiout(Ny,Nx), Tout(Ny,Nx), stat=alloc_err)
+call check_alloc_err(alloc_err)
+
+call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
+
+! Time integration
+do ! while (time < t_final)
+    start_overall = OMP_GET_WTIME()
+
+    dt_final = t_final - time
+
+    if (dt_final <= dt) then
+        time = t_final
+    else
+        time = time + dt
+    end if
+
+    write(*,*) "time = ", time, "dt = ", dt
+
+    nti = nti + 1
+
+    !:::::::::::
+    ! STAGE 1 ::
+    !:::::::::::
+    phii = phi
+    Ti = T
+    uxi = ux
+    uyi = uy
+    start = OMP_GET_WTIME()
+    call calc_explicit_MPI(1, proc_id, num_procs, proc_id_str)
+    finish = OMP_GET_WTIME()
+    write(*,*) " - calc_explicit(1) timing: ", finish-start, "(s)"
+    start = OMP_GET_WTIME()
+    !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic)
+    do it = 1,1 ! kx loop
+        ! Compute phi1 and T1
+        if (proc_id == 0) then
+            ! Need to pull all of the variables into local main memory.
+            phi_MPI(1:Ny-1) = phi(2:Ny,it)
+            K1hat_phi_MPI(1:Ny-1) = K1hat_phi(2:Ny,it)
+            K2hat_phi_MPI(1:Ny-1) = K2hat_phi(2:Ny,it)
+            K3hat_phi_MPI(1:Ny-1) = K3hat_phi(2:Ny,it)
+            K1hat_T_MPI(1:Ny-1) = K1hat_T(2:Ny,it)
+            K2hat_T_MPI(1:Ny-1) = K2hat_T(2:Ny,it)
+            K3hat_T_MPI(1:Ny-1) = K3hat_T(2:Ny,it)
+            K1_phi_MPI(1:Ny-1) = K1_phi(2:Ny,it)
+            K2_phi_MPI(1:Ny-1) = K2_phi(2:Ny,it)
+            K1_T_MPI(1:Ny-1) = K1_T(2:Ny,it)
+            K2_T_MPI(1:Ny-1) = K2_T(2:Ny,it)
+            T_MPI(1:Ny) = T(1:Ny,it)
+
+            ! Receive pieces from other nodes.
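!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
! Only interior points enter the implicit solve, so the gathered arrays have
! length total_ny-2: rank 0 contributes rows 2:Ny, interior ranks rows 1:Ny,
! and the last rank rows 1:Ny-1, which is what the offsets in the receive
! loop below encode. The same assembly could be done collectively; a hedged
! sketch (counts, displs, and the per-rank send buffer are assumptions, not
! the patch's code):
!
!   counts = Ny; counts(1) = Ny-1; counts(num_procs) = Ny-1
!   displs(1) = 0
!   do p = 2,num_procs
!       displs(p) = displs(p-1) + counts(p-1)
!   end do
!   call MPI_GATHERV(sendbuf, counts(proc_id+1), MPI_C_DOUBLE_COMPLEX, &
!                    phi_MPI, counts, displs, MPI_C_DOUBLE_COMPLEX, &
!                    0, MPI_COMM_WORLD, mpierror)
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!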
+ do otherproc = 1,num_procs-2 + stind = otherproc * Ny + call MPI_RECV(phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + + ! Receive from last node. + stind = (num_procs-1) * Ny + call MPI_RECV(phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + + ! Receive g1,g2,g3 from all other nodes. 
+ do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 54, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 55, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 56, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + g1_total(1:Ny) = g1 + g2_total(1:Ny) = g2 + g3_total(1:Ny) = g3 + + ! Call vari mod + call calc_vari_mod_MPI(tmp_phi_MPI, tmp_T_MPI, acoeffs(1,1), 1, total_ny,& + kx(it), phi_MPI,& + K1hat_phi_MPI,K2hat_phi_MPI,K3hat_phi_MPI,& + K1hat_T_MPI,K2hat_T_MPI,K3hat_T_MPI,& + K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI,& + g1_total, g2_total, g3_total,& + T_MPI) + do i = 1,total_ny + write(*,*) i, tmp_T_MPI(i) + end do + + else if (proc_id == num_procs - 1) then + ! Send data to main node + call MPI_SEND(phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + else + ! Send data to main node + call MPI_SEND(phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. 
+ call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + end if end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 1 mid timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + ! if (proc_id == 0) then + ! open(unit=9010, file="P"//proc_id_str//"phiout_real.txt", action="write", status="unknown") + ! open(unit=9011, file="P"//proc_id_str//"phiout_im.txt", action="write", status="unknown") + ! do i=1,total_ny + ! do j=1,Nx + ! write (9010,*) REAL(Tout(i,j)) + ! write (9011,*) AIMAG(Tout(i,j)) + ! end do + ! end do + ! close(unit=9010) + ! close(unit=9011) + ! end if + write(*,*) "done writing phi!" + if (time == t_final) then + exit + end if - close(unit=9000) - close(unit=9001) -end if + + end do +end subroutine imex_rk_MPI +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -end subroutine imex_rk_MPI +subroutine calc_explicit_MPI(stage, proc_id, num_procs, proc_id_str) + +integer :: i, j +integer, intent(in) :: stage, proc_id, num_procs +character(3), intent(in) :: proc_id_str +real(dp) :: start, finish + +start = OMP_GET_WTIME() +select case(stage) + case (1) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K1hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO + case (2) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K2hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO + case (3) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K3hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO + case (4) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K4hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO +end select +finish = OMP_GET_WTIME() +write(*,*) " - - l1 timing: ", finish-start, "(s)" + +start = OMP_GET_WTIME() +!$OMP PARALLEL DO schedule(dynamic) +do i=1,Nx + ! Compute dx(T) in Fourier space + nlT (:,i) = kx(i)*Ti(:,i) + ! Compute D2(ux) + nlphi(:,i) = -kx(i)**2.0_dp*uxi(:,i) + d2y(uxi(:,i)) +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() + +write(*,*) " - - l2 timing: ", finish-start, "(s)" + +!nlT = -CI*nlT +nlT = CI*nlT + +start = OMP_GET_WTIME() +!$OMP PARALLEL DO private(tnlT, tnlphi, tT, tux, tuy, tphi) schedule(dynamic) +do j = 1,Ny + ! Bring everything to physical space + tnlT = nlT(j,:) + tnlphi = nlphi(j,:) + tT = Ti(j,:) + tux = uxi(j,:) + tuy = uyi(j,:) + tphi = phii(j,:) + call fftw_execute_dft(iplannlT, tnlT, tnlT) + call fftw_execute_dft(iplannlphi, tnlphi, tnlphi) + call fftw_execute_dft(iplanT, tT, tT) + call fftw_execute_dft(iplanux, tux, tux) + call fftw_execute_dft(iplanuy, tuy, tuy) + call fftw_execute_dft(iplanphi, tphi, tphi) + nlT(j,:) = tnlT + nlphi(j,:) = tnlphi + Ti(j,:) = tT + uxi(j,:) = tux + uyi(j,:) = tuy + phii(j,:) = tphi +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() +write(*,*) " - - l3 timing: ", finish-start, "(s)" + +! Calculate nonlinear term +start = OMP_GET_WTIME() +!$OMP PARALLEL DO private(tmp_T) schedule(dynamic) +do i = 1,Nx + ! Temperature + tmp_T = Ti(:,i) + nlT(:,i) = uxi(:,i)*nlT(:,i) + uyi(:,i)*d1y(tmp_T) + ! phi + nlphi(:,i) = uxi(:,i)*phii(:,i) - uyi(:,i)*nlphi(:,i) +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() +write(*,*) " - - l4 timing: ", finish-start, "(s)" + +! 
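!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
! Why the dealiasing in the next block: the products formed above (e.g.
! uxi*nlT) are quadratic, so in physical space they generate modes up to
! twice the retained cutoff Nf/2; after transforming back, every mode with
! |kx|/alpha >= Nf/2 is zeroed so that aliased energy cannot fold back onto
! the retained spectrum.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!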
Bring nonlinear terms back to Fourier space +start = OMP_GET_WTIME() +!$OMP PARALLEL DO private(tnlT, tnlphi) schedule(dynamic) +do j = 1,Ny + tnlT = nlT(j,:) + tnlphi = nlphi(j,:) + call fftw_execute_dft(plannlT, tnlT, tnlT) + call fftw_execute_dft(plannlphi, tnlphi, tnlphi) + ! Dealias + do i = 1,Nx + if (abs(kx(i))/alpha >= Nf/2) then + tnlT(i) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + tnlphi(i) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + end if + end do + nlT(j,:) = tnlT + nlphi(j,:) = tnlphi +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() +write(*,*) " - - l5 timing: ", finish-start, "(s)" + +nlT = nlT / real(Nx,kind=dp) +nlphi = nlphi / real(Nx,kind=dp) + +start = OMP_GET_WTIME() +select case (stage) + case (1) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K1hat_phi(:,i) = K1hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K1hat_phi(:,i) = K1hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K1hat_T = -nlT + case (2) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K2hat_phi(:,i) = K2hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K2hat_phi(:,i) = K2hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K2hat_T = -nlT + case (3) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K3hat_phi(:,i) = K3hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K3hat_phi(:,i) = K3hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K3hat_T = -nlT + case (4) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K4hat_phi(:,i) = K4hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K4hat_phi(:,i) = K4hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K4hat_T = -nlT +end select +finish = OMP_GET_WTIME() +write(*,*) " - - l6 timing: ", finish-start, "(s)" + +end subroutine calc_explicit_MPI + +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! 
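!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
! calc_vari_mod_MPI below advances the implicit (diffusive) part of one IMEX
! stage for a single Fourier mode kx: with D2 the nonuniform second-derivative
! stencil (g1, g2, g3), it solves the tridiagonal systems
!
!   (1 - kappa0*dt*aii*(-kx**2 + D2)) * T_new   = FT
!   (1 - nu0  *dt*aii*(-kx**2 + D2)) * phi_new = Fphi
!
! over the total_ny-2 interior points, with the real and imaginary parts
! carried as the two right-hand-side columns passed to dgtsv.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!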
+subroutine calc_vari_mod_MPI(phiout,Tout, aii, stage, total_ny, kx_it, phi_in, & + k1hat_phi_in, k2hat_phi_in, k3hat_phi_in,& + k1hat_T_in, k2hat_T_in, k3hat_T_in,& + k1_phi_in, k2_phi_in, k1_T_in, k2_T_in,& + g1_total, g2_total, g3_total,& + T_in) + +real(dp), intent(in) :: aii +integer, intent(in) :: stage, total_ny +real(dp), intent(in) :: kx_it +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1hat_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2hat_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k3hat_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1hat_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2hat_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k3hat_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny), intent(in) :: T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2_T_in +real(dp), dimension(total_ny), intent(in) :: g1_total, g2_total, g3_total +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:), intent(out) :: phiout, Tout +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: Fphi, FT +real(dp), allocatable, dimension(:,:) :: phi_rhs, T_rhs +real(dp), allocatable, dimension(:) :: dphi, duphi, dlphi +real(dp), allocatable, dimension(:) :: ddT, duT, dlT +integer :: i + +allocate(dphi(total_ny-2), ddT(total_ny-2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(duphi(total_ny-3), dlphi(total_ny-3), duT(total_ny-3), dlT(total_ny-3), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(phiout(total_ny), Tout(total_ny), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(Fphi(total_ny-2), FT(total_ny-2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(phi_rhs(total_ny-2,2), T_rhs(total_ny-2,2), stat=alloc_err) +call check_alloc_err(alloc_err) + +dphi = 0.0_dp +ddT = 0.0_dp +duphi = 0.0_dp +dlphi = 0.0_dp +duT = 0.0_dp +dlT = 0.0_dp +phi_rhs = 0.0_dp +T_rhs = 0.0_dp +Fphi = (0.0_dp, 0.0_dp) +FT = (0.0_dp, 0.0_dp) +phiout = (0.0_dp, 0.0_dp) +Tout = (0.0_dp, 0.0_dp) + +! LHS Matrix (tridiagonal, not necessarily symmetric) +do jt = 2,total_ny-1 + ddT (jt-1) = 1.0_dp - kappa0*dt*aii*(-kx_it**2.0_dp + g2_total(jt)) + dphi(jt-1) = 1.0_dp - nu0 *dt*aii*(-kx_it**2.0_dp + g2_total(jt)) +end do + +! do i = 2,total_ny-1 +! write(*,*) i, ddT (i-1) +! end do + +! write(*,*) kappa0 ,dt , aii + + +do jt = 2,total_ny-2 + duT (jt-1) = -kappa0*dt*g3_total(jt)*aii + duphi(jt-1) = -nu0 *dt*g3_total(jt)*aii +end do + +do jt = 3,total_ny-1 + dlT (jt-2) = -kappa0*dt*g1_total(jt)*aii + dlphi(jt-2) = -nu0 *dt*g1_total(jt)*aii +end do + +select case (stage) + case(1) + Fphi = phi_in + dt*ahatcoeffs(2,1)*k1hat_phi_in + FT = T_in(2:total_ny-1) + dt*ahatcoeffs(2,1)*k1hat_T_in + FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) + FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) + + phi_rhs(:,1) = real(Fphi) + phi_rhs(:,2) = aimag(Fphi) + + T_rhs (:,1) = real(FT) + T_rhs (:,2) = aimag(FT) + + call dgtsv(total_ny-2, 2, dlT, ddT, duT, T_rhs, total_ny-2, info) + Tout(2:total_ny-1) = cmplx(T_rhs(:,1), T_rhs(:,2), kind=C_DOUBLE_COMPLEX) + ! 
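!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
! Because T is prescribed at the walls, the known wall values are folded into
! the right-hand side above: the first interior equation gains
! kappa0*dt*aii*g1_total(2)*T_in(1) and the last interior equation gains
! kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny), so the tridiagonal system
! never contains the boundary rows; the wall values are reattached just below.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!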
+        ! Set temperature boundary conditions
+        Tout(1) = T_in(1)
+        Tout(total_ny) = T_in(total_ny)
+
+        call dgtsv(total_ny-2, 2, dlphi, dphi, duphi, phi_rhs, total_ny-2, info)
+        phiout(2:total_ny-1) = cmplx(phi_rhs(:,1), phi_rhs(:,2), kind=C_DOUBLE_COMPLEX)
+
+    case(2)
+        Fphi = phi_in + dt*(acoeffs(2,1)*k1_phi_in + &
+               &            ahatcoeffs(3,1)*k1hat_phi_in + &
+               &            ahatcoeffs(3,2)*k2hat_phi_in)
+        FT = T_in(2:total_ny-1) + dt*(acoeffs(2,1)*k1_T_in + &
+               &          ahatcoeffs(3,1)*k1hat_T_in + &
+               &          ahatcoeffs(3,2)*k2hat_T_in)
+        FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1)
+        FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny)
+
+        phi_rhs(:,1) = real(Fphi)
+        phi_rhs(:,2) = aimag(Fphi)
+
+        T_rhs (:,1) = real(FT)
+        T_rhs (:,2) = aimag(FT)
+
+        call dgtsv(total_ny-2, 2, dlT, ddT, duT, T_rhs, total_ny-2, info)
+        Tout(2:total_ny-1) = cmplx(T_rhs(:,1), T_rhs(:,2), kind=C_DOUBLE_COMPLEX)
+        ! Set temperature boundary conditions
+        Tout(1) = T_in(1)
+        Tout(total_ny) = T_in(total_ny)
+
+        call dgtsv(total_ny-2, 2, dlphi, dphi, duphi, phi_rhs, total_ny-2, info)
+        phiout(2:total_ny-1) = cmplx(phi_rhs(:,1), phi_rhs(:,2), kind=C_DOUBLE_COMPLEX)
+
+    case(3)
+        Fphi = phi_in + dt*(acoeffs(3,1)*k1_phi_in + &
+               &acoeffs(3,2)*k2_phi_in + &
+               &ahatcoeffs(4,1)*k1hat_phi_in + &
+               &ahatcoeffs(4,2)*k2hat_phi_in + &
+               &ahatcoeffs(4,3)*k3hat_phi_in)
+        FT = T_in(2:total_ny-1) + dt*(acoeffs(3,1)*k1_T_in + &
+               &acoeffs(3,2)*k2_T_in + &
+               &ahatcoeffs(4,1)*k1hat_T_in + &
+               &ahatcoeffs(4,2)*k2hat_T_in + &
+               &ahatcoeffs(4,3)*k3hat_T_in)
+        FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1)
+        FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny)
+
+        phi_rhs(:,1) = real(Fphi)
+        phi_rhs(:,2) = aimag(Fphi)
+
+        T_rhs (:,1) = real(FT)
+        T_rhs (:,2) = aimag(FT)
+
+        call dgtsv(total_ny-2, 2, dlT, ddT, duT, T_rhs, total_ny-2, info)
+        Tout(2:total_ny-1) = cmplx(T_rhs(:,1), T_rhs(:,2), kind=C_DOUBLE_COMPLEX)
+        ! Set temperature boundary conditions
+        Tout(1) = T_in(1)
+        Tout(total_ny) = T_in(total_ny)
+
+        call dgtsv(total_ny-2, 2, dlphi, dphi, duphi, phi_rhs, total_ny-2, info)
+        phiout(2:total_ny-1) = cmplx(phi_rhs(:,1), phi_rhs(:,2), kind=C_DOUBLE_COMPLEX)
+
+end select
+
+end subroutine calc_vari_mod_MPI
+!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
+!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
 end module time_integrators_MPI
\ No newline at end of file
diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90
index 0e2956b..5201f65 100644
--- a/time_loop_MPI.f90
+++ b/time_loop_MPI.f90
@@ -12,6 +12,7 @@ program time_loop
 use mesh_pack
 use time_integrators_MPI
 
+!
include 'mpif.h' implicit none integer :: ntokens, ios, i From e4fea8830c1ea4baa16747cc2a0bf21e35391671 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Thu, 29 Apr 2021 17:12:05 -0400 Subject: [PATCH 16/24] finished stage 1 --- global.f90 | 90 ++++++++- time_integrators_MPI.f90 | 386 +++++++++++++++++++++++++++++++++------ 2 files changed, 424 insertions(+), 52 deletions(-) diff --git a/global.f90 b/global.f90 index e94d8d8..96ca400 100755 --- a/global.f90 +++ b/global.f90 @@ -314,7 +314,7 @@ function d1y(vec) result(d1yvec) complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: vec complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: d1yvec -integer :: alloc_err, jj, n +integer :: alloc_err, jj, n, i n = size(vec) @@ -336,6 +336,35 @@ end function d1y !::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +function d1y_MPI(vec, h1_total, h2_total, h3_total) result(d1yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: vec +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: d1yvec +real(dp), dimension(:), intent(in) :: h1_total, h2_total, h3_total +integer :: alloc_err, jj, n, i + +n = size(vec) + +allocate(d1yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d1y." + stop +end if + +d1yvec = 0.0_dp + +d1yvec(1) = h1_total(1)*vec(1) + h2_total(1)*vec(2) + h3_total(1)*vec(3) +do jj = 2,n-1 + d1yvec(jj) = h1_total(jj)*vec(jj-1) + h2_total(jj)*vec(jj) + h3_total(jj)*vec(jj+1) +end do +d1yvec(n) = h1_total(n)*vec(n-2) + h2_total(n)*vec(n-1) + h3_total(n)*vec(n) + +end function d1y_MPI + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + function d2y(vec) result(d2yvec) implicit none @@ -389,6 +418,65 @@ function d2y(vec) result(d2yvec) end function d2y +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +function d2y_MPI(vec, dynu_in, g1_total, g2_total, g3_total, total_ny) result(d2yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), intent(in) :: vec(:) +complex(C_DOUBLE_COMPLEX), allocatable :: d2yvec(:) +real(dp), dimension(:), intent(in) :: dynu_in, g1_total, g2_total, g3_total +integer, intent(in) :: total_ny +real(dp) :: d1, d2, d3, d4 +real(dp) :: ell1, ell2, ell3, ell4 +real(dp) :: dNm3, dNm2, dNm1, dN +real(dp) :: ellNm3, ellNm2, ellNm1, ellN +integer :: jj, n + +n = size(vec) + +allocate(d2yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d2y." 
+ stop +end if + +d2yvec = 0.0_dp + +d1 = -dynu_in(1)*(dynu_in(1)+dynu_in(2))*(dynu_in(1)+dynu_in(2)+dynu_in(3)) +d2 = dynu_in(1)*dynu_in(2)*(dynu_in(2) + dynu_in(3)) +d3 = -dynu_in(2)*dynu_in(3)*(dynu_in(1) + dynu_in(2)) +d4 = dynu_in(3)*(dynu_in(2)+dynu_in(3))*(dynu_in(1)+dynu_in(2)+dynu_in(3)) + +ell1 = -2.0_dp*(3.0_dp*dynu_in(1) + 2.0_dp*dynu_in(2) + dynu_in(3)) / d1 +ell2 = -2.0_dp*(2.0_dp*(dynu_in(1) + dynu_in(2)) + dynu_in(3)) / d2 +ell3 = -2.0_dp*(2.0_dp*dynu_in(1) + dynu_in(2) + dynu_in(3)) / d3 +ell4 = -2.0_dp*(2.0_dp*dynu_in(1) + dynu_in(2)) / d4 + +dN = dynu_in(total_ny-1)*(dynu_in(total_ny-1)+dynu_in(total_ny-2))*(dynu_in(total_ny-1)+dynu_in(total_ny-2)+dynu_in(total_ny-3)) +dNm1 = -dynu_in(total_ny-1)*dynu_in(total_ny-2)*(dynu_in(total_ny-2)+dynu_in(total_ny-3)) +dNm2 = dynu_in(total_ny-2)*dynu_in(total_ny-3)*(dynu_in(total_ny-1)+dynu_in(total_ny-2)) +dNm3 = -dynu_in(total_ny-3)*(dynu_in(total_ny-2)+dynu_in(total_ny-3))*(dynu_in(total_ny-1)+dynu_in(total_ny-2)+dynu_in(total_ny-3)) + +ellN = 2.0_dp*(3.0_dp*dynu_in(total_ny-1) + 2.0_dp*dynu_in(total_ny-2) + dynu_in(total_ny-3)) / dN +ellNm1 = 2.0_dp*(2.0_dp*(dynu_in(total_ny-1)+dynu_in(total_ny-2)) + dynu_in(total_ny-3)) / dNm1 +ellNm2 = 2.0_dp*(2.0_dp*dynu_in(total_ny-1) + dynu_in(total_ny-2) + dynu_in(total_ny-3)) / dNm2 +ellNm3 = 2.0_dp*(2.0_dp*dynu_in(total_ny-1) + dynu_in(total_ny-2)) / dNm3 + +!d2yvec(1) = g1(1)*vec(1) + g2(1)*vec(2) + g3(1)*vec(3) +d2yvec(1) = ell1*vec(1) + ell2*vec(2) + ell3*vec(3) + ell4*vec(4) +do jj = 2,n-1 + d2yvec(jj) = g1_total(jj)*vec(jj-1) + g2_total(jj)*vec(jj) + g3_total(jj)*vec(jj+1) +end do +!d2yvec(n) = g1(n)*vec(n-2) + g2(n)*vec(n-1) + g3(n)*vec(n) +d2yvec(n) = ellNm3*vec(n-3) + ellNm2*vec(n-2) + & + &ellNm1*vec(n-1) + ellN*vec(n) + +end function d2y_MPI + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + subroutine check_alloc_err(err_mess) integer, intent(in) :: err_mess diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90 index 00da549..9bbb35d 100644 --- a/time_integrators_MPI.f90 +++ b/time_integrators_MPI.f90 @@ -41,9 +41,13 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_phi_MPI, K2hat_phi_MPI, K3hat_phi_MPI complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_T_MPI, K2hat_T_MPI, K3hat_T_MPI complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI -complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: T_MPI, demo_T_MPI -complex(C_DOUBLE_COMPLEX), allocatable, dimension(:,:) :: phiout, Tout -real(dp), allocatable, dimension(:) :: g1_total, g2_total, g3_total +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: T_MPI, tmp_uy_MPI, tmp_uy1_MPI, tmp_phi1_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_K_phi_MPI, tmp_K_T_MPI, uxi_MPI +real(dp), allocatable, dimension(:) :: phi1_MPI, phi2_MPI, V1_MPI, V2_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:,:) :: single_phiout, single_Tout +real(dp), allocatable, dimension(:) :: g1_total, g2_total, g3_total, dynu_MPI +real(dp), allocatable, dimension(:) :: h1_total, h2_total, h3_total +real(dp) :: dyv1_T_it_MPI, dyv2_T_it_MPI, h1_end_MPI, h2_end_MPI, h3_end_MPI integer, EXTERNAL :: OMP_GET_THREAD_NUM, OMP_GET_NUM_THREADS real(dp), EXTERNAL :: OMP_GET_WTIME EXTERNAL :: OMP_SET_NUM_THREADS @@ -95,13 +99,20 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) call check_alloc_err(alloc_err) 
allocate(T_MPI(total_ny), stat=alloc_err)
     call check_alloc_err(alloc_err)
-    allocate(g1_total(total_ny), g2_total(total_ny), g3_total(total_ny), stat=alloc_err)
+    allocate(g1_total(total_ny), g2_total(total_ny), g3_total(total_ny), dynu_MPI(total_ny-1), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(h1_total(total_ny), h2_total(total_ny), h3_total(total_ny), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(tmp_uy_MPI(total_ny), tmp_uy1_MPI(total_ny), tmp_phi1_MPI(total_ny), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(single_phiout(total_ny, Nx), single_Tout(total_ny,Nx), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(phi1_MPI(total_ny), phi2_MPI(total_ny), V1_MPI(total_ny), V2_MPI(total_ny), stat=alloc_err)
+    call check_alloc_err(alloc_err)
+    allocate(tmp_K_phi_MPI(total_ny), tmp_K_T_MPI(total_ny), uxi_MPI(total_ny), stat=alloc_err)
     call check_alloc_err(alloc_err)
 end if
 
-allocate(phiout(Ny,Nx), Tout(Ny,Nx), stat=alloc_err)
-call check_alloc_err(alloc_err)
-
 call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
 ! Time integration
@@ -133,7 +144,7 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
     write(*,*) " - calc_explicit(1) timing: ", finish-start, "(s)"
     start = OMP_GET_WTIME()
     !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic)
-    do it = 1,1 ! kx loop
+    do it = 1,Nx ! kx loop
         ! Compute phi1 and T1
         if (proc_id == 0) then
             ! Need to pull all of the variables into local main memory.
@@ -226,67 +237,223 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
                 K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI,&
                 g1_total, g2_total, g3_total,&
                 T_MPI)
-            do i = 1,total_ny
-                write(*,*) i, tmp_T_MPI(i)
-            end do
+
+            ! Compute v1 from phi1
+            call calc_vi_mod_MPI(tmp_uy_MPI, tmp_phi_MPI, kx(it), total_ny, g1_total, g2_total, g3_total)
+
+            ! Receive dyv1 and dyv2
+            call MPI_RECV(dyv1_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 57, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+            call MPI_RECV(dyv2_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 58, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+            ! Receive V1, V2, phi1, phi2
+            do otherproc = 1,num_procs-1
+                stind = otherproc * Ny + 1
+                call MPI_RECV(V1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 59, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+                call MPI_RECV(V2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 60, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+                call MPI_RECV(phi1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 61, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+                call MPI_RECV(phi2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 62, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+            end do
+            ! Copy this rank's own blocks into the gathered arrays.
+            V1_MPI(1:Ny) = V1(1:Ny, it)
+            V2_MPI(1:Ny) = V2(1:Ny, it)
+            phi1_MPI(1:Ny) = phi1(1:Ny, it)
+            phi2_MPI(1:Ny) = phi2(1:Ny, it)
+
+            ! Receive the last elements of h1, h2, h3 from the last processor.
+            call MPI_RECV(h1_end_MPI, 1, MPI_DOUBLE, num_procs-1, 63, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+            call MPI_RECV(h2_end_MPI, 1, MPI_DOUBLE, num_procs-1, 64, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+            call MPI_RECV(h3_end_MPI, 1, MPI_DOUBLE, num_procs-1, 65, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+            ! BOUNDARY CONDITIONS!
+            call update_bcs_mod_MPI(tmp_phi1_MPI,tmp_uy1_MPI, tmp_phi_MPI,tmp_uy_MPI,&
+                                    dyv1_T_it_MPI,dyv2_T_it_MPI,&
+                                    dyv1_B(it),dyv2_B(it),&
+                                    V1_MPI,V2_MPI,phi1_MPI,phi2_MPI,&
+                                    total_ny, h1_end_MPI, h2_end_MPI, h3_end_MPI)
+
+            tmp_phi_MPI = tmp_phi1_MPI
+            tmp_uy_MPI = tmp_uy1_MPI
+
+            !
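!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!
! The update_bcs_mod_MPI call above appears to apply the usual influence-
! matrix (superposition) correction; its body is not shown in this patch, so
! the following is an interpretation, not the routine's code. With V1, V2 the
! homogeneous velocity solutions from init_bc_MPI and dyv1_B/dyv2_B,
! dyv1_T/dyv2_T their wall slopes, constants c1, c2 are chosen so that the
! corrected v has zero slope at both walls:
!
!   det = dyv1_B*dyv2_T - dyv2_B*dyv1_T
!   c1  = (dyv_T*dyv2_B - dyv_B*dyv2_T)/det   ! dyv_B, dyv_T: wall slopes of v
!   c2  = (dyv_B*dyv1_T - dyv_T*dyv1_B)/det
!   v   = v + c1*V1 + c2*V2                   ! and phi likewise with phi1, phi2
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::!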
Receive dynu from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + call MPI_RECV(dynu_MPI(stind), Ny, MPI_DOUBLE, otherproc, 66, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + dynu_MPI(1:Ny-1) = dynu(1:Ny-1) + + ! Implicit. + call calc_implicit_mod_MPI(tmp_K_phi_MPI,tmp_K_T_MPI, tmp_phi_MPI,tmp_T_MPI,& + kx(it), total_ny, dynu_MPI, g1_total, g2_total, g3_total) + + ! Receive h1,h2,h3 from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 67, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 68, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 69, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + h1_total(1:Ny) = h1 + h2_total(1:Ny) = h2 + h3_total(1:Ny) = h3 + + ! Compute u1 from v1 + if (kx(it) /= 0.0_dp) then + uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it) + else if (kx(it) == 0.0_dp) then + uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + + ! Send K1_phi, K1_T, phii, Ti, uyi, uxi back to each node. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_SEND(tmp_K_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 70, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_K_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 71, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 72, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 73, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 74, MPI_COMM_WORLD, mpierror) + call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 75, MPI_COMM_WORLD, mpierror) + end do + + ! Set local to main node variables. + K1_phi(1:Ny,it) = tmp_K_phi_MPI(1:Ny) + K1_T(1:Ny,it) = tmp_K_T_MPI(1:Ny) + phii(1:Ny,it) = tmp_phi_MPI(1:Ny) + Ti(1:Ny,it) = tmp_T_MPI(1:Ny) + uyi(1:Ny,it) = tmp_uy_MPI(1:Ny) + uxi(1:Ny,it) = uxi_MPI(1:Ny) + else if (proc_id == num_procs - 1) then ! 
Send data to main node - call MPI_SEND(phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1hat_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2hat_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K3hat_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1hat_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2hat_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K3hat_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2_phi(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2_T(1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) ! Send g1, g2, g3 to first node. call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send dyv1 and dyv2. + call MPI_SEND(dyv1_T(it), 1, MPI_DOUBLE, 0, 57, MPI_COMM_WORLD, mpierror) + call MPI_SEND(dyv2_T(it), 1, MPI_DOUBLE, 0, 58, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send h1,h2,h3 last elements. + call MPI_SEND(h1(Ny), 1, MPI_DOUBLE, 0, 63, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(Ny), 1, MPI_DOUBLE, 0, 64, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(Ny), 1, MPI_DOUBLE, 0, 65, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! 
Receive K1_phi, K1_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + else ! Send data to main node - call MPI_SEND(phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1hat_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2hat_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K3hat_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1hat_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2hat_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K3hat_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2_phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K1_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) - call MPI_SEND(K2_T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) - call MPI_SEND(T(1,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) ! Send g1, g2, g3 to first node. call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. 
+ call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K1_phi, K1_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) end if end do !$OMP END PARALLEL DO finish = OMP_GET_WTIME() write(*,*) " - stage 1 mid timing: ", finish-start, "(s)" + + + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) - ! if (proc_id == 0) then - ! open(unit=9010, file="P"//proc_id_str//"phiout_real.txt", action="write", status="unknown") - ! open(unit=9011, file="P"//proc_id_str//"phiout_im.txt", action="write", status="unknown") - ! do i=1,total_ny - ! do j=1,Nx - ! write (9010,*) REAL(Tout(i,j)) - ! write (9011,*) AIMAG(Tout(i,j)) - ! end do - ! end do - ! close(unit=9010) - ! close(unit=9011) - ! end if - write(*,*) "done writing phi!" + ! Compute K2hat + start = OMP_GET_WTIME() + call calc_explicit_MPI(2, proc_id, num_procs, proc_id_str) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(2) timing: ", finish-start, "(s)" + + open(unit=9010, file="P"//proc_id_str//"uxi_real_stage1.txt", action="write", status="unknown") + open(unit=9011, file="P"//proc_id_str//"uxi_im_stage1.txt", action="write", status="unknown") + do i=1,Ny + do j=1,Nx + write (9010,*) REAL(uxi(i,j)) + write (9011,*) AIMAG(uxi(i,j)) + end do + end do + close(unit=9010) + close(unit=9011) + write(*,*) "done writing uxi!" + if (time == t_final) then exit end if @@ -564,8 +731,8 @@ subroutine calc_vari_mod_MPI(phiout,Tout, aii, stage, total_ny, kx_it, phi_in, & FT = T_in(2:total_ny-1) + dt*(acoeffs(2,1)*k1_T_in + & & ahatcoeffs(3,1)*k1hat_T_in + & & ahatcoeffs(3,2)*k2hat_T_in) - FT(1) = FT(1) + kappa0*dt*aii*g1(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) - FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) + FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) + FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) phi_rhs(:,1) = real(Fphi) phi_rhs(:,2) = aimag(Fphi) @@ -593,8 +760,8 @@ subroutine calc_vari_mod_MPI(phiout,Tout, aii, stage, total_ny, kx_it, phi_in, & &ahatcoeffs(4,1)*k1hat_T_in + & &ahatcoeffs(4,2)*k2hat_T_in + & &ahatcoeffs(4,3)*k3hat_T_in) - FT(1) = FT(1) + kappa0*dt*aii*g1(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) - FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) + FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) + FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! 
b/c Ti(total_ny) = T(total_ny) phi_rhs(:,1) = real(Fphi) phi_rhs(:,2) = aimag(Fphi) @@ -616,6 +783,123 @@ subroutine calc_vari_mod_MPI(phiout,Tout, aii, stage, total_ny, kx_it, phi_in, & end subroutine calc_vari_mod_MPI !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! +subroutine calc_vi_mod_MPI(vi, phiin, kx_it, total_ny, g1_total, g2_total, g3_total) +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: phiin +complex(C_DOUBLE_COMPLEX), dimension(:), intent(out) :: vi +real(dp), intent(in) :: kx_it +real(dp), allocatable, dimension(:,:) :: vi_rhs +real(dp), allocatable, dimension(:) :: dvi, dlvi, duvi +integer :: j +integer, intent(in) :: total_ny +real(dp), dimension(:), intent(in) :: g1_total, g2_total, g3_total + +allocate(vi_rhs(total_ny-2,2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(dvi(total_ny-2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(dlvi(total_ny-3), duvi(total_ny-3), stat=alloc_err) +call check_alloc_err(alloc_err) + +vi = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +vi_rhs = 0.0_dp +dvi = 0.0_dp +dlvi = 0.0_dp +duvi = 0.0_dp + +do j = 2,total_ny-1 + dvi(j-1) = -kx_it**2.0_dp + g2_total(j) +end do + +do j = 2,total_ny-2 + duvi(j-1) = g3_total(j) +end do + +do j = 3,total_ny-1 + dlvi(j-2) = g1_total(j) +end do + +vi_rhs(:,1) = real (phiin(2:total_ny-1)) +vi_rhs(:,2) = aimag(phiin(2:total_ny-1)) + +call dgtsv(total_ny-2, 2, dlvi, dvi, duvi, vi_rhs, total_ny-2, info) + +vi(1) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +vi(2:total_ny-1) = cmplx(vi_rhs(:,1), vi_rhs(:,2), kind=C_DOUBLE_COMPLEX) +vi(total_ny) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + +end subroutine calc_vi_mod_MPI + +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +subroutine update_bcs_mod_MPI(phiout,vout, phiin,vin,dyv1_T_it,dyv2_T_it,dyv1_B_it,& + dyv2_B_it,V1_in, V2_in, phi1_in, phi2_in,& + total_ny, h1_end, h2_end, h3_end) + +integer, intent(in) :: total_ny +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: phiin, vin +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:), intent(out) :: phiout, vout +real(dp), dimension(2,2) :: C +real(dp) :: detC +complex(dp) :: c1, c2, c1t +complex(dp) :: dyV_T, dyV_B +real(dp), intent(in) :: dyv1_T_it,dyv2_T_it,dyv1_B_it,dyv2_B_it +real(dp), dimension(:), intent(in) :: V1_in, V2_in, phi1_in, phi2_in +real(dp), intent(in) :: h1_end, h2_end, h3_end +allocate(phiout(total_ny), vout(total_ny), stat=alloc_err) +call check_alloc_err(alloc_err) +phiout = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +vout = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +C = 0.0_dp + +C(1,1) = dyv1_T_it +C(1,2) = dyv2_T_it +C(2,1) = dyv1_B_it +C(2,2) = dyv2_B_it + +detC = C(1,1)*C(2,2) - C(1,2)*C(2,1) + +dyV_T = h1_end*vin(total_ny-2) + h2_end*vin(total_ny-1) + h3_end*vin(total_ny) +dyV_B = h1(1)*vin(1) + h2(1)*vin(2) + h3(1)*vin(3) + +! Need to negate b/c want to solve Cx = -c12. +c1 = -dyV_T +c2 = -dyV_B + +! Find c1 and c2. +if (detC == 0.0_dp) then + c1 = (0.0_dp, 0.0_dp) + c2 = (0.0_dp, 0.0_dp) +else + c1t = (C(2,2)*c1 - C(1,2)*c2) / detC + c2 = (C(1,1)*c2 - C(2,1)*c1) / detC + c1 = c1t +end if + +! Update uy and Phi. 
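For reference, the solve above is Cramer's rule on the 2x2 influence matrix. calc_vi_mod_MPI (which factors the tridiagonal d2y - kx**2 operator with LAPACK's dgtsv, carrying the real and imaginary parts as two right-hand-side columns) returns v with homogeneous wall values only, so its wall derivatives dyV_T and dyV_B are generally nonzero. V1, V2 (and phi1, phi2) are precomputed homogeneous solutions, and c1, c2 solve

    [ dyv1_T_it  dyv2_T_it ] [c1]   [ -dyV_T ]
    [ dyv1_B_it  dyv2_B_it ] [c2] = [ -dyV_B ]

so that the corrected field vin + c1*V1_in + c2*V2_in has zero wall derivative, presumably enforcing no-slip. The two lines below apply exactly that correction to v and phi.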
+vout = vin + c1*V1_in + c2*V2_in +phiout = phiin + c1*phi1_in + c2*phi2_in + +end subroutine update_bcs_mod_MPI + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +subroutine calc_implicit_mod_MPI(Kphi,KT, phiin,Tin, kx_it, total_ny, dynu_in, g1_total, g2_total, g3_total) + +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: phiin, Tin +complex(C_DOUBLE_COMPLEX), dimension(:), intent(out) :: Kphi, KT +real(dp), intent(in) :: kx_it +integer, intent(in) :: total_ny +real(dp), dimension(:), intent(in) :: dynu_in, g1_total, g2_total, g3_total + +Kphi = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +KT = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + +Kphi = nu0 *(-kx_it**2.0_dp*phiin + d2y_MPI(phiin, dynu_in, g1_total, g2_total, g3_total, total_ny)) +KT = kappa0*(-kx_it**2.0_dp*Tin + d2y_MPI(Tin, dynu_in, g1_total, g2_total, g3_total, total_ny)) + +end subroutine calc_implicit_mod_MPI + !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! end module time_integrators_MPI From 56d86c45ae098d8eee512d81f8205ee3b640ac95 Mon Sep 17 00:00:00 2001 From: Michael Neuder Date: Fri, 30 Apr 2021 09:43:46 -0400 Subject: [PATCH 17/24] finished stage 2 --- global.f90 | 159 ++++++++++++++++++- mesh_pack.f90 | 6 +- time_integrators_MPI.f90 | 332 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 475 insertions(+), 22 deletions(-) diff --git a/global.f90 b/global.f90 index 96ca400..e0c750b 100755 --- a/global.f90 +++ b/global.f90 @@ -3,6 +3,7 @@ module global use fftw implicit none +include 'mpif.h' save ! Get double-precision type @@ -103,10 +104,9 @@ module global complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_phi, tmp_phi1, tmp_T, tmp_uy, tmp_uy1 complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_K_phi, tmp_K_T - - contains + subroutine global_params implicit none @@ -365,6 +365,73 @@ end function d1y_MPI !::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +function d1y_MPI2(vec, proc_id, num_procs) result(d1yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: vec +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: d1yvec +integer, intent(in) :: proc_id, num_procs +integer :: alloc_err, jj, n, i, mpierror +complex(C_DOUBLE_COMPLEX) :: prev_MPI, post_MPI + +n = size(vec) + +allocate(d1yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d1y." + stop +end if + +d1yvec = 0.0_dp + +if (proc_id == 0) then + ! Send nth element of vector + call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 78, MPI_COMM_WORLD, mpierror) + ! Receive post element of vector + call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 79, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + d1yvec(1) = h1(1)*vec(1) + h2(1)*vec(2) + h3(1)*vec(3) + do jj = 2,n-1 + d1yvec(jj) = h1(jj)*vec(jj-1) + h2(jj)*vec(jj) + h3(jj)*vec(jj+1) + end do + d1yvec(n) = h1(n)*vec(n-1) + h2(n)*vec(n) + h3(n)*post_MPI +else if (proc_id == num_procs - 1) then + ! Receive previous element of vector + call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 78, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send first element of vector. + call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 79, MPI_COMM_WORLD, mpierror) + d1yvec(1) = h1(1)*prev_MPI + h2(1)*vec(1) + h3(1)*vec(2) + do jj = 2,n-1 + d1yvec(jj) = h1(jj)*vec(jj-1) + h2(jj)*vec(jj) + h3(jj)*vec(jj+1) + end do + d1yvec(n) = h1(n)*vec(n-2) + h2(n)*vec(n-1) + h3(n)*vec(n) +else + ! 
Send nth element of vector + call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 78, MPI_COMM_WORLD, mpierror) + ! Receive previous element of vector + call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 78, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send first element of vector. + call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 79, MPI_COMM_WORLD, mpierror) + ! Receive post element of vector + call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 79, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + d1yvec(1) = h1(1)*prev_MPI + h2(1)*vec(1) + h3(1)*vec(2) + do jj = 2,n-1 + d1yvec(jj) = h1(jj)*vec(jj-1) + h2(jj)*vec(jj) + h3(jj)*vec(jj+1) + end do + d1yvec(n) = h1(n)*vec(n-1) + h2(n)*vec(n) + h3(n)*post_MPI +end if + + + +end function d1y_MPI2 + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + function d2y(vec) result(d2yvec) implicit none @@ -477,6 +544,94 @@ end function d2y_MPI !::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +function d2y_MPI2(vec, proc_id, num_procs) result(d2yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), intent(in) :: vec(:) +integer , intent(in) :: proc_id, num_procs +complex(C_DOUBLE_COMPLEX), allocatable :: d2yvec(:) +real(dp) :: d1, d2, d3, d4 +real(dp) :: ell1, ell2, ell3, ell4 +real(dp) :: dNm3, dNm2, dNm1, dN +real(dp) :: ellNm3, ellNm2, ellNm1, ellN +integer :: jj, n, mpierror +complex(C_DOUBLE_COMPLEX) :: prev_MPI, post_MPI + +n = size(vec) + +allocate(d2yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d2y." + stop +end if + +d2yvec = 0.0_dp + +d1 = -dynu(1)*(dynu(1)+dynu(2))*(dynu(1)+dynu(2)+dynu(3)) +d2 = dynu(1)*dynu(2)*(dynu(2) + dynu(3)) +d3 = -dynu(2)*dynu(3)*(dynu(1) + dynu(2)) +d4 = dynu(3)*(dynu(2)+dynu(3))*(dynu(1)+dynu(2)+dynu(3)) + +ell1 = -2.0_dp*(3.0_dp*dynu(1) + 2.0_dp*dynu(2) + dynu(3)) / d1 +ell2 = -2.0_dp*(2.0_dp*(dynu(1) + dynu(2)) + dynu(3)) / d2 +ell3 = -2.0_dp*(2.0_dp*dynu(1) + dynu(2) + dynu(3)) / d3 +ell4 = -2.0_dp*(2.0_dp*dynu(1) + dynu(2)) / d4 + +dN = dynu(Ny)*(dynu(Ny)+dynu(Ny-1))*(dynu(Ny)+dynu(Ny-1)+dynu(Ny-2)) +dNm1 = -dynu(Ny)*dynu(Ny-1)*(dynu(Ny-1)+dynu(Ny-2)) +dNm2 = dynu(Ny-1)*dynu(Ny-2)*(dynu(Ny)+dynu(Ny-1)) +dNm3 = -dynu(Ny-2)*(dynu(Ny-1)+dynu(Ny-2))*(dynu(Ny)+dynu(Ny-1)+dynu(Ny-2)) + +ellN = 2.0_dp*(3.0_dp*dynu(Ny) + 2.0_dp*dynu(Ny-1) + dynu(Ny-2)) / dN +ellNm1 = 2.0_dp*(2.0_dp*(dynu(Ny)+dynu(Ny-1)) + dynu(Ny-2)) / dNm1 +ellNm2 = 2.0_dp*(2.0_dp*dynu(Ny) + dynu(Ny-1) + dynu(Ny-2)) / dNm2 +ellNm3 = 2.0_dp*(2.0_dp*dynu(Ny) + dynu(Ny-1)) / dNm3 + +if (proc_id == 0) then + d2yvec(1) = ell1*vec(1) + ell2*vec(2) + ell3*vec(3) + ell4*vec(4) + ! Send nth element of vector + call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 76, MPI_COMM_WORLD, mpierror) + ! Receive post element of vector + call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 77, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + do jj = 2,n-1 + d2yvec(jj) = g1(jj)*vec(jj-1) + g2(jj)*vec(jj) + g3(jj)*vec(jj+1) + end do + d2yvec(n) = g1(n)*vec(n-1) + g2(n)*vec(n) + g3(n)*post_MPI +else if (proc_id == num_procs-1) then + ! Receive previous element of vector + call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 76, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send first element of vector. 
+ call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 77, MPI_COMM_WORLD, mpierror)
+ d2yvec(1) = g1(1)*prev_MPI + g2(1)*vec(1) + g3(1)*vec(2)
+ do jj = 2,n-1
+ d2yvec(jj) = g1(jj)*vec(jj-1) + g2(jj)*vec(jj) + g3(jj)*vec(jj+1)
+ end do
+ d2yvec(n) = ellNm3*vec(n-3) + ellNm2*vec(n-2) + ellNm1*vec(n-1) + ellN*vec(n)
+else
+ ! Send nth element of vector
+ call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 76, MPI_COMM_WORLD, mpierror)
+ ! Receive previous element of vector
+ call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 76, &
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ ! Send first element of vector.
+ call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 77, MPI_COMM_WORLD, mpierror)
+ ! Receive post element of vector
+ call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 77, &
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ d2yvec(1) = g1(1)*prev_MPI + g2(1)*vec(1) + g3(1)*vec(2)
+ do jj = 2,n-1
+ d2yvec(jj) = g1(jj)*vec(jj-1) + g2(jj)*vec(jj) + g3(jj)*vec(jj+1)
+ end do
+ d2yvec(n) = g1(n)*vec(n-1) + g2(n)*vec(n) + g3(n)*post_MPI
+endif
+
+end function d2y_MPI2
+
+!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+
 subroutine check_alloc_err(err_mess)
 
 integer, intent(in) :: err_mess
diff --git a/mesh_pack.f90 b/mesh_pack.f90
index cec5b2d..965550c 100755
--- a/mesh_pack.f90
+++ b/mesh_pack.f90
@@ -1,7 +1,7 @@
 module mesh_pack
 
 use global
-include 'mpif.h'
+! include 'mpif.h'
 
 contains
 
@@ -107,6 +107,10 @@ subroutine dxdydz(dyj, xcoor,ycoor,zcoor)
 dyj(jj-1) = ycoor(jj) - ycoor(jj-1)
 end do
 
+! do jj=1,numy-1
+! write(*,*) jj, dyj(jj)
+! end do
+
 end subroutine dxdydz
 
 ! :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index 9bbb35d..28752fa 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -8,6 +8,7 @@ module time_integrators_MPI
 use statistics
 use omp_lib
 
+
 integer :: it, jt, kkt
 integer :: info
 real(dp) :: time, dtmax, dtmin, dt_old, dt_ramp, dt_final
@@ -433,26 +434,324 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
 finish = OMP_GET_WTIME()
 write(*,*) " - stage 1 mid timing: ", finish-start, "(s)"
-
-
 call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 ! Compute K2hat
 start = OMP_GET_WTIME()
 call calc_explicit_MPI(2, proc_id, num_procs, proc_id_str)
 finish = OMP_GET_WTIME()
 write(*,*) " - calc_explicit(2) timing: ", finish-start, "(s)"
+ call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
+
+ !:::::::::::
+ ! STAGE 2 ::
+ !:::::::::::
+ start = OMP_GET_WTIME()
+ !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic)
+ do it = 1,Nx ! kx loop
+ ! Compute phi2 and T2
+ if (proc_id == 0) then
+ ! Need to pull all of the variables into local main memory.
+ phi_MPI(1:Ny-1) = phi(2:Ny,it)
+ K1hat_phi_MPI(1:Ny-1) = K1hat_phi(2:Ny,it)
+ K2hat_phi_MPI(1:Ny-1) = K2hat_phi(2:Ny,it)
+ K3hat_phi_MPI(1:Ny-1) = K3hat_phi(2:Ny,it)
+ K1hat_T_MPI(1:Ny-1) = K1hat_T(2:Ny,it)
+ K2hat_T_MPI(1:Ny-1) = K2hat_T(2:Ny,it)
+ K3hat_T_MPI(1:Ny-1) = K3hat_T(2:Ny,it)
+ K1_phi_MPI(1:Ny-1) = K1_phi(2:Ny,it)
+ K2_phi_MPI(1:Ny-1) = K2_phi(2:Ny,it)
+ K1_T_MPI(1:Ny-1) = K1_T(2:Ny,it)
+ K2_T_MPI(1:Ny-1) = K2_T(2:Ny,it)
+ T_MPI(1:Ny) = T(1:Ny,it)
+
+ ! Receive pieces from other nodes. 
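These tag-by-tag receives implement a gather by hand. For comparison, a hedged sketch of the same transfer as one collective call; this idealized form assumes every rank contributes exactly Ny entries, which is not quite true here (ranks 0 and num_procs-1 contribute Ny-1 boundary-trimmed rows, so MPI_GATHERV with per-rank counts would be the closer fit):

    call MPI_GATHER(phi(1,it), Ny, MPI_C_DOUBLE_COMPLEX, &
                    phi_MPI(1), Ny, MPI_C_DOUBLE_COMPLEX, &
                    0, MPI_COMM_WORLD, mpierror)

The explicit per-rank loop below keeps the offsets visible instead.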
+ do otherproc = 1,num_procs-2 + stind = otherproc * Ny + call MPI_RECV(phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + + ! Receive from last node. + stind = (num_procs-1) * Ny + call MPI_RECV(phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + + ! Receive g1,g2,g3 from all other nodes. 
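One portability note on the real-valued transfers here: MPI_DOUBLE is the C-binding datatype name, while the Fortran datatype matching real(dp) is MPI_DOUBLE_PRECISION. Many mpif.h headers define both handles, which is presumably why these calls work as written, but

    call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE_PRECISION, otherproc, 54, &
                  MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)

is the portable spelling. The stencil gathers below use MPI_DOUBLE throughout.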
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 54, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 55, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 56, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ g1_total(1:Ny) = g1
+ g2_total(1:Ny) = g2
+ g3_total(1:Ny) = g3
+
+ ! Call vari mod
+ call calc_vari_mod_MPI(tmp_phi_MPI, tmp_T_MPI, acoeffs(2,2), 2, total_ny,&
+ kx(it), phi_MPI,&
+ K1hat_phi_MPI,K2hat_phi_MPI,K3hat_phi_MPI,&
+ K1hat_T_MPI,K2hat_T_MPI,K3hat_T_MPI,&
+ K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI,&
+ g1_total, g2_total, g3_total,&
+ T_MPI)
+
+ ! Compute v1 from phi1
+ call calc_vi_mod_MPI(tmp_uy_MPI, tmp_phi_MPI, kx(it), total_ny, g1_total, g2_total, g3_total)
+
+ ! Receive dyv1 and dyv2
+ call MPI_RECV(dyv1_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 57, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(dyv2_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 58, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+ ! Receive V1, V2, phi1, phi2
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_RECV(V1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 59, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(V2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 60, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(phi1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 61, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(phi2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 62, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ ! Set local variables to MPI.
+ V1_MPI(1:Ny) = V1(1:Ny, it)
+ V2_MPI(1:Ny) = V2(1:Ny, it)
+ phi1_MPI(1:Ny) = phi1(1:Ny, it)
+ phi2_MPI(1:Ny) = phi2(1:Ny, it)
+
+ ! Receive end of h1, h2, h3 for last processor.
+ call MPI_RECV(h1_end_MPI, 1, MPI_DOUBLE, num_procs-1, 63, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_end_MPI, 1, MPI_DOUBLE, num_procs-1, 64, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_end_MPI, 1, MPI_DOUBLE, num_procs-1, 65, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+ ! BOUNDARY CONDITIONS!
+ call update_bcs_mod_MPI(tmp_phi1_MPI,tmp_uy1_MPI, tmp_phi_MPI,tmp_uy_MPI,&
+ dyv1_T_it_MPI,dyv2_T_it_MPI,&
+ dyv1_B(it),dyv2_B(it),&
+ V1_MPI,V2_MPI,phi1_MPI,phi2_MPI,&
+ total_ny, h1_end_MPI, h2_end_MPI, h3_end_MPI)
+
+ tmp_phi_MPI = tmp_phi1_MPI
+ tmp_uy_MPI = tmp_uy1_MPI
+
+ ! Receive dynu from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny
+ call MPI_RECV(dynu_MPI(stind), Ny, MPI_DOUBLE, otherproc, 66, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ dynu_MPI(1:Ny-1) = dynu(1:Ny-1)
+
+ ! Implicit.
+ call calc_implicit_mod_MPI(tmp_K_phi_MPI,tmp_K_T_MPI, tmp_phi_MPI,tmp_T_MPI,&
+ kx(it), total_ny, dynu_MPI, g1_total, g2_total, g3_total)
+
+ ! Receive h1,h2,h3 from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 67, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 68, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 69, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ h1_total(1:Ny) = h1
+ h2_total(1:Ny) = h2
+ h3_total(1:Ny) = h3
+
+ ! 
Compute u1 from v1 + if (kx(it) /= 0.0_dp) then + uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it) + else if (kx(it) == 0.0_dp) then + uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + + ! Send K2_phi, K2_T, phii, Ti, uyi, uxi back to each node. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_SEND(tmp_K_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 70, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_K_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 71, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 72, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 73, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 74, MPI_COMM_WORLD, mpierror) + call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 75, MPI_COMM_WORLD, mpierror) + end do + + ! Set local to main node variables. + K2_phi(1:Ny,it) = tmp_K_phi_MPI(1:Ny) + K2_T(1:Ny,it) = tmp_K_T_MPI(1:Ny) + phii(1:Ny,it) = tmp_phi_MPI(1:Ny) + Ti(1:Ny,it) = tmp_T_MPI(1:Ny) + uyi(1:Ny,it) = tmp_uy_MPI(1:Ny) + uxi(1:Ny,it) = uxi_MPI(1:Ny) + + + else if (proc_id == num_procs - 1) then + ! Send data to main node + call MPI_SEND(phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send dyv1 and dyv2. + call MPI_SEND(dyv1_T(it), 1, MPI_DOUBLE, 0, 57, MPI_COMM_WORLD, mpierror) + call MPI_SEND(dyv2_T(it), 1, MPI_DOUBLE, 0, 58, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send h1,h2,h3 last elements. + call MPI_SEND(h1(Ny), 1, MPI_DOUBLE, 0, 63, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(Ny), 1, MPI_DOUBLE, 0, 64, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(Ny), 1, MPI_DOUBLE, 0, 65, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. 
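The uxi update in rank 0's branch above is just the continuity relation: with the single-mode ansatz exp(i*kx*x), incompressibility gives

    dx(ux) + dy(uy) = 0   =>   i*kx*ux + dy(uy) = 0   =>   ux = i*dy(uy)/kx

which is exactly uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, ...)/kx(it); the kx = 0 mean mode is pinned to zero (zero mean flow). Back in the last rank's branch, the dynu send follows: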
+ call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) - open(unit=9010, file="P"//proc_id_str//"uxi_real_stage1.txt", action="write", status="unknown") - open(unit=9011, file="P"//proc_id_str//"uxi_im_stage1.txt", action="write", status="unknown") + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K2_phi, K2_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + else + ! Send data to main node + call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K2_phi, K2_T, phii, Ti, uyi, uxi from main node. 
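A caution that applies to all of these stage loops: the enclosing !$OMP PARALLEL DO means the blocking sends and receives can be issued from several threads at once, which is only defined behavior if MPI was initialized with full thread support. A sketch, not part of the patch:

    include 'mpif.h'
    integer :: provided, mpierror
    call MPI_INIT_THREAD(MPI_THREAD_MULTIPLE, provided, mpierror)
    if (provided < MPI_THREAD_MULTIPLE) then
       write(*,*) "MPI_THREAD_MULTIPLE unavailable; run the kx loop single-threaded"
       call MPI_ABORT(MPI_COMM_WORLD, 1, mpierror)
    end if

Even then, because the tags (42-75) do not encode it, two concurrent kx iterations on the same rank pair could cross-match each other's messages; as written the loop is only safe with one thread per rank, or with it folded into the tag. The receives from the main node resume below.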
+ call MPI_RECV(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end if + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 2 mid timing: ", finish-start, "(s)" + ! Compute K3hat + start = OMP_GET_WTIME() + call calc_explicit_MPI(3, proc_id, num_procs, proc_id_str) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(3) timing: ", finish-start, "(s)" + + + open(unit=9010, file="P"//proc_id_str//"uyi_real_stage2.txt", action="write", status="unknown") + open(unit=9011, file="P"//proc_id_str//"uyi_im_stage2.txt", action="write", status="unknown") do i=1,Ny do j=1,Nx - write (9010,*) REAL(uxi(i,j)) - write (9011,*) AIMAG(uxi(i,j)) + write (9010,*) REAL(uyi(i,j)) + write (9011,*) AIMAG(uyi(i,j)) end do end do close(unit=9010) close(unit=9011) - write(*,*) "done writing uxi!" + write(*,*) "done writing uyi!" if (time == t_final) then exit @@ -508,7 +807,7 @@ subroutine calc_explicit_MPI(stage, proc_id, num_procs, proc_id_str) ! Compute dx(T) in Fourier space nlT (:,i) = kx(i)*Ti(:,i) ! Compute D2(ux) - nlphi(:,i) = -kx(i)**2.0_dp*uxi(:,i) + d2y(uxi(:,i)) + nlphi(:,i) = -kx(i)**2.0_dp*uxi(:,i) + d2y_MPI2(uxi(:,i), proc_id, num_procs) end do !$OMP END PARALLEL DO finish = OMP_GET_WTIME() @@ -551,7 +850,7 @@ subroutine calc_explicit_MPI(stage, proc_id, num_procs, proc_id_str) do i = 1,Nx ! Temperature tmp_T = Ti(:,i) - nlT(:,i) = uxi(:,i)*nlT(:,i) + uyi(:,i)*d1y(tmp_T) + nlT(:,i) = uxi(:,i)*nlT(:,i) + uyi(:,i)*d1y_MPI2(tmp_T, proc_id, num_procs) ! phi nlphi(:,i) = uxi(:,i)*phii(:,i) - uyi(:,i)*nlphi(:,i) end do @@ -685,13 +984,6 @@ subroutine calc_vari_mod_MPI(phiout,Tout, aii, stage, total_ny, kx_it, phi_in, & dphi(jt-1) = 1.0_dp - nu0 *dt*aii*(-kx_it**2.0_dp + g2_total(jt)) end do -! do i = 2,total_ny-1 -! write(*,*) i, ddT (i-1) -! end do - -! write(*,*) kappa0 ,dt , aii - - do jt = 2,total_ny-2 duT (jt-1) = -kappa0*dt*g3_total(jt)*aii duphi(jt-1) = -nu0 *dt*g3_total(jt)*aii @@ -734,6 +1026,10 @@ subroutine calc_vari_mod_MPI(phiout,Tout, aii, stage, total_ny, kx_it, phi_in, & FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) + ! do i = 1,total_ny-2 + ! write(*,*) i, k2hat_phi_in(i) + ! end do + phi_rhs(:,1) = real(Fphi) phi_rhs(:,2) = aimag(Fphi) @@ -829,8 +1125,6 @@ subroutine calc_vi_mod_MPI(vi, phiin, kx_it, total_ny, g1_total, g2_total, g3_to end subroutine calc_vi_mod_MPI -!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! 
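The d1y_MPI2/d2y_MPI2 calls now used in calc_explicit_MPI do a nearest-neighbor halo exchange in which interior ranks post blocking MPI_SENDs before the matching receives. One-element messages are normally delivered eagerly, so this works in practice, but a self-pairing MPI_SENDRECV cannot deadlock regardless of message size. A sketch for an interior rank (edge ranks would pass MPI_PROC_NULL as the missing neighbor):

    call MPI_SENDRECV(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 76, &
                      prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 76, &
                      MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
    call MPI_SENDRECV(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 77, &
                      post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 77, &
                      MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)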
-
 
 !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 subroutine update_bcs_mod_MPI(phiout,vout, phiin,vin,dyv1_T_it,dyv2_T_it,dyv1_B_it,&
 dyv2_B_it,V1_in, V2_in, phi1_in, phi2_in,&
 total_ny, h1_end, h2_end, h3_end)

From 374b9f8eb9f93c9db3881d5bc1d266465aa17092 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Fri, 30 Apr 2021 10:04:00 -0400
Subject: [PATCH 18/24] finished stage 3

---
 time_integrators_MPI.f90 | 306 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 304 insertions(+), 2 deletions(-)

diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index 28752fa..3aaaaf8 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -734,15 +734,317 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
 !$OMP END PARALLEL DO
 finish = OMP_GET_WTIME()
 write(*,*) " - stage 2 mid timing: ", finish-start, "(s)"
+ call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 ! Compute K3hat
 start = OMP_GET_WTIME()
 call calc_explicit_MPI(3, proc_id, num_procs, proc_id_str)
 finish = OMP_GET_WTIME()
 write(*,*) " - calc_explicit(3) timing: ", finish-start, "(s)"
+ call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
+
+ !:::::::::::
+ ! STAGE 3 ::
+ !:::::::::::
+ start = OMP_GET_WTIME()
+ !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic)
+ do it = 1,Nx ! kx loop
+ ! Compute phi3 and T3
+ if (proc_id == 0) then
+ ! Need to pull all of the variables into local main memory.
+ phi_MPI(1:Ny-1) = phi(2:Ny,it)
+ K1hat_phi_MPI(1:Ny-1) = K1hat_phi(2:Ny,it)
+ K2hat_phi_MPI(1:Ny-1) = K2hat_phi(2:Ny,it)
+ K3hat_phi_MPI(1:Ny-1) = K3hat_phi(2:Ny,it)
+ K1hat_T_MPI(1:Ny-1) = K1hat_T(2:Ny,it)
+ K2hat_T_MPI(1:Ny-1) = K2hat_T(2:Ny,it)
+ K3hat_T_MPI(1:Ny-1) = K3hat_T(2:Ny,it)
+ K1_phi_MPI(1:Ny-1) = K1_phi(2:Ny,it)
+ K2_phi_MPI(1:Ny-1) = K2_phi(2:Ny,it)
+ K1_T_MPI(1:Ny-1) = K1_T(2:Ny,it)
+ K2_T_MPI(1:Ny-1) = K2_T(2:Ny,it)
+ T_MPI(1:Ny) = T(1:Ny,it)
+
+ ! Receive pieces from other nodes.
+ do otherproc = 1,num_procs-2
+ stind = otherproc * Ny
+ call MPI_RECV(phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K1hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K2hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K3hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K1hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K2hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K3hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K1_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K2_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K1_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(K2_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, &
+ otherproc, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+
+ ! Receive from last node. 
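The trimmed counts in this gather are consistent: rank 0 contributes Ny-1 interior rows, each of the num_procs-2 middle ranks contributes Ny, and the last rank (received just below) another Ny-1, giving

    (Ny-1) + (num_procs-2)*Ny + (Ny-1) = num_procs*Ny - 2 = total_ny - 2

rows, exactly the declared size of phi_MPI. The two missing rows are the wall points, which are set by the boundary conditions rather than solved for.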
+ stind = (num_procs-1) * Ny + call MPI_RECV(phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + + ! Receive g1,g2,g3 from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 54, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 55, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 56, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + g1_total(1:Ny) = g1 + g2_total(1:Ny) = g2 + g3_total(1:Ny) = g3 + + ! Call vari mod + call calc_vari_mod_MPI(tmp_phi_MPI, tmp_T_MPI, acoeffs(3,3), 3, total_ny,& + kx(it), phi_MPI,& + K1hat_phi_MPI,K2hat_phi_MPI,K3hat_phi_MPI,& + K1hat_T_MPI,K2hat_T_MPI,K3hat_T_MPI,& + K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI,& + g1_total, g2_total, g3_total,& + T_MPI) + + ! Compute v1 from phi1 + call calc_vi_mod_MPI(tmp_uy_MPI, tmp_phi_MPI, kx(it), total_ny, g1_total, g2_total, g3_total) + + ! Receive dyv1 and dyv2 + call MPI_RECV(dyv1_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 57, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(dyv2_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 58, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + ! Receive V1, V2, phi1, phi2 + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(V1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 59, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(V2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 60, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 61, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 62, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + ! Set local variables to MPI. + V1_MPI(1:Ny) = V1(1:Ny, it) + V2_MPI(1:Ny) = V2(1:Ny, it) + phi1_MPI(1:Ny) = phi1(1:Ny, it) + phi2_MPI(1:Ny) = phi2(1:Ny, it) + + ! Receive end of h1, h2, h3 for last processor. 
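h1_end_MPI, h2_end_MPI, and h3_end_MPI are the final row of the last rank's first-derivative stencil; update_bcs_mod_MPI uses them to form the top-wall derivative dyV_T = h1_end*vin(total_ny-2) + h2_end*vin(total_ny-1) + h3_end*vin(total_ny), while the bottom wall uses rank 0's own h1(1), h2(1), h3(1). The three scalar receives follow: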
+ call MPI_RECV(h1_end_MPI, 1, MPI_DOUBLE, num_procs-1, 63, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_end_MPI, 1, MPI_DOUBLE, num_procs-1, 64, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_end_MPI, 1, MPI_DOUBLE, num_procs-1, 65, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+ ! BOUNDARY CONDITIONS!
+ call update_bcs_mod_MPI(tmp_phi1_MPI,tmp_uy1_MPI, tmp_phi_MPI,tmp_uy_MPI,&
+ dyv1_T_it_MPI,dyv2_T_it_MPI,&
+ dyv1_B(it),dyv2_B(it),&
+ V1_MPI,V2_MPI,phi1_MPI,phi2_MPI,&
+ total_ny, h1_end_MPI, h2_end_MPI, h3_end_MPI)
+
+ tmp_phi_MPI = tmp_phi1_MPI
+ tmp_uy_MPI = tmp_uy1_MPI
+ ! Receive dynu from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny
+ call MPI_RECV(dynu_MPI(stind), Ny, MPI_DOUBLE, otherproc, 66, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ dynu_MPI(1:Ny-1) = dynu(1:Ny-1)
+
+ ! Implicit.
+ call calc_implicit_mod_MPI(tmp_K_phi_MPI,tmp_K_T_MPI, tmp_phi_MPI,tmp_T_MPI,&
+ kx(it), total_ny, dynu_MPI, g1_total, g2_total, g3_total)
+
+ ! Receive h1,h2,h3 from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 67, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 68, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 69, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ h1_total(1:Ny) = h1
+ h2_total(1:Ny) = h2
+ h3_total(1:Ny) = h3
+
+ ! Compute u1 from v1
+ if (kx(it) /= 0.0_dp) then
+ uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it)
+ else if (kx(it) == 0.0_dp) then
+ uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow!
+ end if
+
+ ! Send K3_phi, K3_T, phii, Ti, uyi, uxi back to each node.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_SEND(tmp_K_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 70, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_K_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 71, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 72, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 73, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 74, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 75, MPI_COMM_WORLD, mpierror)
+ end do
+
+ ! Set local to main node variables.
+ K3_phi(1:Ny,it) = tmp_K_phi_MPI(1:Ny)
+ K3_T(1:Ny,it) = tmp_K_T_MPI(1:Ny)
+ phii(1:Ny,it) = tmp_phi_MPI(1:Ny)
+ Ti(1:Ny,it) = tmp_T_MPI(1:Ny)
+ uyi(1:Ny,it) = tmp_uy_MPI(1:Ny)
+ uxi(1:Ny,it) = uxi_MPI(1:Ny)
+
+
+ else if (proc_id == num_procs - 1) then
+ ! 
Send data to main node + call MPI_SEND(phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send dyv1 and dyv2. + call MPI_SEND(dyv1_T(it), 1, MPI_DOUBLE, 0, 57, MPI_COMM_WORLD, mpierror) + call MPI_SEND(dyv2_T(it), 1, MPI_DOUBLE, 0, 58, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send h1,h2,h3 last elements. + call MPI_SEND(h1(Ny), 1, MPI_DOUBLE, 0, 63, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(Ny), 1, MPI_DOUBLE, 0, 64, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(Ny), 1, MPI_DOUBLE, 0, 65, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K3_phi, K3_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K3_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + else + ! 
Send data to main node + call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K3_phi, K3_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K3_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end if + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 3 mid timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + ! 
+   ! Compute K4hat
+   start = OMP_GET_WTIME()
+   call calc_explicit_MPI(4, proc_id, num_procs, proc_id_str)
+   finish = OMP_GET_WTIME()
+   write(*,*) " - calc_explicit(4) timing: ", finish-start, "(s)"
+   call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
-   open(unit=9010, file="P"//proc_id_str//"uyi_real_stage2.txt", action="write", status="unknown")
-   open(unit=9011, file="P"//proc_id_str//"uyi_im_stage2.txt", action="write", status="unknown")
+   open(unit=9010, file="P"//proc_id_str//"uyi_real_stage3.txt", action="write", status="unknown")
+   open(unit=9011, file="P"//proc_id_str//"uyi_im_stage3.txt", action="write", status="unknown")
    do i=1,Ny
       do j=1,Nx
          write (9010,*) REAL(uyi(i,j))

From 826d2c2ea23c5f61ff9916daa8d8e0af39de38e0 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Fri, 30 Apr 2021 10:19:12 -0400
Subject: [PATCH 19/24] finished update sols

---
 time_integrators_MPI.f90 | 43 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index 3aaaaf8..1980417 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -1043,17 +1043,50 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
    write(*,*) " - calc_explicit(4) timing: ", finish-start, "(s)"
    call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
-   open(unit=9010, file="P"//proc_id_str//"uyi_real_stage3.txt", action="write", status="unknown")
-   open(unit=9011, file="P"//proc_id_str//"uyi_im_stage3.txt", action="write", status="unknown")
+   ! UPDATE SOLUTIONS
+   if (proc_id == 0) then
+      ! Get phi
+      phi(2:Ny,:) = phi(2:Ny,:) + dt*(b(1)*(K1_phi(2:Ny,:) + K2hat_phi(2:Ny,:)) + &
+                  & b(2)*(K2_phi(2:Ny,:) + K3hat_phi(2:Ny,:)) + &
+                  & b(3)*(K3_phi(2:Ny,:) + K4hat_phi(2:Ny,:)))
+
+      ! Get temperature
+      T(2:Ny,:) = T(2:Ny,:) + dt*(b(1)*(K1_T(2:Ny,:) + K2hat_T(2:Ny,:)) + &
+                & b(2)*(K2_T(2:Ny,:) + K3hat_T(2:Ny,:)) + &
+                & b(3)*(K3_T(2:Ny,:) + K4hat_T(2:Ny,:)))
+   else if (proc_id == num_procs - 1) then
+      ! Get phi
+      phi(1:Ny-1,:) = phi(1:Ny-1,:) + dt*(b(1)*(K1_phi(1:Ny-1,:) + K2hat_phi(1:Ny-1,:)) + &
+                    & b(2)*(K2_phi(1:Ny-1,:) + K3hat_phi(1:Ny-1,:)) + &
+                    & b(3)*(K3_phi(1:Ny-1,:) + K4hat_phi(1:Ny-1,:)))
+
+      ! Get temperature
+      T(1:Ny-1,:) = T(1:Ny-1,:) + dt*(b(1)*(K1_T(1:Ny-1,:) + K2hat_T(1:Ny-1,:)) + &
+                  & b(2)*(K2_T(1:Ny-1,:) + K3hat_T(1:Ny-1,:)) + &
+                  & b(3)*(K3_T(1:Ny-1,:) + K4hat_T(1:Ny-1,:)))
+   else
+      ! Get phi
+      phi(1:Ny,:) = phi(1:Ny,:) + dt*(b(1)*(K1_phi(1:Ny,:) + K2hat_phi(1:Ny,:)) + &
+                  & b(2)*(K2_phi(1:Ny,:) + K3hat_phi(1:Ny,:)) + &
+                  & b(3)*(K3_phi(1:Ny,:) + K4hat_phi(1:Ny,:)))
+
+      ! Get temperature
+      T(1:Ny,:) = T(1:Ny,:) + dt*(b(1)*(K1_T(1:Ny,:) + K2hat_T(1:Ny,:)) + &
+                & b(2)*(K2_T(1:Ny,:) + K3hat_T(1:Ny,:)) + &
+                & b(3)*(K3_T(1:Ny,:) + K4hat_T(1:Ny,:)))
+   end if
+
+   open(unit=9010, file="P"//proc_id_str//"phi_real_update.txt", action="write", status="unknown")
+   open(unit=9011, file="P"//proc_id_str//"phi_im_update.txt", action="write", status="unknown")
    do i=1,Ny
       do j=1,Nx
-         write (9010,*) REAL(uyi(i,j))
-         write (9011,*) AIMAG(uyi(i,j))
+         write (9010,*) REAL(phi(i,j))
+         write (9011,*) AIMAG(phi(i,j))
       end do
    end do
    close(unit=9010)
    close(unit=9011)
-   write(*,*) "done writing uyi!"
+   write(*,*) "done writing phi!"
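
[For orientation: the UPDATE SOLUTIONS block above is the final assembly step of the IMEX-RK scheme, combining the implicit stages K_i and the explicit stages K-hat_{i+1} with the weights b, exactly as read off the code:]

    \[ T^{n+1} = T^{n} + \Delta t \left[ b_1 (K_1 + \hat{K}_2)
                                       + b_2 (K_2 + \hat{K}_3)
                                       + b_3 (K_3 + \hat{K}_4) \right] \]

[and identically for phi. The differing row ranges (2:Ny on rank 0, 1:Ny-1 on the last rank, 1:Ny in between) appear to skip the first and last global rows, i.e. the wall rows fixed by the boundary conditions; that reading is inferred from the slicing, not stated in the patch.]
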
 
    if (time == t_final) then
       exit

From de7636462650295e370ec1bc95032620ab2620c2 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Fri, 30 Apr 2021 12:15:52 -0400
Subject: [PATCH 20/24] finished ux uy updates

---
 time_integrators_MPI.f90 | 97 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 90 insertions(+), 7 deletions(-)

diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index 1980417..493a821 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -38,7 +38,7 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
 real(dp) :: start, finish
 real(dp) :: start_overall, finish_overall
 integer :: nthreads, myid, stind
-complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_phi_MPI, tmp_T_MPI, phi_MPI
+complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_phi_MPI, tmp_T_MPI, phi_MPI, phi_update_MPI
 complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_phi_MPI, K2hat_phi_MPI, K3hat_phi_MPI
 complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_T_MPI, K2hat_T_MPI, K3hat_T_MPI
 complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI
@@ -90,7 +90,7 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
 
 if (proc_id == 0) then
    total_ny = Ny * num_procs
-   allocate(tmp_phi_MPI(total_ny), tmp_T_MPI(total_ny), phi_MPI(total_ny-2), stat=alloc_err)
+   allocate(tmp_phi_MPI(total_ny), tmp_T_MPI(total_ny), phi_MPI(total_ny-2), phi_update_MPI(total_ny), stat=alloc_err)
    call check_alloc_err(alloc_err)
    allocate(K1hat_phi_MPI(total_ny-2), K2hat_phi_MPI(total_ny-2), K3hat_phi_MPI(total_ny-2), stat=alloc_err)
    call check_alloc_err(alloc_err)
@@ -1043,6 +1043,7 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
    write(*,*) " - calc_explicit(4) timing: ", finish-start, "(s)"
    call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
+   start = OMP_GET_WTIME()
    ! UPDATE SOLUTIONS
    if (proc_id == 0) then
       ! Get phi
@@ -1075,18 +1076,100 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
                 & b(2)*(K2_T(1:Ny,:) + K3hat_T(1:Ny,:)) + &
                 & b(3)*(K3_T(1:Ny,:) + K4hat_T(1:Ny,:)))
    end if
+
+   ! Get ux and uy
+   if (proc_id == 0) then
+      ! Receive g1,g2,g3 from all other nodes.
+      do otherproc = 1,num_procs-1
+         stind = otherproc * Ny + 1
+         call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 80, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+         call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 81, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+         call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 82, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+      end do
+      g1_total(1:Ny) = g1
+      g2_total(1:Ny) = g2
+      g3_total(1:Ny) = g3
+
+      ! Receive h1,h2,h3 from all other nodes.
+      do otherproc = 1,num_procs-1
+         stind = otherproc * Ny + 1
+         call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 83, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+         call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 84, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+         call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 85, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+      end do
+      h1_total(1:Ny) = h1
+      h2_total(1:Ny) = h2
+      h3_total(1:Ny) = h3
+   else
+      ! Send g1, g2, g3 to first node.
+      call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 80, MPI_COMM_WORLD, mpierror)
+      call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 81, MPI_COMM_WORLD, mpierror)
+      call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 82, MPI_COMM_WORLD, mpierror)
+
+      ! Send h1, h2, h3 to first node.
+      call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 83, MPI_COMM_WORLD, mpierror)
+      call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 84, MPI_COMM_WORLD, mpierror)
+      call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 85, MPI_COMM_WORLD, mpierror)
+   end if
+
+   !$OMP PARALLEL DO private(tmp_uy, it) schedule(dynamic)
+   do it = 1,Nx
+      ! Only call from single node.
+      if (proc_id == 0) then
+
+         do otherproc = 1,num_procs-1
+            stind = otherproc * Ny + 1
+            ! Receive phi
+            call MPI_RECV(phi_update_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, &
+                 otherproc, 86, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+         end do
+         ! Set local phi
+         phi_update_MPI(1:Ny) = phi(1:Ny,it)
+
+         ! Solve for v
+         call calc_vi_mod_MPI(tmp_uy_MPI, phi_update_MPI, kx(it), total_ny, g1_total, g2_total, g3_total)
+         ! Solve for u
+         if (kx(it) /= 0.0_dp) then
+            uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it)
+         else if (kx(it) == 0.0_dp) then
+            uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow!
+         end if
+
+         ! Send uy, ux back to each node.
+         do otherproc = 1,num_procs-1
+            stind = otherproc * Ny + 1
+            call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 87, MPI_COMM_WORLD, mpierror)
+            call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 88, MPI_COMM_WORLD, mpierror)
+         end do
+         ! Set local versions.
+         uy(1:Ny,it) = tmp_uy_MPI(1:Ny)
+         ux(1:Ny,it) = uxi_MPI(1:Ny)
+
+      else
+         ! Send phi
+         call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 86, MPI_COMM_WORLD, mpierror)
+
+         ! Receive uy, ux from main node.
+         call MPI_RECV(uy(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 87,&
+              MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+         call MPI_RECV(ux(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 88,&
+              MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+      end if
+   end do
+   !$OMP END PARALLEL DO
+   finish = OMP_GET_WTIME()
+   write(*,*) " - update sols timing: ", finish-start, "(s)"
 
-   open(unit=9010, file="P"//proc_id_str//"phi_real_update.txt", action="write", status="unknown")
-   open(unit=9011, file="P"//proc_id_str//"phi_im_update.txt", action="write", status="unknown")
+   open(unit=9010, file="P"//proc_id_str//"uy_real_update.txt", action="write", status="unknown")
+   open(unit=9011, file="P"//proc_id_str//"uy_im_update.txt", action="write", status="unknown")
    do i=1,Ny
       do j=1,Nx
-         write (9010,*) REAL(phi(i,j))
-         write (9011,*) AIMAG(phi(i,j))
+         write (9010,*) REAL(uy(i,j))
+         write (9011,*) AIMAG(uy(i,j))
       end do
    end do
    close(unit=9010)
    close(unit=9011)
-   write(*,*) "done writing phi!"
+   write(*,*) "done writing uy!"
 
    if (time == t_final) then
       exit

From 91dfedb5e99683bac66827fc77a57c436c9ecc6e Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Fri, 30 Apr 2021 13:27:32 -0400
Subject: [PATCH 21/24] finished MPI version.
---
 Makefile                 |  4 ++--
 input.data               |  4 ++--
 time_integrators_MPI.f90 | 33 +++++++++++++++++++++------------
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/Makefile b/Makefile
index 6db4fb2..8054957 100755
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@ PROGRAMS = $(MAIN).exe
 all: $(PROGRAMS)
 
 $(PROGRAMS) : $(OBJECTS)
-	$(FC) -fopenmp $(LDFLAGS) -o $(PROGRAMS) $(OBJECTS) $(LIBFLAGS1) -llapack -lblas $(LIBFLAGS2) -lfftw3 -lm
+	$(FC) -fopenmp -fallow-argument-mismatch $(LDFLAGS) -o $(PROGRAMS) $(OBJECTS) $(LIBFLAGS1) -llapack -lblas $(LIBFLAGS2) -lfftw3 -lm
 
 fftw.o : fftw.f90
 	$(FC) $(FFLAGS) fftw.f90
@@ -54,7 +54,7 @@ time_integrators.o : time_integrators.f90
 	$(FC) -fopenmp $(FFLAGS) time_integrators.f90
 
 time_integrators_MPI.o : time_integrators_MPI.f90
-	$(FC) -fopenmp $(FFLAGS) time_integrators_MPI.f90
+	$(FC) -fopenmp -fallow-argument-mismatch $(FFLAGS) time_integrators_MPI.f90
 
 jacobians.o : jacobians.f90
 	$(FC) $(FFLAGS) jacobians.f90

diff --git a/input.data b/input.data
index cc633a4..0a17907 100755
--- a/input.data
+++ b/input.data
@@ -1,6 +1,6 @@
 7.0, 1.5585, 0.1
-5.0e+03, 1, 1.1
-5.0, 0.1
+5.0e+04, 1, 1.1
+10.0, 0.1
 -1.0, 1.0
 64, 48, 1
 0, 0, 0

diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index 493a821..905cfc0 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -1158,23 +1158,32 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
    !$OMP END PARALLEL DO
    finish = OMP_GET_WTIME()
    write(*,*) " - update sols timing: ", finish-start, "(s)"
+   call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
-   open(unit=9010, file="P"//proc_id_str//"uy_real_update.txt", action="write", status="unknown")
-   open(unit=9011, file="P"//proc_id_str//"uy_im_update.txt", action="write", status="unknown")
-   do i=1,Ny
-      do j=1,Nx
-         write (9010,*) REAL(uy(i,j))
-         write (9011,*) AIMAG(uy(i,j))
-      end do
-   end do
-   close(unit=9010)
-   close(unit=9011)
-   write(*,*) "done writing uy!"
+   ! open(unit=9010, file="P"//proc_id_str//"uy_real_update.txt", action="write", status="unknown")
+   ! open(unit=9011, file="P"//proc_id_str//"uy_im_update.txt", action="write", status="unknown")
+   ! do i=1,Ny
+   !    do j=1,Nx
+   !       write (9010,*) REAL(uy(i,j))
+   !       write (9011,*) AIMAG(uy(i,j))
+   !    end do
+   ! end do
+   ! close(unit=9010)
+   ! close(unit=9011)
+   ! write(*,*) "done writing uy!"
+
+
+   if (wvtk) then
+      call write_to_vtk(nti, .false., proc_id_str) ! false = Fourier space
+   end if
+
+   call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
+   write(*,*) "proc ", proc_id_str, " write vtk complete for t", time
+
 
    if (time == t_final) then
       exit
    end if
-
 end do

From 6a29dbebfe12ac96e48818478db16f41998c86de Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Wed, 5 May 2021 18:09:33 -0400
Subject: [PATCH 22/24] finished MPI version.
---
 time_integrators_MPI.f90 | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index 905cfc0..c9c8b5f 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -1182,6 +1182,17 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
    write(*,*) "proc ", proc_id_str, " write vtk complete for t", time
 
    if (time == t_final) then
+      open(unit=9010, file="P"//proc_id_str//"T_real_update.txt", action="write", status="unknown")
+      open(unit=9011, file="P"//proc_id_str//"T_im_update.txt", action="write", status="unknown")
+      do i=1,Ny
+         do j=1,Nx
+            write (9010,*) REAL(T(i,j))
+            write (9011,*) AIMAG(T(i,j))
+         end do
+      end do
+      close(unit=9010)
+      close(unit=9011)
+      write(*,*) "done writing T!"
       exit
    end if

From 9bf9348f1a0816b60852f1fa7ca772a15a9dfd45 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Thu, 6 May 2021 10:28:12 -0400
Subject: [PATCH 23/24] small debug

---
 input.data               |  4 ++--
 time_integrators.f90     | 14 +++++++++++++-
 time_integrators_MPI.f90 | 23 ++++++++++++-----------
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/input.data b/input.data
index 0a17907..4b2b6c0 100755
--- a/input.data
+++ b/input.data
@@ -1,6 +1,6 @@
 7.0, 1.5585, 0.1
-5.0e+04, 1, 1.1
-10.0, 0.1
+5.0e+03, 1, 1.1
+0.1, 0.1
 -1.0, 1.0
 64, 48, 1
 0, 0, 0

diff --git a/time_integrators.f90 b/time_integrators.f90
index 7cd60c9..f4728e9 100644
--- a/time_integrators.f90
+++ b/time_integrators.f90
@@ -29,7 +29,7 @@ subroutine imex_rk(vtk_print, save_nusselt)
 logical, optional, intent(in) :: save_nusselt
 
 integer :: nti
-integer :: nprint
+integer :: nprint, i, j
 logical :: wvtk
 real(dp) :: nusselt_num
 real(dp) :: start, finish
@@ -72,6 +72,18 @@ subroutine imex_rk(vtk_print, save_nusselt)
 ! Format for writing out single values.
 1000 format(E25.16E3)
 
+open(unit=9010, file="T_real_update.txt", action="write", status="unknown")
+open(unit=9011, file="T_im_update.txt", action="write", status="unknown")
+do i=1,Ny
+   do j=1,Nx
+      write (9010,*) REAL(T(i,j))
+      write (9011,*) AIMAG(T(i,j))
+   end do
+end do
+close(unit=9010)
+close(unit=9011)
+write(*,*) "done writing T!"
+
 ! Time integration
 do ! while (time < t_final)
    start_overall = OMP_GET_WTIME()

diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index c9c8b5f..5b5c707 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -116,6 +116,18 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
 
 call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
+open(unit=9010, file="P"//proc_id_str//"T_real_update.txt", action="write", status="unknown")
+open(unit=9011, file="P"//proc_id_str//"T_im_update.txt", action="write", status="unknown")
+do i=1,Ny
+   do j=1,Nx
+      write (9010,*) REAL(T(i,j))
+      write (9011,*) AIMAG(T(i,j))
+   end do
+end do
+close(unit=9010)
+close(unit=9011)
+write(*,*) "done writing T!"
+
 ! Time integration
 do ! while (time < t_final)
    start_overall = OMP_GET_WTIME()
@@ -1182,17 +1194,6 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
    write(*,*) "proc ", proc_id_str, " write vtk complete for t", time
 
    if (time == t_final) then
-      open(unit=9010, file="P"//proc_id_str//"T_real_update.txt", action="write", status="unknown")
-      open(unit=9011, file="P"//proc_id_str//"T_im_update.txt", action="write", status="unknown")
-      do i=1,Ny
-         do j=1,Nx
-            write (9010,*) REAL(T(i,j))
-            write (9011,*) AIMAG(T(i,j))
-         end do
-      end do
-      close(unit=9010)
-      close(unit=9011)
-      write(*,*) "done writing T!"
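
[Patches 22 through 24 move this same eleven-line open/write/close block between call sites: MPI end-of-run, serial and MPI startup, and back to end-of-run. Factoring it into a helper would reduce each move to a one-line change. A sketch only; the subroutine name and interface are hypothetical, not part of the repository:]

    ! Hypothetical helper, not in the patch series. Mirrors the repeated dump
    ! blocks: one file for the real part, one for the imaginary part.
    subroutine dump_complex_field(field, prefix, proc_id_str)
       use, intrinsic :: iso_c_binding, only: C_DOUBLE_COMPLEX
       complex(C_DOUBLE_COMPLEX), intent(in) :: field(:,:)
       character(len=*), intent(in) :: prefix, proc_id_str
       integer :: i, j
       open(unit=9010, file="P"//proc_id_str//prefix//"_real_update.txt", action="write", status="unknown")
       open(unit=9011, file="P"//proc_id_str//prefix//"_im_update.txt", action="write", status="unknown")
       do i = 1,size(field,1)
          do j = 1,size(field,2)
             write (9010,*) REAL(field(i,j))
             write (9011,*) AIMAG(field(i,j))
          end do
       end do
       close(unit=9010)
       close(unit=9011)
    end subroutine dump_complex_field

[With such a helper, each debug dump collapses to a call like dump_complex_field(T, "T", proc_id_str).]
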
       exit
    end if

From 875100aff8747df737cbb2299a317dbd88fa01c9 Mon Sep 17 00:00:00 2001
From: Michael Neuder
Date: Thu, 6 May 2021 10:47:28 -0400
Subject: [PATCH 24/24] small debug

---
 time_integrators.f90     | 22 +++++++++++-----------
 time_integrators_MPI.f90 | 30 ++++++++++++++----------------
 2 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/time_integrators.f90 b/time_integrators.f90
index f4728e9..99d4ff1 100644
--- a/time_integrators.f90
+++ b/time_integrators.f90
@@ -72,17 +72,6 @@ subroutine imex_rk(vtk_print, save_nusselt)
 ! Format for writing out single values.
 1000 format(E25.16E3)
 
-open(unit=9010, file="T_real_update.txt", action="write", status="unknown")
-open(unit=9011, file="T_im_update.txt", action="write", status="unknown")
-do i=1,Ny
-   do j=1,Nx
-      write (9010,*) REAL(T(i,j))
-      write (9011,*) AIMAG(T(i,j))
-   end do
-end do
-close(unit=9010)
-close(unit=9011)
-write(*,*) "done writing T!"
 
 ! Time integration
 do ! while (time < t_final)
@@ -274,6 +263,17 @@ subroutine imex_rk(vtk_print, save_nusselt)
 
    if (time == t_final) then
+      open(unit=9010, file="T_real_update.txt", action="write", status="unknown")
+      open(unit=9011, file="T_im_update.txt", action="write", status="unknown")
+      do i=1,Ny
+         do j=1,Nx
+            write (9010,*) REAL(T(i,j))
+            write (9011,*) AIMAG(T(i,j))
+         end do
+      end do
+      close(unit=9010)
+      close(unit=9011)
+      write(*,*) "done writing T!"
       exit
    end if

diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90
index 5b5c707..73e48ce 100644
--- a/time_integrators_MPI.f90
+++ b/time_integrators_MPI.f90
@@ -116,17 +116,6 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
 
 call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
 
-open(unit=9010, file="P"//proc_id_str//"T_real_update.txt", action="write", status="unknown")
-open(unit=9011, file="P"//proc_id_str//"T_im_update.txt", action="write", status="unknown")
-do i=1,Ny
-   do j=1,Nx
-      write (9010,*) REAL(T(i,j))
-      write (9011,*) AIMAG(T(i,j))
-   end do
-end do
-close(unit=9010)
-close(unit=9011)
-write(*,*) "done writing T!"
 
 ! Time integration
 do ! while (time < t_final)
    start_overall = OMP_GET_WTIME()
@@ -1184,7 +1173,20 @@ subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs)
    ! close(unit=9011)
    ! write(*,*) "done writing uy!"
 
-
+   if (time == t_final) then
+      open(unit=9010, file="P"//proc_id_str//"T_real_update.txt", action="write", status="unknown")
+      open(unit=9011, file="P"//proc_id_str//"T_im_update.txt", action="write", status="unknown")
+      do i=1,Ny
+         do j=1,Nx
+            write (9010,*) REAL(T(i,j))
+            write (9011,*) AIMAG(T(i,j))
+         end do
+      end do
+      close(unit=9010)
+      close(unit=9011)
+      write(*,*) "done writing T!"
+      exit
+   end if
 
    if (wvtk) then
       call write_to_vtk(nti, .false., proc_id_str) ! false = Fourier space
    end if
 
    call MPI_BARRIER(MPI_COMM_WORLD, mpierror)
    write(*,*) "proc ", proc_id_str, " write vtk complete for t", time
-
-   if (time == t_final) then
-      exit
-   end if
 end do
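
[A closing caveat on the finished MPI port: patch 20's ux/uy update issues MPI_SEND and MPI_RECV from inside an !$OMP PARALLEL DO region, as do the stage loops before it. That is only well-defined when the MPI library was initialized with full thread support, and the shared work buffers in that loop (tmp_uy_MPI, uxi_MPI, phi_update_MPI) would likewise need to be privatized. The -fallow-argument-mismatch flag added in patch 21 is presumably related housekeeping: gfortran 10+ otherwise rejects calling MPI_SEND with buffers of differing types through the implicit interface. A minimal sketch of the thread-support half, assuming MPI startup lives in the main program, which this series does not show:]

    ! Sketch only: request a thread-safe MPI before OpenMP threads make MPI calls.
    ! Assumes 'use mpi' (or include 'mpif.h') is already in scope.
    integer :: provided, mpierror
    call MPI_INIT_THREAD(MPI_THREAD_MULTIPLE, provided, mpierror)
    if (provided < MPI_THREAD_MULTIPLE) then
       write(*,*) "MPI library lacks MPI_THREAD_MULTIPLE support"
       call MPI_ABORT(MPI_COMM_WORLD, 1, mpierror)
    end if
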