diff --git a/Makefile b/Makefile index 684c2f9..8054957 100755 --- a/Makefile +++ b/Makefile @@ -1,20 +1,21 @@ -FC=gfortran +FC=mpif90 FFLAGS= -c -O3 LIBFLAGS2 = -L/usr/local/fftw/lib LDFLAGS = -I/usr/local/fftw/include -#MAIN = Ra_loop -#MAIN = Ra_loop_no_opt -MAIN = time_loop +# MAIN = Ra_loop +# MAIN = Ra_loop_no_opt +# MAIN = time_loop +MAIN = time_loop_MPI -OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o +OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o time_integrators_MPI.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o PROGRAMS = $(MAIN).exe all: $(PROGRAMS) $(PROGRAMS) : $(OBJECTS) - $(FC) -fopenmp $(LDFLAGS) -o $(PROGRAMS) $(OBJECTS) $(LIBFLAGS1) -llapack -lblas $(LIBFLAGS2) -lfftw3 -lm + $(FC) -fopenmp -fallow-argument-mismatch $(LDFLAGS) -o $(PROGRAMS) $(OBJECTS) $(LIBFLAGS1) -llapack -lblas $(LIBFLAGS2) -lfftw3 -lm fftw.o : fftw.f90 $(FC) $(FFLAGS) fftw.f90 @@ -52,6 +53,9 @@ statistics.o : statistics.f90 time_integrators.o : time_integrators.f90 $(FC) -fopenmp $(FFLAGS) time_integrators.f90 +time_integrators_MPI.o : time_integrators_MPI.f90 + $(FC) -fopenmp -fallow-argument-mismatch $(FFLAGS) time_integrators_MPI.f90 + jacobians.o : jacobians.f90 $(FC) $(FFLAGS) jacobians.f90 diff --git a/bc_setup.f90 b/bc_setup.f90 index 10e412d..0c127eb 100755 --- a/bc_setup.f90 +++ b/bc_setup.f90 @@ -2,8 +2,10 @@ module bc_setup use global use write_pack +use mesh_pack implicit none +! include 'mpif.h' contains @@ -138,4 +140,220 @@ subroutine init_bc(aii) end subroutine init_bc +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str) + +implicit none + +real(dp), intent(in) :: aii +integer, intent(in) :: proc_id, num_procs +character(3), optional, intent(in) :: proc_id_str +real(dp) :: pnu, qnu +real(dp) :: wavex + +real(dp), allocatable, dimension(:) :: d, dl, du +real(dp), allocatable, dimension(:) :: phi1_b, phi1_t +real(dp), allocatable, dimension(:) :: g1_total, g2_total, g3_total +real(dp), allocatable, dimension(:) :: recv_col, recv_col2, last_row_recv +real(dp), allocatable, dimension(:) :: phi2_b, phi2_t +real(dp), allocatable, dimension(:,:) :: Fphi12, FV12 + + +integer :: ii, jj, i, total_ny, otherproc, start +integer :: info, mpierror + +! Only do allocation on main node. +if(proc_id == 0) then + total_ny = Ny * num_procs + + ! Allocate local variables + allocate(Fphi12(total_ny-2,2), FV12(total_ny-2,2), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(d(total_ny-2), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(dl(total_ny-3), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(du(total_ny-3), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(phi1_b(Nx), phi1_t(Nx), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(phi2_b(Nx), phi2_t(Nx), stat=alloc_err) + call check_alloc_err(alloc_err) + + ! Need to pull all of g1, g2, g3 onto main node for calculations. + allocate(g1_total(total_ny), g2_total(total_ny), g3_total(total_ny), stat=alloc_err) + call check_alloc_err(alloc_err) + + ! Receive g1,g2,g3 from all other nodes. 
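+   ! (Row-block layout assumed throughout this routine: rank p owns Ny grid
+   !  rows, so its metric coefficients are gathered into
+   !  g1_total/g2_total/g3_total(p*Ny+1 : (p+1)*Ny) below, with rank 0's own
+   !  block copied directly into the first Ny entries afterwards.)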
+ do otherproc = 1,num_procs-1 + start = otherproc * Ny + 1 + call MPI_RECV(g1_total(start), Ny, MPI_DOUBLE, otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g2_total(start), Ny, MPI_DOUBLE, otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g3_total(start), Ny, MPI_DOUBLE, otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + g1_total(1:Ny) = g1 + g2_total(1:Ny) = g2 + g3_total(1:Ny) = g3 + + Fphi12 = 0.0_dp + FV12 = 0.0_dp + d = 0.0_dp + dl = 0.0_dp + du = 0.0_dp + phi1_b = 1.0_dp + phi1_t = 0.0_dp + phi2_b = 0.0_dp + phi2_t = 1.0_dp + + phi1_b(1) = 0.0_dp + phi2_t(1) = 0.0_dp + + ! Compute phi1 and phi2 from D2 phi_i = 0 where + ! D2 = -(alpha*kx)^2 + dy^2 and phi1 satisfies the BCs + ! phi1 = 1 at the bottom wall and phi1 = 0 at the top wall. + ! phi2 satisfies phi2 = 0 at the bottom wall and phi2 = 1 + ! at the top wall. + + ! A comment on how these arrays are formed. Since we are only + ! dealing with Dirichlet conditions at the walls, we only need arrays of size + ! Ny-2, i.e. we don't need the values at the walls. Thus, in the calculations + ! below, F(1) would correspond to point number 2 and F(Ny-2) would correspond + ! to point Ny-1. + + ! Some parameters + pnu = nu0*dt*aii + + do ii = 1,Nx + + Fphi12 = 0.0_dp + + if (abs(kx(ii)/alpha) > Nf/2) then + phi1_b(ii) = 0.0_dp + phi2_t(ii) = 0.0_dp + end if + + wavex = kx(ii) + + qnu = 1.0_dp + pnu*wavex**2.0_dp + + do jj = 2,total_ny-1 + d(jj-1) = qnu - pnu*g2_total(jj) + end do + + do jj = 2,total_ny-2 + du(jj-1) = -pnu*g3_total(jj) + end do + + do jj = 3,total_ny-1 + dl(jj-2) = -pnu*g1_total(jj) + end do + + Fphi12(1,1) = pnu*g1_total(2)*phi1_b(ii) + Fphi12(total_ny-2,2) = pnu*g3_total(total_ny-1)*phi2_t(ii) + + ! Solve the system Aphi phi = Fphi + call dgtsv(total_ny-2, 2, dl, d, du, Fphi12, total_ny-2, info) + + ! Put phi1 and phi2 together in main node. + phi1(1,ii) = phi1_b(ii) + phi1(2:Ny,ii) = Fphi12(1:Ny-1,1) + phi2(1,ii) = 0.0_dp + phi2(2:Ny,ii) = Fphi12(1:Ny-1,2) + + ! Send phi1 and phi2 columns to each node. + do otherproc = 1,num_procs-1 + start = otherproc * Ny + call MPI_SEND(Fphi12(start,1), Ny, MPI_DOUBLE, otherproc, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(Fphi12(start,2), Ny, MPI_DOUBLE, otherproc, 46, MPI_COMM_WORLD, mpierror) + end do + + ! Getting V1 and V2 initialized. + FV12(:,1) = Fphi12(:,1) + FV12(:,2) = Fphi12(:,2) + + do jj = 2,total_ny-1 + d(jj-1) = -wavex**2.0_dp + g2_total(jj) + end do + + do jj = 2,total_ny-2 + du(jj-1) = g3_total(jj) + end do + + do jj = 3,total_ny-1 + dl(jj-2) = g1_total(jj) + end do + + call dgtsv(total_ny-2, 2, dl, d, du, FV12, total_ny-2, info) + + ! Place V1, V2 in main node memory. + V1(2:Ny,ii) = FV12(1:Ny-1,1) + V2(2:Ny,ii) = FV12(1:Ny-1,2) + + ! Send V1 and V1 columns to each node. + do otherproc = 1,num_procs-1 + start = otherproc * Ny + call MPI_SEND(FV12(start,1), Ny, MPI_DOUBLE, otherproc, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(FV12(start,2), Ny, MPI_DOUBLE, otherproc, 49, MPI_COMM_WORLD, mpierror) + end do + + end do + + ! Send last row to final proc. + call MPI_SEND(phi2_t(1), Nx, MPI_DOUBLE, num_procs-1, 47, MPI_COMM_WORLD, mpierror) + +else + ! Send g1, g2, g3 to first node. 
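+   ! These blocking sends pair with the tag-42/43/44 receives posted by
+   ! rank 0 above; every non-root rank contributes its local Ny-point block
+   ! of the metric coefficients.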
+ call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 44, MPI_COMM_WORLD, mpierror) + + allocate(recv_col(Ny), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(recv_col2(Ny), stat=alloc_err) + call check_alloc_err(alloc_err) + + ! Receive cols of phi1 and phi2. + do ii = 1,Nx + call MPI_RECV(recv_col, Ny, MPI_DOUBLE, 0, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(recv_col2, Ny, MPI_DOUBLE, 0, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + phi1(1:Ny, ii) = recv_col + phi2(1:Ny, ii) = recv_col2 + end do + + ! Receive cols of V1 and V2. + do ii = 1,Nx + call MPI_RECV(recv_col, Ny, MPI_DOUBLE, 0, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(recv_col2, Ny, MPI_DOUBLE, 0, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + V1(1:Ny, ii) = recv_col + V2(1:Ny, ii) = recv_col2 + end do + + ! Handle boundary of last processor. + if (proc_id == num_procs - 1) then + allocate(last_row_recv(Nx), stat=alloc_err) + call check_alloc_err(alloc_err) + call MPI_RECV(last_row_recv, Nx, MPI_DOUBLE, 0, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + phi1(Ny,1:Nx) = 0.0_dp + phi2(Ny,1:Nx) = last_row_recv + V1(Ny, 1:Nx) = 0.0_dp + V2(Ny, 1:Nx) = 0.0_dp + end if +end if + +! Calculate wall derivative on first and last processor. +if (proc_id == 0) then + do ii = 1,Nx + dyv1_B(ii) = h1(1)*V1(1,ii) + h2(1)*V1(2,ii) + h3(1)*V1(3,ii) + dyv2_B(ii) = h1(1)*V2(1,ii) + h2(1)*V2(2,ii) + h3(1)*V2(3,ii) + end do +else if (proc_id == num_procs - 1) then + do ii = 1,Nx + dyv1_T(ii) = h1(Ny)*V1(Ny-2,ii) + h2(Ny)*V1(Ny-1,ii) + h3(Ny)*V1(Ny,ii) + dyv2_T(ii) = h1(Ny)*V2(Ny-2,ii) + h2(Ny)*V2(Ny-1,ii) + h3(Ny)*V2(Ny,ii) + end do +end if + +end subroutine init_bc_MPI + end module bc_setup diff --git a/global.f90 b/global.f90 index e94d8d8..e0c750b 100755 --- a/global.f90 +++ b/global.f90 @@ -3,6 +3,7 @@ module global use fftw implicit none +include 'mpif.h' save ! Get double-precision type @@ -103,10 +104,9 @@ module global complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_phi, tmp_phi1, tmp_T, tmp_uy, tmp_uy1 complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_K_phi, tmp_K_T - - contains + subroutine global_params implicit none @@ -314,7 +314,7 @@ function d1y(vec) result(d1yvec) complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: vec complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: d1yvec -integer :: alloc_err, jj, n +integer :: alloc_err, jj, n, i n = size(vec) @@ -336,6 +336,102 @@ end function d1y !::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +function d1y_MPI(vec, h1_total, h2_total, h3_total) result(d1yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: vec +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: d1yvec +real(dp), dimension(:), intent(in) :: h1_total, h2_total, h3_total +integer :: alloc_err, jj, n, i + +n = size(vec) + +allocate(d1yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d1y." 
+ stop +end if + +d1yvec = 0.0_dp + +d1yvec(1) = h1_total(1)*vec(1) + h2_total(1)*vec(2) + h3_total(1)*vec(3) +do jj = 2,n-1 + d1yvec(jj) = h1_total(jj)*vec(jj-1) + h2_total(jj)*vec(jj) + h3_total(jj)*vec(jj+1) +end do +d1yvec(n) = h1_total(n)*vec(n-2) + h2_total(n)*vec(n-1) + h3_total(n)*vec(n) + +end function d1y_MPI + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +function d1y_MPI2(vec, proc_id, num_procs) result(d1yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: vec +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: d1yvec +integer, intent(in) :: proc_id, num_procs +integer :: alloc_err, jj, n, i, mpierror +complex(C_DOUBLE_COMPLEX) :: prev_MPI, post_MPI + +n = size(vec) + +allocate(d1yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d1y." + stop +end if + +d1yvec = 0.0_dp + +if (proc_id == 0) then + ! Send nth element of vector + call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 78, MPI_COMM_WORLD, mpierror) + ! Receive post element of vector + call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 79, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + d1yvec(1) = h1(1)*vec(1) + h2(1)*vec(2) + h3(1)*vec(3) + do jj = 2,n-1 + d1yvec(jj) = h1(jj)*vec(jj-1) + h2(jj)*vec(jj) + h3(jj)*vec(jj+1) + end do + d1yvec(n) = h1(n)*vec(n-1) + h2(n)*vec(n) + h3(n)*post_MPI +else if (proc_id == num_procs - 1) then + ! Receive previous element of vector + call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 78, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send first element of vector. + call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 79, MPI_COMM_WORLD, mpierror) + d1yvec(1) = h1(1)*prev_MPI + h2(1)*vec(1) + h3(1)*vec(2) + do jj = 2,n-1 + d1yvec(jj) = h1(jj)*vec(jj-1) + h2(jj)*vec(jj) + h3(jj)*vec(jj+1) + end do + d1yvec(n) = h1(n)*vec(n-2) + h2(n)*vec(n-1) + h3(n)*vec(n) +else + ! Send nth element of vector + call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 78, MPI_COMM_WORLD, mpierror) + ! Receive previous element of vector + call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 78, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send first element of vector. + call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 79, MPI_COMM_WORLD, mpierror) + ! 
Receive post element of vector + call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 79, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + d1yvec(1) = h1(1)*prev_MPI + h2(1)*vec(1) + h3(1)*vec(2) + do jj = 2,n-1 + d1yvec(jj) = h1(jj)*vec(jj-1) + h2(jj)*vec(jj) + h3(jj)*vec(jj+1) + end do + d1yvec(n) = h1(n)*vec(n-1) + h2(n)*vec(n) + h3(n)*post_MPI +end if + + + +end function d1y_MPI2 + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + function d2y(vec) result(d2yvec) implicit none @@ -389,6 +485,153 @@ function d2y(vec) result(d2yvec) end function d2y +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +function d2y_MPI(vec, dynu_in, g1_total, g2_total, g3_total, total_ny) result(d2yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), intent(in) :: vec(:) +complex(C_DOUBLE_COMPLEX), allocatable :: d2yvec(:) +real(dp), dimension(:), intent(in) :: dynu_in, g1_total, g2_total, g3_total +integer, intent(in) :: total_ny +real(dp) :: d1, d2, d3, d4 +real(dp) :: ell1, ell2, ell3, ell4 +real(dp) :: dNm3, dNm2, dNm1, dN +real(dp) :: ellNm3, ellNm2, ellNm1, ellN +integer :: jj, n + +n = size(vec) + +allocate(d2yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d2y." + stop +end if + +d2yvec = 0.0_dp + +d1 = -dynu_in(1)*(dynu_in(1)+dynu_in(2))*(dynu_in(1)+dynu_in(2)+dynu_in(3)) +d2 = dynu_in(1)*dynu_in(2)*(dynu_in(2) + dynu_in(3)) +d3 = -dynu_in(2)*dynu_in(3)*(dynu_in(1) + dynu_in(2)) +d4 = dynu_in(3)*(dynu_in(2)+dynu_in(3))*(dynu_in(1)+dynu_in(2)+dynu_in(3)) + +ell1 = -2.0_dp*(3.0_dp*dynu_in(1) + 2.0_dp*dynu_in(2) + dynu_in(3)) / d1 +ell2 = -2.0_dp*(2.0_dp*(dynu_in(1) + dynu_in(2)) + dynu_in(3)) / d2 +ell3 = -2.0_dp*(2.0_dp*dynu_in(1) + dynu_in(2) + dynu_in(3)) / d3 +ell4 = -2.0_dp*(2.0_dp*dynu_in(1) + dynu_in(2)) / d4 + +dN = dynu_in(total_ny-1)*(dynu_in(total_ny-1)+dynu_in(total_ny-2))*(dynu_in(total_ny-1)+dynu_in(total_ny-2)+dynu_in(total_ny-3)) +dNm1 = -dynu_in(total_ny-1)*dynu_in(total_ny-2)*(dynu_in(total_ny-2)+dynu_in(total_ny-3)) +dNm2 = dynu_in(total_ny-2)*dynu_in(total_ny-3)*(dynu_in(total_ny-1)+dynu_in(total_ny-2)) +dNm3 = -dynu_in(total_ny-3)*(dynu_in(total_ny-2)+dynu_in(total_ny-3))*(dynu_in(total_ny-1)+dynu_in(total_ny-2)+dynu_in(total_ny-3)) + +ellN = 2.0_dp*(3.0_dp*dynu_in(total_ny-1) + 2.0_dp*dynu_in(total_ny-2) + dynu_in(total_ny-3)) / dN +ellNm1 = 2.0_dp*(2.0_dp*(dynu_in(total_ny-1)+dynu_in(total_ny-2)) + dynu_in(total_ny-3)) / dNm1 +ellNm2 = 2.0_dp*(2.0_dp*dynu_in(total_ny-1) + dynu_in(total_ny-2) + dynu_in(total_ny-3)) / dNm2 +ellNm3 = 2.0_dp*(2.0_dp*dynu_in(total_ny-1) + dynu_in(total_ny-2)) / dNm3 + +!d2yvec(1) = g1(1)*vec(1) + g2(1)*vec(2) + g3(1)*vec(3) +d2yvec(1) = ell1*vec(1) + ell2*vec(2) + ell3*vec(3) + ell4*vec(4) +do jj = 2,n-1 + d2yvec(jj) = g1_total(jj)*vec(jj-1) + g2_total(jj)*vec(jj) + g3_total(jj)*vec(jj+1) +end do +!d2yvec(n) = g1(n)*vec(n-2) + g2(n)*vec(n-1) + g3(n)*vec(n) +d2yvec(n) = ellNm3*vec(n-3) + ellNm2*vec(n-2) + & + &ellNm1*vec(n-1) + ellN*vec(n) + +end function d2y_MPI + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +function d2y_MPI2(vec, proc_id, num_procs) result(d2yvec) + +implicit none + +complex(C_DOUBLE_COMPLEX), intent(in) :: vec(:) +integer , intent(in) :: proc_id, num_procs +complex(C_DOUBLE_COMPLEX), allocatable :: d2yvec(:) +real(dp) :: d1, d2, d3, d4 +real(dp) :: ell1, ell2, ell3, ell4 +real(dp) :: dNm3, dNm2, dNm1, dN +real(dp) :: ellNm3, ellNm2, ellNm1, ellN +integer :: jj, n, mpierror 
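+! prev_MPI / post_MPI hold the one-point halo exchanged with the neighbouring
+! ranks below (tags 76/77), so the centred stencil can still be applied at
+! the block edges; one-sided stencils are only needed at the two physical
+! walls.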
+complex(C_DOUBLE_COMPLEX) :: prev_MPI, post_MPI + +n = size(vec) + +allocate(d2yvec(n), stat=alloc_err) +if (alloc_err /= 0) then + write(*,*) "ERROR: Allocation problem in d2y." + stop +end if + +d2yvec = 0.0_dp + +d1 = -dynu(1)*(dynu(1)+dynu(2))*(dynu(1)+dynu(2)+dynu(3)) +d2 = dynu(1)*dynu(2)*(dynu(2) + dynu(3)) +d3 = -dynu(2)*dynu(3)*(dynu(1) + dynu(2)) +d4 = dynu(3)*(dynu(2)+dynu(3))*(dynu(1)+dynu(2)+dynu(3)) + +ell1 = -2.0_dp*(3.0_dp*dynu(1) + 2.0_dp*dynu(2) + dynu(3)) / d1 +ell2 = -2.0_dp*(2.0_dp*(dynu(1) + dynu(2)) + dynu(3)) / d2 +ell3 = -2.0_dp*(2.0_dp*dynu(1) + dynu(2) + dynu(3)) / d3 +ell4 = -2.0_dp*(2.0_dp*dynu(1) + dynu(2)) / d4 + +dN = dynu(Ny)*(dynu(Ny)+dynu(Ny-1))*(dynu(Ny)+dynu(Ny-1)+dynu(Ny-2)) +dNm1 = -dynu(Ny)*dynu(Ny-1)*(dynu(Ny-1)+dynu(Ny-2)) +dNm2 = dynu(Ny-1)*dynu(Ny-2)*(dynu(Ny)+dynu(Ny-1)) +dNm3 = -dynu(Ny-2)*(dynu(Ny-1)+dynu(Ny-2))*(dynu(Ny)+dynu(Ny-1)+dynu(Ny-2)) + +ellN = 2.0_dp*(3.0_dp*dynu(Ny) + 2.0_dp*dynu(Ny-1) + dynu(Ny-2)) / dN +ellNm1 = 2.0_dp*(2.0_dp*(dynu(Ny)+dynu(Ny-1)) + dynu(Ny-2)) / dNm1 +ellNm2 = 2.0_dp*(2.0_dp*dynu(Ny) + dynu(Ny-1) + dynu(Ny-2)) / dNm2 +ellNm3 = 2.0_dp*(2.0_dp*dynu(Ny) + dynu(Ny-1)) / dNm3 + +if (proc_id == 0) then + d2yvec(1) = ell1*vec(1) + ell2*vec(2) + ell3*vec(3) + ell4*vec(4) + ! Send nth element of vector + call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 76, MPI_COMM_WORLD, mpierror) + ! Receive post element of vector + call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 77, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + do jj = 2,n-1 + d2yvec(jj) = g1(jj)*vec(jj-1) + g2(jj)*vec(jj) + g3(jj)*vec(jj+1) + end do + d2yvec(n) = g1(n)*vec(n-1) + g2(n)*vec(n) + g3(n)*post_MPI +else if (proc_id == num_procs-1) then + ! Receive previous element of vector + call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 76, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send first element of vector. + call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 77, MPI_COMM_WORLD, mpierror) + d2yvec(1) = g1(1)*prev_MPI + g2(1)*vec(1) + g3(1)*vec(2) + do jj = 2,n-1 + d2yvec(jj) = g1(jj)*vec(jj-1) + g2(jj)*vec(jj) + g3(jj)*vec(jj+1) + end do + d2yvec(n) = ellNm3*vec(n-3) + ellNm2*vec(n-2) + ellNm1*vec(n-1) + ellN*vec(n) +else + ! Send nth element of vector + call MPI_SEND(vec(n), 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 76, MPI_COMM_WORLD, mpierror) + ! Receive previous element of vector + call MPI_RECV(prev_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 76, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send first element of vector. + call MPI_SEND(vec(1), 1, MPI_C_DOUBLE_COMPLEX, proc_id-1, 77, MPI_COMM_WORLD, mpierror) + ! 
Receive post element of vector + call MPI_RECV(post_MPI, 1, MPI_C_DOUBLE_COMPLEX, proc_id+1, 77, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + d2yvec(1) = g1(1)*prev_MPI + g2(1)*vec(1) + g3(1)*vec(2) + do jj = 2,n-1 + d2yvec(jj) = g1(jj)*vec(jj-1) + g2(jj)*vec(jj) + g3(jj)*vec(jj+1) + end do + d2yvec(n) = g1(n)*vec(n-1) + g2(n)*vec(n) + g3(n)*post_MPI +endif + +end function d2y_MPI2 + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + subroutine check_alloc_err(err_mess) integer, intent(in) :: err_mess diff --git a/input.data b/input.data index ca77d7c..4b2b6c0 100755 --- a/input.data +++ b/input.data @@ -1,7 +1,7 @@ 7.0, 1.5585, 0.1 5.0e+03, 1, 1.1 -5.0, 0.1 +0.1, 0.1 -1.0, 1.0 -1600, 1275, 1 -0, 0, 0 +64, 48, 1 0, 0, 0 +1, 0, 0 diff --git a/mesh_pack.f90 b/mesh_pack.f90 index b7239b2..965550c 100755 --- a/mesh_pack.f90 +++ b/mesh_pack.f90 @@ -1,6 +1,7 @@ module mesh_pack use global +! include 'mpif.h' contains @@ -42,6 +43,51 @@ subroutine cosine_mesh(xcoor,ycoor,zcoor, numx,numy,numz,dxin) end subroutine cosine_mesh ! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +subroutine cosine_mesh_MPI(xcoor,ycoor,zcoor, numx,numy,numz, proc_id,num_procs,dxin) + +implicit none + +integer, intent(in) :: numx, numy, numz +integer, intent(in) :: proc_id, num_procs +integer :: mpierror +integer :: ii, jj, kk +real(dp) :: c +real(dp), optional, intent(in) :: dxin +real(dp) :: dxj +real(dp), allocatable, dimension(:), intent(out) :: xcoor, ycoor, zcoor +integer :: total_ny, start_ny + +if (present(dxin)) then + dxj = dxin +else + dxj = dx +end if + +allocate(xcoor(numx), stat=alloc_err) +allocate(ycoor(numy), stat=alloc_err) +allocate(zcoor(numz), stat=alloc_err) +call check_alloc_err(alloc_err) + +! Create the grid. +xcoor(1) = xL +zcoor(1) = 0.0_dp +total_ny = numy * num_procs +start_ny = proc_id * numy +do jj = 1,numy + c = (real(jj+start_ny,kind=dp) - 1.0_dp) / (real(total_ny,kind=dp) - 1.0_dp) + ycoor(jj) = -cos(c*pi) +end do +do ii = 2,numx + xcoor(ii) = xcoor(ii-1) + dxj +end do +do kk = 2,numz + zcoor(kk) = 0.0_dp +end do + +end subroutine cosine_mesh_MPI +! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + subroutine dxdydz(dyj, xcoor,ycoor,zcoor) implicit none @@ -61,7 +107,62 @@ subroutine dxdydz(dyj, xcoor,ycoor,zcoor) dyj(jj-1) = ycoor(jj) - ycoor(jj-1) end do +! do jj=1,numy-1 +! write(*,*) jj, dyj(jj) +! end do + end subroutine dxdydz + +! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +subroutine dxdydz_MPI(dyj, xcoor,ycoor,zcoor, proc_id, num_procs) + +implicit none + +real(dp), dimension(:), intent(in) :: xcoor, ycoor, zcoor +integer, intent(in) :: proc_id, num_procs +real(dp), allocatable, dimension(:), intent(out) :: dyj +integer :: alloc_err, mpierror +integer :: numy, jj +real(dp) :: prev_element + +numy = size(ycoor) + +if (proc_id == 0) then + allocate(dyj(numy-1), stat=alloc_err) + call check_alloc_err(alloc_err) + ! Calculate grid spacing + do jj = 2,numy + dyj(jj-1) = ycoor(jj) - ycoor(jj-1) + end do + ! Send last element to next process to the right. + call MPI_SEND(ycoor(numy), 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, mpierror) +else if (proc_id == num_procs - 1) then + ! Recieve first element from next process to the left. + call MPI_RECV(prev_element, 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + allocate(dyj(numy), stat=alloc_err) + call check_alloc_err(alloc_err) + ! 
Calculate grid spacing + dyj(1) = ycoor(1) - prev_element + do jj = 2,numy + dyj(jj) = ycoor(jj) - ycoor(jj-1) + end do +else + ! Recieve first element from next process to the left. + call MPI_RECV(prev_element, 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! Send last element to next process to the right. + call MPI_SEND(ycoor(numy), 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, mpierror) + allocate(dyj(numy), stat=alloc_err) + call check_alloc_err(alloc_err) + ! Calculate grid spacing + dyj(1) = ycoor(1) - prev_element + do jj = 2,numy + dyj(jj) = ycoor(jj) - ycoor(jj-1) + end do +end if + + +end subroutine dxdydz_MPI ! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: subroutine y_mesh_params @@ -95,6 +196,95 @@ subroutine y_mesh_params end subroutine y_mesh_params ! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +subroutine y_mesh_params_MPI (proc_id, num_procs) + +implicit none + +integer, intent(in) :: proc_id, num_procs +real(dp) :: recv_val +integer :: mpierror, jj + +! Calculate metric coefficients +! g is for second derivatives +! h is for first derivatives + +! First element +if (proc_id == 0) then + call MPI_RECV(recv_val, 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + ! First elems + g1(1) = 2.0_dp / (dynu(1)*(dynu(1)+dynu(2))) + g2(1) = -2.0_dp / (dynu(1)*dynu(2)) + g3(1) = 2.0_dp / (dynu(2)*(dynu(1)+dynu(2))) + + h1(1) = -(2.0_dp*dynu(1)+dynu(2)) / (dynu(1)*(dynu(1)+dynu(2))) + h2(1) = (dynu(1) + dynu(2)) / (dynu(1)*dynu(2)) + h3(1) = -dynu(1) / (dynu(2)*(dynu(1)+dynu(2))) + + ! Middle Ny-2 elements. + do jj = 2,Ny-1 + g1(jj) = 2.0_dp / (dynu(jj-1)*(dynu(jj)+dynu(jj-1))) + g2(jj) = -2.0_dp / (dynu(jj-1)*dynu(jj)) + g3(jj) = 2.0_dp / (dynu( jj )*(dynu(jj)+dynu(jj-1))) + + h1(jj) = -dynu(jj) / (dynu(jj-1)*(dynu(jj-1) + dynu(jj))) + h2(jj) = (dynu(jj) - dynu(jj-1)) / (dynu(jj-1)*dynu(jj)) + h3(jj) = dynu(jj-1) / (dynu(jj)*(dynu(jj-1) + dynu(jj))) + end do + + ! Last elems + g1(Ny) = 2.0_dp / (dynu(Ny-1)*(recv_val+dynu(Ny-1))) + g2(Ny) = -2.0_dp / (dynu(Ny-1)*recv_val) + g3(Ny) = 2.0_dp / (recv_val*(recv_val+dynu(Ny-1))) + + h1(Ny) = -recv_val / (dynu(Ny-1)*(dynu(Ny-1) + recv_val)) + h2(Ny) = (recv_val - dynu(Ny-1)) / (dynu(Ny-1)*recv_val) + h3(Ny) = dynu(Ny-1) / (recv_val*(dynu(Ny-1) + recv_val)) +else if (proc_id == num_procs - 1) then + call MPI_SEND(dynu(1), 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, mpierror) + do jj = 1,Ny-1 + g1(jj) = 2.0_dp / (dynu(jj)*(dynu(jj+1)+dynu(jj))) + g2(jj) = -2.0_dp / (dynu(jj)*dynu(jj+1)) + g3(jj) = 2.0_dp / (dynu(jj+1)*(dynu(jj+1)+dynu(jj))) + + h1(jj) = -dynu(jj+1) / (dynu(jj)*(dynu(jj) + dynu(jj+1))) + h2(jj) = (dynu(jj+1) - dynu(jj)) / (dynu(jj)*dynu(jj+1)) + h3(jj) = dynu(jj) / (dynu(jj+1)*(dynu(jj) + dynu(jj+1))) + end do + h1(Ny) = dynu(Ny) / (dynu(Ny-1)*(dynu(Ny-1)+dynu(Ny))) + h2(Ny) = -(dynu(Ny-1) + dynu(Ny)) / (dynu(Ny-1)*dynu(Ny)) + h3(Ny) = (2.0_dp*dynu(Ny) + dynu(Ny-1)) / (dynu(Ny)*(dynu(Ny-1) + dynu(Ny))) + + g1(Ny) = 2.0_dp / (dynu(Ny-1)*(dynu(Ny-1) + dynu(Ny))) + g2(Ny) = -2.0_dp / (dynu(Ny)*dynu(Ny-1)) + g3(Ny) = 2.0_dp / (dynu(Ny)*(dynu(Ny) + dynu(Ny-1))) + +else + call MPI_SEND(dynu(1), 1, MPI_DOUBLE, proc_id-1, 42, MPI_COMM_WORLD, mpierror) + call MPI_RECV(recv_val, 1, MPI_DOUBLE, proc_id+1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + ! All but last elems. 
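+   ! On interior ranks the stencil at row jj uses dynu(jj) and dynu(jj+1),
+   ! so only the last row needs the neighbour's first spacing (recv_val),
+   ! which is handled after this loop.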
+ do jj = 1,Ny-1 + g1(jj) = 2.0_dp / (dynu(jj)*(dynu(jj+1)+dynu(jj))) + g2(jj) = -2.0_dp / (dynu(jj)*dynu(jj+1)) + g3(jj) = 2.0_dp / (dynu(jj+1)*(dynu(jj+1)+dynu(jj))) + + h1(jj) = -dynu(jj+1) / (dynu(jj)*(dynu(jj) + dynu(jj+1))) + h2(jj) = (dynu(jj+1) - dynu(jj)) / (dynu(jj)*dynu(jj+1)) + h3(jj) = dynu(jj) / (dynu(jj+1)*(dynu(jj) + dynu(jj+1))) + end do + ! Last elems + g1(Ny) = 2.0_dp / (dynu(Ny)*(recv_val+dynu(Ny))) + g2(Ny) = -2.0_dp / (dynu(Ny)*recv_val) + g3(Ny) = 2.0_dp / (recv_val*(recv_val+dynu(Ny))) + + h1(Ny) = -recv_val / (dynu(Ny)*(dynu(Ny) + recv_val)) + h2(Ny) = (recv_val - dynu(Ny)) / (dynu(Ny)*recv_val) + h3(Ny) = dynu(Ny) / (recv_val*(dynu(Ny) + recv_val)) +end if + +end subroutine y_mesh_params_MPI +! ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + subroutine uniform_mesh(xcoor,ycoor,zcoor, numx,numy,numz) implicit none diff --git a/time_integrators.f90 b/time_integrators.f90 index 3f1b908..99d4ff1 100644 --- a/time_integrators.f90 +++ b/time_integrators.f90 @@ -29,7 +29,7 @@ subroutine imex_rk(vtk_print, save_nusselt) logical, optional, intent(in) :: save_nusselt integer :: nti -integer :: nprint +integer :: nprint, i, j logical :: wvtk real(dp) :: nusselt_num real(dp) :: start, finish @@ -37,8 +37,11 @@ subroutine imex_rk(vtk_print, save_nusselt) integer :: nthreads, myid integer, EXTERNAL :: OMP_GET_THREAD_NUM, OMP_GET_NUM_THREADS real(dp), EXTERNAL :: OMP_GET_WTIME +EXTERNAL :: OMP_SET_NUM_THREADS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +call OMP_SET_NUM_THREADS(1) + if (present(vtk_print)) then wvtk = .true. nprint = vtk_print @@ -69,6 +72,7 @@ subroutine imex_rk(vtk_print, save_nusselt) ! Format for writing out single values. 1000 format(E25.16E3) + ! Time integration do ! while (time < t_final) start_overall = OMP_GET_WTIME() @@ -259,6 +263,17 @@ subroutine imex_rk(vtk_print, save_nusselt) if (time == t_final) then + open(unit=9010, file="T_real_update.txt", action="write", status="unknown") + open(unit=9011, file="T_im_update.txt", action="write", status="unknown") + do i=1,Ny + do j=1,Nx + write (9010,*) REAL(T(i,j)) + write (9011,*) AIMAG(T(i,j)) + end do + end do + close(unit=9010) + close(unit=9011) + write(*,*) "done writing T!" exit end if @@ -283,6 +298,7 @@ subroutine imex_rk(vtk_print, save_nusselt) end do ! time loop + end subroutine imex_rk !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! @@ -607,25 +623,25 @@ subroutine calc_explicit(stage) start = OMP_GET_WTIME() select case(stage) case (1) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K1hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do !$OMP END PARALLEL DO case (2) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K2hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do !$OMP END PARALLEL DO case (3) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K3hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do !$OMP END PARALLEL DO case (4) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx K4hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) end do @@ -635,7 +651,7 @@ subroutine calc_explicit(stage) write(*,*) " - - l1 timing: ", finish-start, "(s)" start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) schedule(dynamic) +!$OMP PARALLEL DO schedule(dynamic) do i=1,Nx ! 
Compute dx(T) in Fourier space nlT (:,i) = kx(i)*Ti(:,i) @@ -650,7 +666,7 @@ subroutine calc_explicit(stage) nlT = CI*nlT start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) private(tnlT, tnlphi, tT, tux, tuy, tphi) schedule(dynamic) +!$OMP PARALLEL DO private(tnlT, tnlphi, tT, tux, tuy, tphi) schedule(dynamic) do j = 1,Ny ! Bring everything to physical space tnlT = nlT(j,:) @@ -678,7 +694,7 @@ subroutine calc_explicit(stage) ! Calculate nonlinear term start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) private(tmp_T) schedule(dynamic) +!$OMP PARALLEL DO private(tmp_T) schedule(dynamic) do i = 1,Nx ! Temperature tmp_T = Ti(:,i) @@ -692,7 +708,7 @@ subroutine calc_explicit(stage) ! Bring nonlinear terms back to Fourier space start = OMP_GET_WTIME() -!$OMP PARALLEL DO num_threads(16) private(tnlT, tnlphi) schedule(dynamic) +!$OMP PARALLEL DO private(tnlT, tnlphi) schedule(dynamic) do j = 1,Ny tnlT = nlT(j,:) tnlphi = nlphi(j,:) @@ -718,7 +734,7 @@ subroutine calc_explicit(stage) start = OMP_GET_WTIME() select case (stage) case (1) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K1hat_phi(:,i) = K1hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K1hat_phi(:,i) = K1hat_phi(:,i) - CI*kx(i)*nlphi(:,i) @@ -726,7 +742,7 @@ subroutine calc_explicit(stage) !$OMP END PARALLEL DO K1hat_T = -nlT case (2) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K2hat_phi(:,i) = K2hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K2hat_phi(:,i) = K2hat_phi(:,i) - CI*kx(i)*nlphi(:,i) @@ -734,7 +750,7 @@ subroutine calc_explicit(stage) !$OMP END PARALLEL DO K2hat_T = -nlT case (3) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K3hat_phi(:,i) = K3hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K3hat_phi(:,i) = K3hat_phi(:,i) - CI*kx(i)*nlphi(:,i) @@ -742,7 +758,7 @@ subroutine calc_explicit(stage) !$OMP END PARALLEL DO K3hat_T = -nlT case (4) - !$OMP PARALLEL DO num_threads(16) schedule(dynamic) + !$OMP PARALLEL DO schedule(dynamic) do i = 1,Nx !K4hat_phi(:,i) = K4hat_phi(:,i) + CI*kx(i)*nlphi(:,i) K4hat_phi(:,i) = K4hat_phi(:,i) - CI*kx(i)*nlphi(:,i) diff --git a/time_integrators_MPI.f90 b/time_integrators_MPI.f90 new file mode 100644 index 0000000..73e48ce --- /dev/null +++ b/time_integrators_MPI.f90 @@ -0,0 +1,1637 @@ +module time_integrators_MPI + +use fftw +use global +use write_pack +use allocate_vars +use bc_setup +use statistics +use omp_lib + + +integer :: it, jt, kkt +integer :: info +real(dp) :: time, dtmax, dtmin, dt_old, dt_ramp, dt_final + +contains + +subroutine imex_rk_MPI(proc_id_str, vtk_print, save_nusselt, proc_id, num_procs) + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!! This progam solves the equations of thermal convection using a Fourier +!! spectral method in the x-direction and a 2nd order finite difference scheme in +!! the y-direction. +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
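+!! A sketch of the parallel layout assumed below: the y-direction is
+!! decomposed into contiguous blocks of Ny rows per MPI rank, so
+!!     total_ny = Ny * num_procs
+!!     global row of local row j on rank p = p*Ny + j
+!! Rank 0 gathers full-height columns for the implicit solves and boundary
+!! updates, then scatters each rank's block back at every IMEX-RK stage.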
+ + +implicit none + +character(3), intent(in) :: proc_id_str +integer, optional, intent(in) :: vtk_print +logical, optional, intent(in) :: save_nusselt +integer, intent(in) :: proc_id, num_procs + +integer :: nti, i, j, mpierror, total_ny +integer :: nprint, otherproc +logical :: wvtk +real(dp) :: nusselt_num +real(dp) :: start, finish +real(dp) :: start_overall, finish_overall +integer :: nthreads, myid, stind +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_phi_MPI, tmp_T_MPI, phi_MPI, phi_update_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_phi_MPI, K2hat_phi_MPI, K3hat_phi_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1hat_T_MPI, K2hat_T_MPI, K3hat_T_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: T_MPI, tmp_uy_MPI, tmp_uy1_MPI, tmp_phi1_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: tmp_K_phi_MPI, tmp_K_T_MPI, uxi_MPI +real(dp), allocatable, dimension(:) :: phi1_MPI, phi2_MPI, V1_MPI, V2_MPI +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:,:) :: single_phiout, single_Tout +real(dp), allocatable, dimension(:) :: g1_total, g2_total, g3_total, dynu_MPI +real(dp), allocatable, dimension(:) :: h1_total, h2_total, h3_total +real(dp) :: dyv1_T_it_MPI, dyv2_T_it_MPI, h1_end_MPI, h2_end_MPI, h3_end_MPI +integer, EXTERNAL :: OMP_GET_THREAD_NUM, OMP_GET_NUM_THREADS +real(dp), EXTERNAL :: OMP_GET_WTIME +EXTERNAL :: OMP_SET_NUM_THREADS +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +call OMP_SET_NUM_THREADS(1) + +if (present(vtk_print)) then + wvtk = .true. + nprint = vtk_print +else + wvtk = .false. + nprint = 100 +end if + +write(*,*) "imex_rk_MPI from proc ", proc_id_str + +if (wvtk) then + call write_to_vtk(0, .false., proc_id_str) ! false = Fourier space +end if + +dt = dt_init + +call init_bc_MPI(acoeffs(1,1), proc_id, num_procs, proc_id_str) + +time = 0.0_dp + +dtmax = 0.5_dp +dtmin = 1.0e-4_dp + +dt_ramp = 1.1_dp + +dt_old = dt + +nti = 0 + +! 
Format for writing out single values.x +1000 format(E25.16E3) + +if (proc_id == 0) then + total_ny = Ny * num_procs + allocate(tmp_phi_MPI(total_ny), tmp_T_MPI(total_ny), phi_MPI(total_ny-2), phi_update_MPI(total_ny), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(K1hat_phi_MPI(total_ny-2), K2hat_phi_MPI(total_ny-2), K3hat_phi_MPI(total_ny-2), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(K1hat_T_MPI(total_ny-2), K2hat_T_MPI(total_ny-2), K3hat_T_MPI(total_ny-2), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(K1_phi_MPI(total_ny-2), K2_phi_MPI(total_ny-2), K1_T_MPI(total_ny-2), K2_T_MPI(total_ny-2), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(T_MPI(total_ny), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(g1_total(total_ny), g2_total(total_ny), g3_total(total_ny), dynu_MPI(total_ny-1), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(h1_total(total_ny), h2_total(total_ny), h3_total(total_ny), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(tmp_uy_MPI(total_ny), tmp_uy1_MPI(total_ny), tmp_phi1_MPI(total_ny), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(single_phiout(total_ny, Nx), single_Tout(total_ny,Nx), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(phi1_MPI(total_ny), phi2_MPI(total_ny), V1_MPI(total_ny), V2_MPI(total_ny), stat=alloc_err) + call check_alloc_err(alloc_err) + allocate(tmp_K_phi_MPI(total_ny), tmp_K_T_MPI(total_ny), uxi_MPI(total_ny), stat=alloc_err) + call check_alloc_err(alloc_err) +end if + +call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + + +! Time integration +do ! while (time < t_final) + start_overall = OMP_GET_WTIME() + + dt_final = t_final - time + + if (dt_final <= dt) then + time = t_final + else + time = time + dt + end if + + write(*,*) "time = ", time, "dt = ", dt + + nti = nti + 1 + + !::::::::::: + ! STAGE 1 :: + !::::::::::: + phii = phi + Ti = T + uxi = ux + uyi = uy + start = OMP_GET_WTIME() + call calc_explicit_MPI(1, proc_id, num_procs, proc_id_str) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(1) timing: ", finish-start, "(s)" + start = OMP_GET_WTIME() + !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + do it = 1,Nx ! kx loop + ! Compute phi1 and T1 + if (proc_id == 0) then + ! Need to pull all of the variables into local main memory. + phi_MPI(1:Ny-1) = phi(2:Ny,it) + K1hat_phi_MPI(1:Ny-1) = K1hat_phi(2:Ny,it) + K2hat_phi_MPI(1:Ny-1) = K2hat_phi(2:Ny,it) + K3hat_phi_MPI(1:Ny-1) = K3hat_phi(2:Ny,it) + K1hat_T_MPI(1:Ny-1) = K1hat_T(2:Ny,it) + K2hat_T_MPI(1:Ny-1) = K2hat_T(2:Ny,it) + K3hat_t_MPI(1:Ny-1) = K3hat_T(2:Ny,it) + K1_phi_MPI(1:Ny-1) = K1_phi(2:Ny,it) + K2_phi_MPI(1:Ny-1) = K2_phi(2:Ny,it) + K1_T_MPI(1:Ny-1) = K1_T(2:Ny,it) + K2_T_MPI(1:Ny-1) = K2_T(2:Ny,it) + T_MPI(1:Ny) = T(1:Ny,it) + + ! Receive pieces from other nodes. 
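+         ! Ranks 1..num_procs-2 contribute full Ny-point blocks; the last
+         ! rank is handled separately below, sending Ny-1 points for phi and
+         ! the K arrays (its final row is the top wall), mirroring the 2:Ny
+         ! slice taken from rank 0's own data above. A collective such as
+         ! MPI_GATHERV would be an equivalent alternative to this
+         ! point-to-point loop.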
+ do otherproc = 1,num_procs-2 + stind = otherproc * Ny + call MPI_RECV(phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + + ! Receive from last node. + stind = (num_procs-1) * Ny + call MPI_RECV(phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + + ! Receive g1,g2,g3 from all other nodes. 
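+         ! Same row-block gather as in init_bc_MPI: rank p's metric block
+         ! lands in g1_total/g2_total/g3_total(p*Ny+1 : (p+1)*Ny).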
+ do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 54, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 55, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 56, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + g1_total(1:Ny) = g1 + g2_total(1:Ny) = g2 + g3_total(1:Ny) = g3 + + ! Call vari mod + call calc_vari_mod_MPI(tmp_phi_MPI, tmp_T_MPI, acoeffs(1,1), 1, total_ny,& + kx(it), phi_MPI,& + K1hat_phi_MPI,K2hat_phi_MPI,K3hat_phi_MPI,& + K1hat_T_MPI,K2hat_T_MPI,K3hat_T_MPI,& + K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI,& + g1_total, g2_total, g3_total,& + T_MPI) + + ! Compute v1 from phi1 + call calc_vi_mod_MPI(tmp_uy_MPI, tmp_phi_MPI, kx(it), total_ny, g1_total, g2_total, g3_total) + + ! Receive dyv1 and dyv2 + call MPI_RECV(dyv1_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 57, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(dyv2_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 58, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + ! Receive V1, V2, phi1, phi2 + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(V1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 59, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(V2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 60, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 61, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 62, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + ! Set local variables to MPI. + V1_MPI(1:Ny) = V1(1:Ny, it) + V2_MPI(1:Ny) = V2(1:Ny, it) + phi1_MPI(1:Ny) = phi1(1:Ny, it) + phi2_MPI(1:Ny) = phi2(1:Ny, it) + + ! Receive end of h1, h2, h3 for last processor. + call MPI_RECV(h1_end_MPI, 1, MPI_DOUBLE, num_procs-1, 63, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h2_end_MPI, 1, MPI_DOUBLE, num_procs-1, 64, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h3_end_MPI, 1, MPI_DOUBLE, num_procs-1, 65, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + ! BOUNDAY CONDITIONS! + call update_bcs_mod_MPI(tmp_phi1_MPI,tmp_uy1_MPI, tmp_phi_MPI,tmp_uy_MPI,& + dyv1_T_it_MPI,dyv2_T_it_MPI,& + dyv1_B(it),dyv2_B(it),& + V1_MPI,V2_MPI,phi1_MPI,phi2_MPI,& + total_ny, h1_end_MPI, h2_end_MPI, h3_end_MPI) + + tmp_phi_MPI = tmp_phi1_MPI + tmp_uy_MPI = tmp_uy1_MPI + + ! Receive dynu from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + call MPI_RECV(dynu_MPI(stind), Ny, MPI_DOUBLE, otherproc, 66, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + dynu_MPI(1:Ny-1) = dynu(1:Ny-1) + + ! Implicit. + call calc_implicit_mod_MPI(tmp_K_phi_MPI,tmp_K_T_MPI, tmp_phi_MPI,tmp_T_MPI,& + kx(it), total_ny, dynu_MPI, g1_total, g2_total, g3_total) + + ! Receive h1,h2,h3 from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 67, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 68, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 69, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + h1_total(1:Ny) = h1 + h2_total(1:Ny) = h2 + h3_total(1:Ny) = h3 + + ! 
Compute u1 from v1 + if (kx(it) /= 0.0_dp) then + uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it) + else if (kx(it) == 0.0_dp) then + uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + + ! Send K1_phi, K1_T, phii, Ti, uyi, uxi back to each node. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_SEND(tmp_K_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 70, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_K_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 71, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 72, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 73, MPI_COMM_WORLD, mpierror) + call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 74, MPI_COMM_WORLD, mpierror) + call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 75, MPI_COMM_WORLD, mpierror) + end do + + ! Set local to main node variables. + K1_phi(1:Ny,it) = tmp_K_phi_MPI(1:Ny) + K1_T(1:Ny,it) = tmp_K_T_MPI(1:Ny) + phii(1:Ny,it) = tmp_phi_MPI(1:Ny) + Ti(1:Ny,it) = tmp_T_MPI(1:Ny) + uyi(1:Ny,it) = tmp_uy_MPI(1:Ny) + uxi(1:Ny,it) = uxi_MPI(1:Ny) + + + else if (proc_id == num_procs - 1) then + ! Send data to main node + call MPI_SEND(phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send dyv1 and dyv2. + call MPI_SEND(dyv1_T(it), 1, MPI_DOUBLE, 0, 57, MPI_COMM_WORLD, mpierror) + call MPI_SEND(dyv2_T(it), 1, MPI_DOUBLE, 0, 58, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send h1,h2,h3 last elements. + call MPI_SEND(h1(Ny), 1, MPI_DOUBLE, 0, 63, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(Ny), 1, MPI_DOUBLE, 0, 64, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(Ny), 1, MPI_DOUBLE, 0, 65, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. 
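+      ! dynu on non-root ranks is assumed to carry Ny spacings (as allocated
+      ! in dxdydz_MPI), so the full local block is sent; rank 0 stores it in
+      ! dynu_MPI starting at index proc_id*Ny.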
+ call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K1_phi, K1_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + else + ! Send data to main node + call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K1_phi, K1_T, phii, Ti, uyi, uxi from main node. 
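+      ! Matching receives for the tag-70..75 scatter performed by rank 0
+      ! after the implicit solve; each rank gets back only its own Ny-row
+      ! block of the updated fields.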
+ call MPI_RECV(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end if + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 1 mid timing: ", finish-start, "(s)" + + ! Compute K2hat + start = OMP_GET_WTIME() + call calc_explicit_MPI(2, proc_id, num_procs, proc_id_str) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(2) timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + + !::::::::::: + ! STAGE 2 :: + !::::::::::: + start = OMP_GET_WTIME() + !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + do it = 1,Nx ! kx loop + ! Compute phi1 and T1 + if (proc_id == 0) then + ! Need to pull all of the variables into local main memory. + phi_MPI(1:Ny-1) = phi(2:Ny,it) + K1hat_phi_MPI(1:Ny-1) = K1hat_phi(2:Ny,it) + K2hat_phi_MPI(1:Ny-1) = K2hat_phi(2:Ny,it) + K3hat_phi_MPI(1:Ny-1) = K3hat_phi(2:Ny,it) + K1hat_T_MPI(1:Ny-1) = K1hat_T(2:Ny,it) + K2hat_T_MPI(1:Ny-1) = K2hat_T(2:Ny,it) + K3hat_t_MPI(1:Ny-1) = K3hat_T(2:Ny,it) + K1_phi_MPI(1:Ny-1) = K1_phi(2:Ny,it) + K2_phi_MPI(1:Ny-1) = K2_phi(2:Ny,it) + K1_T_MPI(1:Ny-1) = K1_T(2:Ny,it) + K2_T_MPI(1:Ny-1) = K2_T(2:Ny,it) + T_MPI(1:Ny) = T(1:Ny,it) + + ! Receive pieces from other nodes. + do otherproc = 1,num_procs-2 + stind = otherproc * Ny + call MPI_RECV(phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + + ! Receive from last node. 
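+         ! As in stage 1, the last rank supplies Ny-1 interior points for
+         ! phi and the K arrays (its final row is the top wall) but a full
+         ! Ny-point temperature block.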
+ stind = (num_procs-1) * Ny + call MPI_RECV(phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + + ! Receive g1,g2,g3 from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 54, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 55, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 56, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + g1_total(1:Ny) = g1 + g2_total(1:Ny) = g2 + g3_total(1:Ny) = g3 + + ! Call vari mod + call calc_vari_mod_MPI(tmp_phi_MPI, tmp_T_MPI, acoeffs(2,2), 2, total_ny,& + kx(it), phi_MPI,& + K1hat_phi_MPI,K2hat_phi_MPI,K3hat_phi_MPI,& + K1hat_T_MPI,K2hat_T_MPI,K3hat_T_MPI,& + K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI,& + g1_total, g2_total, g3_total,& + T_MPI) + + ! Compute v1 from phi1 + call calc_vi_mod_MPI(tmp_uy_MPI, tmp_phi_MPI, kx(it), total_ny, g1_total, g2_total, g3_total) + + ! Receive dyv1 and dyv2 + call MPI_RECV(dyv1_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 57, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(dyv2_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 58, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + ! Receive V1, V2, phi1, phi2 + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(V1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 59, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(V2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 60, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 61, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 62, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + ! Set local variables to MPI. + V1_MPI(1:Ny) = V1(1:Ny, it) + V2_MPI(1:Ny) = V2(1:Ny, it) + phi1_MPI(1:Ny) = phi1(1:Ny, it) + phi2_MPI(1:Ny) = phi2(1:Ny, it) + + ! Receive end of h1, h2, h3 for last processor. 
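+         ! h1(Ny), h2(Ny), h3(Ny) from the last rank form the one-sided
+         ! first-derivative stencil at the top wall, needed by
+         ! update_bcs_mod_MPI below.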
+ call MPI_RECV(h1_end_MPI, 1, MPI_DOUBLE, num_procs-1, 63, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_end_MPI, 1, MPI_DOUBLE, num_procs-1, 64, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_end_MPI, 1, MPI_DOUBLE, num_procs-1, 65, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+ ! BOUNDARY CONDITIONS!
+ call update_bcs_mod_MPI(tmp_phi1_MPI,tmp_uy1_MPI, tmp_phi_MPI,tmp_uy_MPI,&
+ dyv1_T_it_MPI,dyv2_T_it_MPI,&
+ dyv1_B(it),dyv2_B(it),&
+ V1_MPI,V2_MPI,phi1_MPI,phi2_MPI,&
+ total_ny, h1_end_MPI, h2_end_MPI, h3_end_MPI)
+
+ tmp_phi_MPI = tmp_phi1_MPI
+ tmp_uy_MPI = tmp_uy1_MPI
+
+ ! Receive dynu from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny
+ call MPI_RECV(dynu_MPI(stind), Ny, MPI_DOUBLE, otherproc, 66, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ dynu_MPI(1:Ny-1) = dynu(1:Ny-1)
+
+ ! Implicit.
+ call calc_implicit_mod_MPI(tmp_K_phi_MPI,tmp_K_T_MPI, tmp_phi_MPI,tmp_T_MPI,&
+ kx(it), total_ny, dynu_MPI, g1_total, g2_total, g3_total)
+
+ ! Receive h1,h2,h3 from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 67, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 68, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 69, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ h1_total(1:Ny) = h1
+ h2_total(1:Ny) = h2
+ h3_total(1:Ny) = h3
+
+ ! Compute u1 from v1
+ if (kx(it) /= 0.0_dp) then
+ uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it)
+ else if (kx(it) == 0.0_dp) then
+ uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow!
+ end if
+
+ ! Send K2_phi, K2_T, phii, Ti, uyi, uxi back to each node.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_SEND(tmp_K_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 70, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_K_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 71, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 72, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 73, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 74, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 75, MPI_COMM_WORLD, mpierror)
+ end do
+
+ ! Set local to main node variables.
+ K2_phi(1:Ny,it) = tmp_K_phi_MPI(1:Ny)
+ K2_T(1:Ny,it) = tmp_K_T_MPI(1:Ny)
+ phii(1:Ny,it) = tmp_phi_MPI(1:Ny)
+ Ti(1:Ny,it) = tmp_T_MPI(1:Ny)
+ uyi(1:Ny,it) = tmp_uy_MPI(1:Ny)
+ uxi(1:Ny,it) = uxi_MPI(1:Ny)
+
+
+ else if (proc_id == num_procs - 1) then
+ !
Send data to main node + call MPI_SEND(phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send dyv1 and dyv2. + call MPI_SEND(dyv1_T(it), 1, MPI_DOUBLE, 0, 57, MPI_COMM_WORLD, mpierror) + call MPI_SEND(dyv2_T(it), 1, MPI_DOUBLE, 0, 58, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send h1,h2,h3 last elements. + call MPI_SEND(h1(Ny), 1, MPI_DOUBLE, 0, 63, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(Ny), 1, MPI_DOUBLE, 0, 64, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(Ny), 1, MPI_DOUBLE, 0, 65, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K2_phi, K2_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + else + ! 
Send data to main node + call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K2_phi, K2_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end if + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 2 mid timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + ! Compute K3hat + start = OMP_GET_WTIME() + call calc_explicit_MPI(3, proc_id, num_procs, proc_id_str) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(3) timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + + !::::::::::: + ! STAGE 3 :: + !::::::::::: + start = OMP_GET_WTIME() + !$OMP PARALLEL DO private(tmp_phi, tmp_T, tmp_uy, tmp_phi1, tmp_uy1, tmp_K_phi, tmp_K_T) schedule(dynamic) + do it = 1,Nx ! kx loop + ! Compute phi1 and T1 + if (proc_id == 0) then + ! Need to pull all of the variables into local main memory. 
+ phi_MPI(1:Ny-1) = phi(2:Ny,it) + K1hat_phi_MPI(1:Ny-1) = K1hat_phi(2:Ny,it) + K2hat_phi_MPI(1:Ny-1) = K2hat_phi(2:Ny,it) + K3hat_phi_MPI(1:Ny-1) = K3hat_phi(2:Ny,it) + K1hat_T_MPI(1:Ny-1) = K1hat_T(2:Ny,it) + K2hat_T_MPI(1:Ny-1) = K2hat_T(2:Ny,it) + K3hat_t_MPI(1:Ny-1) = K3hat_T(2:Ny,it) + K1_phi_MPI(1:Ny-1) = K1_phi(2:Ny,it) + K2_phi_MPI(1:Ny-1) = K2_phi(2:Ny,it) + K1_T_MPI(1:Ny-1) = K1_T(2:Ny,it) + K2_T_MPI(1:Ny-1) = K2_T(2:Ny,it) + T_MPI(1:Ny) = T(1:Ny,it) + + ! Receive pieces from other nodes. + do otherproc = 1,num_procs-2 + stind = otherproc * Ny + call MPI_RECV(phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + + ! Receive from last node. 
+ stind = (num_procs-1) * Ny + call MPI_RECV(phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3hat_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_phi_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 50, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K1_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 51, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K2_T_MPI(stind), Ny-1, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 52, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(T_MPI(stind+1), Ny, MPI_C_DOUBLE_COMPLEX, & + num_procs-1, 53, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + + ! Receive g1,g2,g3 from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 54, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 55, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 56, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + g1_total(1:Ny) = g1 + g2_total(1:Ny) = g2 + g3_total(1:Ny) = g3 + + ! Call vari mod + call calc_vari_mod_MPI(tmp_phi_MPI, tmp_T_MPI, acoeffs(3,3), 3, total_ny,& + kx(it), phi_MPI,& + K1hat_phi_MPI,K2hat_phi_MPI,K3hat_phi_MPI,& + K1hat_T_MPI,K2hat_T_MPI,K3hat_T_MPI,& + K1_phi_MPI, K2_phi_MPI, K1_T_MPI, K2_T_MPI,& + g1_total, g2_total, g3_total,& + T_MPI) + + ! Compute v1 from phi1 + call calc_vi_mod_MPI(tmp_uy_MPI, tmp_phi_MPI, kx(it), total_ny, g1_total, g2_total, g3_total) + + ! Receive dyv1 and dyv2 + call MPI_RECV(dyv1_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 57, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(dyv2_T_it_MPI, 1, MPI_DOUBLE, num_procs-1, 58, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + ! Receive V1, V2, phi1, phi2 + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(V1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 59, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(V2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 60, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi1_MPI(stind), Ny, MPI_DOUBLE, otherproc, 61, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phi2_MPI(stind), Ny, MPI_DOUBLE, otherproc, 62, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + ! Set local variables to MPI. + V1_MPI(1:Ny) = V1(1:Ny, it) + V2_MPI(1:Ny) = V2(1:Ny, it) + phi1_MPI(1:Ny) = phi1(1:Ny, it) + phi2_MPI(1:Ny) = phi2(1:Ny, it) + + ! Receive end of h1, h2, h3 for last processor. 
+ call MPI_RECV(h1_end_MPI, 1, MPI_DOUBLE, num_procs-1, 63, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_end_MPI, 1, MPI_DOUBLE, num_procs-1, 64, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_end_MPI, 1, MPI_DOUBLE, num_procs-1, 65, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+
+ ! BOUNDARY CONDITIONS!
+ call update_bcs_mod_MPI(tmp_phi1_MPI,tmp_uy1_MPI, tmp_phi_MPI,tmp_uy_MPI,&
+ dyv1_T_it_MPI,dyv2_T_it_MPI,&
+ dyv1_B(it),dyv2_B(it),&
+ V1_MPI,V2_MPI,phi1_MPI,phi2_MPI,&
+ total_ny, h1_end_MPI, h2_end_MPI, h3_end_MPI)
+
+ tmp_phi_MPI = tmp_phi1_MPI
+ tmp_uy_MPI = tmp_uy1_MPI
+
+ ! Receive dynu from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny
+ call MPI_RECV(dynu_MPI(stind), Ny, MPI_DOUBLE, otherproc, 66, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ dynu_MPI(1:Ny-1) = dynu(1:Ny-1)
+
+ ! Implicit.
+ call calc_implicit_mod_MPI(tmp_K_phi_MPI,tmp_K_T_MPI, tmp_phi_MPI,tmp_T_MPI,&
+ kx(it), total_ny, dynu_MPI, g1_total, g2_total, g3_total)
+
+ ! Receive h1,h2,h3 from all other nodes.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 67, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 68, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 69, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
+ end do
+ h1_total(1:Ny) = h1
+ h2_total(1:Ny) = h2
+ h3_total(1:Ny) = h3
+
+ ! Compute u1 from v1
+ if (kx(it) /= 0.0_dp) then
+ uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it)
+ else if (kx(it) == 0.0_dp) then
+ uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow!
+ end if
+
+ ! Send K3_phi, K3_T, phii, Ti, uyi, uxi back to each node.
+ do otherproc = 1,num_procs-1
+ stind = otherproc * Ny + 1
+ call MPI_SEND(tmp_K_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 70, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_K_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 71, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_phi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 72, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_T_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 73, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 74, MPI_COMM_WORLD, mpierror)
+ call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 75, MPI_COMM_WORLD, mpierror)
+ end do
+
+ ! Set local to main node variables.
+ K3_phi(1:Ny,it) = tmp_K_phi_MPI(1:Ny)
+ K3_T(1:Ny,it) = tmp_K_T_MPI(1:Ny)
+ phii(1:Ny,it) = tmp_phi_MPI(1:Ny)
+ Ti(1:Ny,it) = tmp_T_MPI(1:Ny)
+ uyi(1:Ny,it) = tmp_uy_MPI(1:Ny)
+ uxi(1:Ny,it) = uxi_MPI(1:Ny)
+
+
+ else if (proc_id == num_procs - 1) then
+ !
Send data to main node + call MPI_SEND(phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny-1,it), Ny-1, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send dyv1 and dyv2. + call MPI_SEND(dyv1_T(it), 1, MPI_DOUBLE, 0, 57, MPI_COMM_WORLD, mpierror) + call MPI_SEND(dyv2_T(it), 1, MPI_DOUBLE, 0, 58, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send h1,h2,h3 last elements. + call MPI_SEND(h1(Ny), 1, MPI_DOUBLE, 0, 63, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(Ny), 1, MPI_DOUBLE, 0, 64, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(Ny), 1, MPI_DOUBLE, 0, 65, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K3_phi, K3_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K3_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + + else + ! 
Send data to main node + call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 42, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 43, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 44, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 45, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 46, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 47, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K3hat_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 48, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 49, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 50, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K1_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 51, MPI_COMM_WORLD, mpierror) + call MPI_SEND(K2_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 52, MPI_COMM_WORLD, mpierror) + call MPI_SEND(T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 53, MPI_COMM_WORLD, mpierror) + + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 54, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 55, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 56, MPI_COMM_WORLD, mpierror) + + ! Send V1, V2, phi1, phi2 + call MPI_SEND(V1(1:Ny,it), Ny, MPI_DOUBLE, 0, 59, MPI_COMM_WORLD, mpierror) + call MPI_SEND(V2(1:Ny,it), Ny, MPI_DOUBLE, 0, 60, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi1(1:Ny,it), Ny, MPI_DOUBLE, 0, 61, MPI_COMM_WORLD, mpierror) + call MPI_SEND(phi2(1:Ny,it), Ny, MPI_DOUBLE, 0, 62, MPI_COMM_WORLD, mpierror) + + ! Send dynu to main. + call MPI_SEND(dynu(1), Ny, MPI_DOUBLE, 0, 66, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 67, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 68, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 69, MPI_COMM_WORLD, mpierror) + + ! Receive K3_phi, K3_T, phii, Ti, uyi, uxi from main node. + call MPI_RECV(K3_phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 70,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(K3_T(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 71,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(phii(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 72,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(Ti(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 73,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uyi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 74,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(uxi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 75,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end if + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - stage 3 mid timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + ! Compute K4hat + start = OMP_GET_WTIME() + call calc_explicit_MPI(4, proc_id, num_procs, proc_id_str) + finish = OMP_GET_WTIME() + write(*,*) " - calc_explicit(4) timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + + start = OMP_GET_WTIME() + ! UPDATE SOLUTIONS + if (proc_id == 0) then + ! Get phi + phi(2:Ny,:) = phi(2:Ny,:) + dt*(b(1)*(K1_phi(2:Ny,:) + K2hat_phi(2:Ny,:)) + & + & b(2)*(K2_phi(2:Ny,:) + K3hat_phi(2:Ny,:)) + & + & b(3)*(K3_phi(2:Ny,:) + K4hat_phi(2:Ny,:))) + + ! 
Get temperature + T(2:Ny,:) = T(2:Ny,:) + dt*(b(1)*(K1_T(2:Ny,:) + K2hat_T(2:Ny,:)) + & + & b(2)*(K2_T(2:Ny,:) + K3hat_T(2:Ny,:)) + & + & b(3)*(K3_T(2:Ny,:) + K4hat_T(2:Ny,:))) + else if (proc_id == num_procs - 1) then + ! Get phi + phi(1:Ny-1,:) = phi(1:Ny-1,:) + dt*(b(1)*(K1_phi(1:Ny-1,:) + K2hat_phi(1:Ny-1,:)) + & + & b(2)*(K2_phi(1:Ny-1,:) + K3hat_phi(1:Ny-1,:)) + & + & b(3)*(K3_phi(1:Ny-1,:) + K4hat_phi(1:Ny-1,:))) + + ! Get temperature + T(1:Ny-1,:) = T(1:Ny-1,:) + dt*(b(1)*(K1_T(1:Ny-1,:) + K2hat_T(1:Ny-1,:)) + & + & b(2)*(K2_T(1:Ny-1,:) + K3hat_T(1:Ny-1,:)) + & + & b(3)*(K3_T(1:Ny-1,:) + K4hat_T(1:Ny-1,:))) + else + ! Get phi + phi(1:Ny,:) = phi(1:Ny,:) + dt*(b(1)*(K1_phi(1:Ny,:) + K2hat_phi(1:Ny,:)) + & + & b(2)*(K2_phi(1:Ny,:) + K3hat_phi(1:Ny,:)) + & + & b(3)*(K3_phi(1:Ny,:) + K4hat_phi(1:Ny,:))) + + ! Get temperature + T(1:Ny,:) = T(1:Ny,:) + dt*(b(1)*(K1_T(1:Ny,:) + K2hat_T(1:Ny,:)) + & + & b(2)*(K2_T(1:Ny,:) + K3hat_T(1:Ny,:)) + & + & b(3)*(K3_T(1:Ny,:) + K4hat_T(1:Ny,:))) + end if + + ! Get ux and uy + if (proc_id == 0) then + ! Receive g1,g2,g3 from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(g1_total(stind), Ny, MPI_DOUBLE, otherproc, 80, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g2_total(stind), Ny, MPI_DOUBLE, otherproc, 81, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(g3_total(stind), Ny, MPI_DOUBLE, otherproc, 82, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + g1_total(1:Ny) = g1 + g2_total(1:Ny) = g2 + g3_total(1:Ny) = g3 + + ! Receive h1,h2,h3 from all other nodes. + do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_RECV(h1_total(stind), Ny, MPI_DOUBLE, otherproc, 83, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h2_total(stind), Ny, MPI_DOUBLE, otherproc, 84, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(h3_total(stind), Ny, MPI_DOUBLE, otherproc, 85, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + h1_total(1:Ny) = h1 + h2_total(1:Ny) = h2 + h3_total(1:Ny) = h3 + else + ! Send g1, g2, g3 to first node. + call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 80, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 81, MPI_COMM_WORLD, mpierror) + call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 82, MPI_COMM_WORLD, mpierror) + + ! Send h1, h2, h3 to first node. + call MPI_SEND(h1(1), Ny, MPI_DOUBLE, 0, 83, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h2(1), Ny, MPI_DOUBLE, 0, 84, MPI_COMM_WORLD, mpierror) + call MPI_SEND(h3(1), Ny, MPI_DOUBLE, 0, 85, MPI_COMM_WORLD, mpierror) + end if + !$OMP PARALLEL DO private(tmp_uy, it) schedule(dynamic) + do it = 1,Nx + ! Only call from single node. + if (proc_id == 0) then + + do otherproc= 1, num_procs-1 + stind = otherproc * Ny + 1 + ! Receive phi + call MPI_RECV(phi_update_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, & + otherproc, 86, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end do + ! Set local phi + phi_update_MPI(1:Ny) = phi(1:Ny,it) + + ! Solve for v + call calc_vi_mod_MPI(tmp_uy_MPI, phi_update_MPI, kx(it), total_ny, g1_total, g2_total, g3_total) + ! Solve for u + if (kx(it) /= 0.0_dp) then + uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it) + else if (kx(it) == 0.0_dp) then + uxi_MPI = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + + ! Send uy, ux back to each node. 
+ do otherproc = 1,num_procs-1 + stind = otherproc * Ny + 1 + call MPI_SEND(tmp_uy_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 87, MPI_COMM_WORLD, mpierror) + call MPI_SEND(uxi_MPI(stind), Ny, MPI_C_DOUBLE_COMPLEX, otherproc, 88, MPI_COMM_WORLD, mpierror) + end do + ! Set local versions. + uy(1:Ny,it) = tmp_uy_MPI(1:Ny) + ux(1:Ny,it) = uxi_MPI(1:Ny) + + else + ! Send phi + call MPI_SEND(phi(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 86, MPI_COMM_WORLD, mpierror) + + ! Receive uy, ux from main node. + call MPI_RECV(uy(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 87,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + call MPI_RECV(ux(1:Ny,it), Ny, MPI_C_DOUBLE_COMPLEX, 0, 88,& + MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror) + end if + end do + !$OMP END PARALLEL DO + finish = OMP_GET_WTIME() + write(*,*) " - update sols timing: ", finish-start, "(s)" + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + + ! open(unit=9010, file="P"//proc_id_str//"uy_real_update.txt", action="write", status="unknown") + ! open(unit=9011, file="P"//proc_id_str//"uy_im_update.txt", action="write", status="unknown") + ! do i=1,Ny + ! do j=1,Nx + ! write (9010,*) REAL(uy(i,j)) + ! write (9011,*) AIMAG(uy(i,j)) + ! end do + ! end do + ! close(unit=9010) + ! close(unit=9011) + ! write(*,*) "done writing uy!" + + if (time == t_final) then + open(unit=9010, file="P"//proc_id_str//"T_real_update.txt", action="write", status="unknown") + open(unit=9011, file="P"//proc_id_str//"T_im_update.txt", action="write", status="unknown") + do i=1,Ny + do j=1,Nx + write (9010,*) REAL(T(i,j)) + write (9011,*) AIMAG(T(i,j)) + end do + end do + close(unit=9010) + close(unit=9011) + write(*,*) "done writing T!" + exit + end if + + if (wvtk) then + call write_to_vtk(nti, .false., proc_id_str) ! false = Fourier space + end if + + call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + write(*,*) "proc ", proc_id_str, " write vtk complete for t", time + + end do + +end subroutine imex_rk_MPI + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +subroutine calc_explicit_MPI(stage, proc_id, num_procs, proc_id_str) + +integer :: i, j +integer, intent(in) :: stage, proc_id, num_procs +character(3), intent(in) :: proc_id_str +real(dp) :: start, finish + +start = OMP_GET_WTIME() +select case(stage) + case (1) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K1hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO + case (2) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K2hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO + case (3) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K3hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO + case (4) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + K4hat_phi(:,i) = -kx(i)**2.0_dp*Ti(:,i) + end do + !$OMP END PARALLEL DO +end select +finish = OMP_GET_WTIME() +write(*,*) " - - l1 timing: ", finish-start, "(s)" + +start = OMP_GET_WTIME() +!$OMP PARALLEL DO schedule(dynamic) +do i=1,Nx + ! Compute dx(T) in Fourier space + nlT (:,i) = kx(i)*Ti(:,i) + ! Compute D2(ux) + nlphi(:,i) = -kx(i)**2.0_dp*uxi(:,i) + d2y_MPI2(uxi(:,i), proc_id, num_procs) +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() + +write(*,*) " - - l2 timing: ", finish-start, "(s)" + +!nlT = -CI*nlT +nlT = CI*nlT + +start = OMP_GET_WTIME() +!$OMP PARALLEL DO private(tnlT, tnlphi, tT, tux, tuy, tphi) schedule(dynamic) +do j = 1,Ny + ! 
Bring everything to physical space + tnlT = nlT(j,:) + tnlphi = nlphi(j,:) + tT = Ti(j,:) + tux = uxi(j,:) + tuy = uyi(j,:) + tphi = phii(j,:) + call fftw_execute_dft(iplannlT, tnlT, tnlT) + call fftw_execute_dft(iplannlphi, tnlphi, tnlphi) + call fftw_execute_dft(iplanT, tT, tT) + call fftw_execute_dft(iplanux, tux, tux) + call fftw_execute_dft(iplanuy, tuy, tuy) + call fftw_execute_dft(iplanphi, tphi, tphi) + nlT(j,:) = tnlT + nlphi(j,:) = tnlphi + Ti(j,:) = tT + uxi(j,:) = tux + uyi(j,:) = tuy + phii(j,:) = tphi +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() +write(*,*) " - - l3 timing: ", finish-start, "(s)" + +! Calculate nonlinear term +start = OMP_GET_WTIME() +!$OMP PARALLEL DO private(tmp_T) schedule(dynamic) +do i = 1,Nx + ! Temperature + tmp_T = Ti(:,i) + nlT(:,i) = uxi(:,i)*nlT(:,i) + uyi(:,i)*d1y_MPI2(tmp_T, proc_id, num_procs) + ! phi + nlphi(:,i) = uxi(:,i)*phii(:,i) - uyi(:,i)*nlphi(:,i) +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() +write(*,*) " - - l4 timing: ", finish-start, "(s)" + +! Bring nonlinear terms back to Fourier space +start = OMP_GET_WTIME() +!$OMP PARALLEL DO private(tnlT, tnlphi) schedule(dynamic) +do j = 1,Ny + tnlT = nlT(j,:) + tnlphi = nlphi(j,:) + call fftw_execute_dft(plannlT, tnlT, tnlT) + call fftw_execute_dft(plannlphi, tnlphi, tnlphi) + ! Dealias + do i = 1,Nx + if (abs(kx(i))/alpha >= Nf/2) then + tnlT(i) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + tnlphi(i) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + end if + end do + nlT(j,:) = tnlT + nlphi(j,:) = tnlphi +end do +!$OMP END PARALLEL DO +finish = OMP_GET_WTIME() +write(*,*) " - - l5 timing: ", finish-start, "(s)" + +nlT = nlT / real(Nx,kind=dp) +nlphi = nlphi / real(Nx,kind=dp) + +start = OMP_GET_WTIME() +select case (stage) + case (1) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K1hat_phi(:,i) = K1hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K1hat_phi(:,i) = K1hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K1hat_T = -nlT + case (2) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K2hat_phi(:,i) = K2hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K2hat_phi(:,i) = K2hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K2hat_T = -nlT + case (3) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K3hat_phi(:,i) = K3hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K3hat_phi(:,i) = K3hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K3hat_T = -nlT + case (4) + !$OMP PARALLEL DO schedule(dynamic) + do i = 1,Nx + !K4hat_phi(:,i) = K4hat_phi(:,i) + CI*kx(i)*nlphi(:,i) + K4hat_phi(:,i) = K4hat_phi(:,i) - CI*kx(i)*nlphi(:,i) + end do + !$OMP END PARALLEL DO + K4hat_T = -nlT +end select +finish = OMP_GET_WTIME() +write(*,*) " - - l6 timing: ", finish-start, "(s)" + +end subroutine calc_explicit_MPI + +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! 
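A note on the communication pattern used throughout imex_rk_MPI and this module: rank 0 assembles full-height columns by posting one MPI_RECV per array per rank (offsets stind = otherproc * Ny + 1, or off by one where the wall points are dropped), and later scatters the results back with matching MPI_SEND calls. The fragment below is a minimal, standalone sketch of that slab layout, not part of the diff and not the code's method: the program name, the illustrative Ny = 4, the Fortran datatype MPI_DOUBLE_PRECISION, and the use of the collective MPI_GATHER in place of the hand-rolled send/receive pairs are all my additions, shown only to make the rank-ordered data layout explicit.

program gather_demo
! Minimal sketch: each rank owns Ny rows of a column; rank 0 assembles the
! full column in rank order, mirroring the stind offsets used above.
use mpi
implicit none
integer, parameter :: dp = kind(1.0d0)
integer, parameter :: Ny = 4          ! rows per rank (illustrative value)
real(dp) :: local_col(Ny)
real(dp), allocatable :: full_col(:)
integer :: mpierror, proc_id, num_procs, j

call MPI_Init(mpierror)
call MPI_Comm_rank(MPI_COMM_WORLD, proc_id, mpierror)
call MPI_Comm_size(MPI_COMM_WORLD, num_procs, mpierror)

! Fill the local slab with globally increasing values so the gathered
! ordering is easy to check.
do j = 1, Ny
   local_col(j) = real(proc_id*Ny + j, dp)
end do

allocate(full_col(Ny*num_procs))

! Rank 0 receives the slabs concatenated in rank order.
call MPI_Gather(local_col, Ny, MPI_DOUBLE_PRECISION, &
                full_col, Ny, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpierror)

if (proc_id == 0) write(*,*) full_col

call MPI_Finalize(mpierror)
end program gather_demo

Run with any number of ranks; rank 0 prints 1 through Ny*num_procs in order, which is the same ordering the per-rank receives above reproduce by hand.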
+subroutine calc_vari_mod_MPI(phiout,Tout, aii, stage, total_ny, kx_it, phi_in, & + k1hat_phi_in, k2hat_phi_in, k3hat_phi_in,& + k1hat_T_in, k2hat_T_in, k3hat_T_in,& + k1_phi_in, k2_phi_in, k1_T_in, k2_T_in,& + g1_total, g2_total, g3_total,& + T_in) + +real(dp), intent(in) :: aii +integer, intent(in) :: stage, total_ny +real(dp), intent(in) :: kx_it +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1hat_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2hat_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k3hat_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1hat_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2hat_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k3hat_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny), intent(in) :: T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2_phi_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k1_T_in +complex(C_DOUBLE_COMPLEX), dimension(total_ny-2), intent(in) :: k2_T_in +real(dp), dimension(total_ny), intent(in) :: g1_total, g2_total, g3_total +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:), intent(out) :: phiout, Tout +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:) :: Fphi, FT +real(dp), allocatable, dimension(:,:) :: phi_rhs, T_rhs +real(dp), allocatable, dimension(:) :: dphi, duphi, dlphi +real(dp), allocatable, dimension(:) :: ddT, duT, dlT +integer :: i + +allocate(dphi(total_ny-2), ddT(total_ny-2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(duphi(total_ny-3), dlphi(total_ny-3), duT(total_ny-3), dlT(total_ny-3), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(phiout(total_ny), Tout(total_ny), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(Fphi(total_ny-2), FT(total_ny-2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(phi_rhs(total_ny-2,2), T_rhs(total_ny-2,2), stat=alloc_err) +call check_alloc_err(alloc_err) + +dphi = 0.0_dp +ddT = 0.0_dp +duphi = 0.0_dp +dlphi = 0.0_dp +duT = 0.0_dp +dlT = 0.0_dp +phi_rhs = 0.0_dp +T_rhs = 0.0_dp +Fphi = (0.0_dp, 0.0_dp) +FT = (0.0_dp, 0.0_dp) +phiout = (0.0_dp, 0.0_dp) +Tout = (0.0_dp, 0.0_dp) + +! LHS Matrix (tridiagonal, not necessarily symmetric) +do jt = 2,total_ny-1 + ddT (jt-1) = 1.0_dp - kappa0*dt*aii*(-kx_it**2.0_dp + g2_total(jt)) + dphi(jt-1) = 1.0_dp - nu0 *dt*aii*(-kx_it**2.0_dp + g2_total(jt)) +end do + +do jt = 2,total_ny-2 + duT (jt-1) = -kappa0*dt*g3_total(jt)*aii + duphi(jt-1) = -nu0 *dt*g3_total(jt)*aii +end do + +do jt = 3,total_ny-1 + dlT (jt-2) = -kappa0*dt*g1_total(jt)*aii + dlphi(jt-2) = -nu0 *dt*g1_total(jt)*aii +end do + +select case (stage) + case(1) + Fphi = phi_in + dt*ahatcoeffs(2,1)*k1hat_phi_in + FT = T_in(2:total_ny-1) + dt*ahatcoeffs(2,1)*k1hat_T_in + FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) + FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) + + phi_rhs(:,1) = real(Fphi) + phi_rhs(:,2) = aimag(Fphi) + + T_rhs (:,1) = real(FT) + T_rhs (:,2) = aimag(FT) + + call dgtsv(total_ny-2, 2, dlT, ddT, duT, T_rhs, total_ny-2, info) + Tout(2:total_ny-1) = cmplx(T_rhs(:,1), T_rhs(:,2), kind=C_DOUBLE_COMPLEX) + ! 
Set temperature boundary conditions + Tout(1) = T_in(1) + Tout(total_ny) = T_in(total_ny) + + call dgtsv(total_ny-2, 2, dlphi, dphi, duphi, phi_rhs, total_ny-2, info) + phiout(2:total_ny-1) = cmplx(phi_rhs(:,1), phi_rhs(:,2), kind=C_DOUBLE_COMPLEX) + + case(2) + Fphi = phi_in + dt*(acoeffs(2,1)*k1_phi_in + & + & ahatcoeffs(3,1)*k1hat_phi_in + & + & ahatcoeffs(3,2)*k2hat_phi_in) + FT = T_in(2:total_ny-1) + dt*(acoeffs(2,1)*k1_T_in + & + & ahatcoeffs(3,1)*k1hat_T_in + & + & ahatcoeffs(3,2)*k2hat_T_in) + FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) + FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) + + ! do i = 1,total_ny-2 + ! write(*,*) i, k2hat_phi_in(i) + ! end do + + phi_rhs(:,1) = real(Fphi) + phi_rhs(:,2) = aimag(Fphi) + + T_rhs (:,1) = real(FT) + T_rhs (:,2) = aimag(FT) + + call dgtsv(total_ny-2, 2, dlT, ddT, duT, T_rhs, total_ny-2, info) + Tout(2:total_ny-1) = cmplx(T_rhs(:,1), T_rhs(:,2), kind=C_DOUBLE_COMPLEX) + ! Set temperature boundary conditions + Tout(1) = T_in(1) + Tout(total_ny) = T_in(total_ny) + + call dgtsv(total_ny-2, 2, dlphi, dphi, duphi, phi_rhs, total_ny-2, info) + phiout(2:total_ny-1) = cmplx(phi_rhs(:,1), phi_rhs(:,2), kind=C_DOUBLE_COMPLEX) + + case(3) + Fphi = phi_in + dt*(acoeffs(3,1)*k1_phi_in + & + &acoeffs(3,2)*k2_phi_in + & + &ahatcoeffs(4,1)*k1hat_phi_in + & + &ahatcoeffs(4,2)*k2hat_phi_in + & + &ahatcoeffs(4,3)*k3hat_phi_in) + FT = T_in(2:total_ny-1) + dt*(acoeffs(3,1)*k1_T_in + & + &acoeffs(3,2)*k2_T_in + & + &ahatcoeffs(4,1)*k1hat_T_in + & + &ahatcoeffs(4,2)*k2hat_T_in + & + &ahatcoeffs(4,3)*k3hat_T_in) + FT(1) = FT(1) + kappa0*dt*aii*g1_total(2)*T_in(1) ! b/c Ti(y_1) = T(y_1) + FT(total_ny-2) = FT(total_ny-2) + kappa0*dt*aii*g3_total(total_ny-1)*T_in(total_ny) ! b/c Ti(total_ny) = T(total_ny) + + phi_rhs(:,1) = real(Fphi) + phi_rhs(:,2) = aimag(Fphi) + + T_rhs (:,1) = real(FT) + T_rhs (:,2) = aimag(FT) + + call dgtsv(total_ny-2, 2, dlT, ddT, duT, T_rhs, total_ny-2, info) + Tout(2:total_ny-1) = cmplx(T_rhs(:,1), T_rhs(:,2), kind=C_DOUBLE_COMPLEX) + ! Set temperature boundary conditions + Tout(1) = T_in(1) + Tout(total_ny) = T_in(total_ny) + + call dgtsv(total_ny-2, 2, dlphi, dphi, duphi, phi_rhs, total_ny-2, info) + phiout(2:total_ny-1) = cmplx(phi_rhs(:,1), phi_rhs(:,2), kind=C_DOUBLE_COMPLEX) + +end select + +end subroutine calc_vari_mod_MPI +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! 
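The solve pattern in calc_vari_mod_MPI (and in calc_vi_mod_MPI below) is worth spelling out: the unknowns are complex, but the tridiagonal matrices are real, so the real LAPACK solver dgtsv is called once with the real and imaginary parts of the right-hand side stacked as two columns. The fragment below is a minimal, standalone sketch of that trick, not part of the diff; the program name, the size n = 5, and the matrix values are illustrative, and it assumes LAPACK is available at link time. Note also that dgtsv factors in place and overwrites its diagonals, which is why the module rebuilds d, dl, and du before every solve.

program tridiag_complex_demo
! Minimal sketch of solving a complex tridiagonal system A*x = b with the
! real solver dgtsv: since A is real, A*Re(x) = Re(b) and A*Im(x) = Im(b),
! so both parts are passed as two right-hand-side columns.
implicit none
integer, parameter :: dp = kind(1.0d0)
integer, parameter :: n = 5
real(dp)    :: dl(n-1), d(n), du(n-1), rhs(n,2)
complex(dp) :: b(n), x(n)
integer     :: j, info

! A simple diagonally dominant tridiagonal matrix.
dl = -1.0_dp
du = -1.0_dp
d  =  4.0_dp

! An arbitrary complex right-hand side.
do j = 1, n
   b(j) = cmplx(real(j, dp), -real(j, dp), kind=dp)
end do

rhs(:,1) = real(b)
rhs(:,2) = aimag(b)

! dgtsv overwrites dl, d, du and returns the two solution columns in rhs.
call dgtsv(n, 2, dl, d, du, rhs, n, info)
if (info /= 0) stop "dgtsv failed"

x = cmplx(rhs(:,1), rhs(:,2), kind=dp)
write(*,*) x
end program tridiag_complex_demo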
+ +subroutine calc_vi_mod_MPI(vi, phiin, kx_it, total_ny, g1_total, g2_total, g3_total) +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: phiin +complex(C_DOUBLE_COMPLEX), dimension(:), intent(out) :: vi +real(dp), intent(in) :: kx_it +real(dp), allocatable, dimension(:,:) :: vi_rhs +real(dp), allocatable, dimension(:) :: dvi, dlvi, duvi +integer :: j +integer, intent(in) :: total_ny +real(dp), dimension(:), intent(in) :: g1_total, g2_total, g3_total + +allocate(vi_rhs(total_ny-2,2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(dvi(total_ny-2), stat=alloc_err) +call check_alloc_err(alloc_err) +allocate(dlvi(total_ny-3), duvi(total_ny-3), stat=alloc_err) +call check_alloc_err(alloc_err) + +vi = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +vi_rhs = 0.0_dp +dvi = 0.0_dp +dlvi = 0.0_dp +duvi = 0.0_dp + +do j = 2,total_ny-1 + dvi(j-1) = -kx_it**2.0_dp + g2_total(j) +end do + +do j = 2,total_ny-2 + duvi(j-1) = g3_total(j) +end do + +do j = 3,total_ny-1 + dlvi(j-2) = g1_total(j) +end do + +vi_rhs(:,1) = real (phiin(2:total_ny-1)) +vi_rhs(:,2) = aimag(phiin(2:total_ny-1)) + +call dgtsv(total_ny-2, 2, dlvi, dvi, duvi, vi_rhs, total_ny-2, info) + +vi(1) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +vi(2:total_ny-1) = cmplx(vi_rhs(:,1), vi_rhs(:,2), kind=C_DOUBLE_COMPLEX) +vi(total_ny) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + +end subroutine calc_vi_mod_MPI + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +subroutine update_bcs_mod_MPI(phiout,vout, phiin,vin,dyv1_T_it,dyv2_T_it,dyv1_B_it,& + dyv2_B_it,V1_in, V2_in, phi1_in, phi2_in,& + total_ny, h1_end, h2_end, h3_end) + +integer, intent(in) :: total_ny +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: phiin, vin +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:), intent(out) :: phiout, vout +real(dp), dimension(2,2) :: C +real(dp) :: detC +complex(dp) :: c1, c2, c1t +complex(dp) :: dyV_T, dyV_B +real(dp), intent(in) :: dyv1_T_it,dyv2_T_it,dyv1_B_it,dyv2_B_it +real(dp), dimension(:), intent(in) :: V1_in, V2_in, phi1_in, phi2_in +real(dp), intent(in) :: h1_end, h2_end, h3_end +allocate(phiout(total_ny), vout(total_ny), stat=alloc_err) +call check_alloc_err(alloc_err) +phiout = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +vout = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +C = 0.0_dp + +C(1,1) = dyv1_T_it +C(1,2) = dyv2_T_it +C(2,1) = dyv1_B_it +C(2,2) = dyv2_B_it + +detC = C(1,1)*C(2,2) - C(1,2)*C(2,1) + +dyV_T = h1_end*vin(total_ny-2) + h2_end*vin(total_ny-1) + h3_end*vin(total_ny) +dyV_B = h1(1)*vin(1) + h2(1)*vin(2) + h3(1)*vin(3) + +! Need to negate b/c want to solve Cx = -c12. +c1 = -dyV_T +c2 = -dyV_B + +! Find c1 and c2. +if (detC == 0.0_dp) then + c1 = (0.0_dp, 0.0_dp) + c2 = (0.0_dp, 0.0_dp) +else + c1t = (C(2,2)*c1 - C(1,2)*c2) / detC + c2 = (C(1,1)*c2 - C(2,1)*c1) / detC + c1 = c1t +end if + +! Update uy and Phi. 
+vout = vin + c1*V1_in + c2*V2_in +phiout = phiin + c1*phi1_in + c2*phi2_in + +end subroutine update_bcs_mod_MPI + +!::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +subroutine calc_implicit_mod_MPI(Kphi,KT, phiin,Tin, kx_it, total_ny, dynu_in, g1_total, g2_total, g3_total) + +complex(C_DOUBLE_COMPLEX), dimension(:), intent(in) :: phiin, Tin +complex(C_DOUBLE_COMPLEX), dimension(:), intent(out) :: Kphi, KT +real(dp), intent(in) :: kx_it +integer, intent(in) :: total_ny +real(dp), dimension(:), intent(in) :: dynu_in, g1_total, g2_total, g3_total + +Kphi = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) +KT = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + +Kphi = nu0 *(-kx_it**2.0_dp*phiin + d2y_MPI(phiin, dynu_in, g1_total, g2_total, g3_total, total_ny)) +KT = kappa0*(-kx_it**2.0_dp*Tin + d2y_MPI(Tin, dynu_in, g1_total, g2_total, g3_total, total_ny)) + +end subroutine calc_implicit_mod_MPI + +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::! + +end module time_integrators_MPI + \ No newline at end of file diff --git a/time_loop_MPI.f90 b/time_loop_MPI.f90 new file mode 100644 index 0000000..5201f65 --- /dev/null +++ b/time_loop_MPI.f90 @@ -0,0 +1,334 @@ +program time_loop + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +use global +use strings +use write_pack +use interpolation_pack +use allocate_vars +use statistics +use mesh_pack +use time_integrators_MPI + +! include 'mpif.h' +implicit none + +integer :: ntokens, ios, i +integer :: Nxc, Nyc, Nzc +integer :: nRa, ii, jj +integer :: refine_flag_x, refine_flag_y, refine_flag_z +integer :: vtk_flag, rstrt_flag, opt_flag +logical :: wvtk, save_restart, calc_opt +logical :: refine_x, refine_y, refine_z, refine_xy +logical :: fTexist, fuyexist +logical :: ex_Tptrb +real(dp) :: temp +real(dp) :: dxc +real(dp) :: Ra, dRa, Nu, dalpha +real(dp), allocatable, dimension(:) :: xpc, ypc, zpc +complex(C_DOUBLE_COMPLEX), allocatable, dimension(:,:) :: uyc, Tc +character(10) :: citer +character(9) :: fiter +character(80) :: line +character(80) :: tokens(80) + +! MPI specific variables +integer :: mpierror, num_procs +integer :: proc_id +character(4) :: proc_id_str +real(dp) :: mpi_spacing_y +integer status(MPI_STATUS_SIZE) + + +! Initialize MPI. +call MPI_Init(mpierror) +call MPI_Comm_size(MPI_COMM_WORLD, num_procs, mpierror) +call MPI_Comm_rank(MPI_COMM_WORLD, proc_id, mpierror) + +! Read file. +open(2,file="input.data", status="old") +do ii = 1,7 +read(2,'(A)') line +call parse(line,' ,', tokens, ntokens) +if (ntokens > 0) then + select case(ii) + case (1) + call value(tokens(1), Pr, ios) + call value(tokens(2), alpha, ios) + call value(tokens(3), dalpha, ios) + case (2) + call value(tokens(1), Ra, ios) + call value(tokens(2), nRa, ios) + call value(tokens(3), dRa, ios) + case (3) + call value(tokens(1), t_final, ios) + call value(tokens(2), dt, ios) + dt_init = dt + case (4) + call value(tokens(1), ybot, ios) + call value(tokens(2), ytop, ios) + case (5) + call value(tokens(1), Nx, ios) + call value(tokens(2), Ny, ios) + call value(tokens(3), Nz, ios) + case (6) + call value(tokens(1), refine_flag_x, ios) + call value(tokens(2), refine_flag_y, ios) + call value(tokens(3), refine_flag_z, ios) + if (refine_flag_x == 1) then + if (refine_flag_y == 1) then + refine_xy = .true. + refine_x = .false. + refine_y = .false. + else + refine_x = .true. + refine_y = .false. + refine_xy = .false. 
+ end if
+ else if (refine_flag_y == 1) then
+ refine_x = .false.
+ refine_y = .true.
+ refine_xy = .false.
+ else if (refine_flag_z == 1) then
+ write(*,*) "Refinement in z-direction not available yet!"
+ stop
+ else
+ refine_x = .false.
+ refine_y = .false.
+ refine_z = .false.
+ refine_xy = .false.
+ end if
+ case (7)
+ call value(tokens(1), vtk_flag, ios)
+ call value(tokens(2), rstrt_flag, ios)
+ call value(tokens(3), opt_flag, ios)
+ if (vtk_flag == 1) then
+ wvtk = .true.
+ else
+ wvtk = .false.
+ end if
+ if (rstrt_flag == 1) then
+ save_restart = .true.
+ else
+ save_restart = .false.
+ end if
+ if (opt_flag == 1) then
+ calc_opt = .true.
+ else
+ calc_opt = .false.
+ end if
+ end select
+end if
+end do
+close(unit=2)
+
+! Initialize MPI variables.
+Ny = Ny / 4
+mpi_spacing_y = (ytop - ybot) / num_procs
+ybot = ybot + proc_id * mpi_spacing_y
+ytop = ybot + mpi_spacing_y
+write(proc_id_str, "(I3.3)") proc_id
+
+! Create FFT plans
+planuy = fftw_plan_dft_1d(Nx,tuy,tuy, FFTW_FORWARD,FFTW_ESTIMATE)
+iplanuy = fftw_plan_dft_1d(Nx,tuy,tuy, FFTW_BACKWARD,FFTW_ESTIMATE)
+
+planux = fftw_plan_dft_1d(Nx,tux,tux, FFTW_FORWARD,FFTW_ESTIMATE)
+iplanux = fftw_plan_dft_1d(Nx,tux,tux, FFTW_BACKWARD,FFTW_ESTIMATE)
+
+planphi = fftw_plan_dft_1d(Nx,tphi,tphi, FFTW_FORWARD,FFTW_ESTIMATE)
+iplanphi = fftw_plan_dft_1d(Nx,tphi,tphi, FFTW_BACKWARD,FFTW_ESTIMATE)
+
+planT = fftw_plan_dft_1d(Nx,tT,tT, FFTW_FORWARD,FFTW_ESTIMATE)
+iplanT = fftw_plan_dft_1d(Nx,tT,tT, FFTW_BACKWARD,FFTW_ESTIMATE)
+
+plannlT = fftw_plan_dft_1d(Nx,tnlT,tnlT, FFTW_FORWARD,FFTW_ESTIMATE)
+iplannlT = fftw_plan_dft_1d(Nx,tnlT,tnlT, FFTW_BACKWARD,FFTW_ESTIMATE)
+
+plannlphi = fftw_plan_dft_1d(Nx,tnlphi,tnlphi, FFTW_FORWARD,FFTW_ESTIMATE)
+iplannlphi = fftw_plan_dft_1d(Nx,tnlphi,tnlphi, FFTW_BACKWARD,FFTW_ESTIMATE)
+
+call global_params
+call global_allocations
+
+xR = pi / alpha
+xL = -pi / alpha
+Lx = xR - xL
+
+if (Nx == 1) then
+ dx = 1.0_dp
+else
+ dx = Lx / (real(Nx,kind=dp))
+end if
+
+call cosine_mesh_MPI(xp,yp,zp, Nx,Ny,Nz, proc_id, num_procs) ! get coordinates
+call dxdydz_MPI(dynu, xp,yp,zp, proc_id, num_procs) ! get mesh spacing for nonuniform grids
+call y_mesh_params_MPI(proc_id, num_procs) ! get metric coefficients for nonuniform grids
+
+dymin = minval(dynu)
+dxmin = sqrt(dx**2.0_dp + dymin**2.0_dp)
+
+! Fourier modes
+do ii = 1,Nx/2
+ kx_modes(ii) = real(ii,kind=dp) - 1.0_dp
+end do
+do ii = Nx/2+1, Nx
+ kx_modes(ii) = real(ii-Nx,kind=dp) - 1.0_dp
+end do
+kx = alpha*kx_modes
+
+! Write initial field to vtk
+if (wvtk) then
+ call write_to_vtk(int(Ra), .true., proc_id_str) ! true = already in physical space
+end if
+
+! Initialize fields.
+call init_fields(ex_Tptrb, Ra)
+call init_to_fourier(ex_Tptrb)
+
+!
Get nu0 and kappa0 +call global_params_Ra(Ra) + +if (proc_id == 0) then + write(*,'(A70)') ' ' + write(*,'(A70)') '*********************************************************************|' + write(*,'(A70)') ' TWO-DIMENSIONAL THERMAL CONVECTION |' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,47X,A)') 'COMPUTATION SIZE:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(20X,A23,I5,21X,A)') 'Nx = ', Nx, '|' + write(*,'(20X,A23,I5,21X,A)') 'Ny = ', Ny, '|' + write(*,'(20X,A23,I5,21X,A)') 'Nz = ', Nz, '|' + write(*,'(20X,A23,I5,21X,A)') 'Number of time steps = ', nt, '|' + write(*,'(20X,A23,I5,21X,A)') 'Number of Fourier modes = ', Nf, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,45X,A)') 'PROBLEM PARAMETERS:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Prandtl number (Pr) = ', Pr, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Rayleigh number (Ra) = ', Ra, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Initial Reynolds number (Re) = ', sqrt(Ra/(16.0_dp*Pr)), '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(5X,A,42X,A)') 'PHYSICAL PROBLEM SIZE:', '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'alpha = ', alpha, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Left coordinate of box = ', xL, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Right coordinate of box = ', xR, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of bottom wall = ', ybot, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Coordinate of top wall = ', ytop, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Box width = ', Lx, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Box height = ', Ly, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Aspect Ratio = ', Lx/Ly, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Integration time (T) = ', t_final, '|' + write(*,'(10X,A32,ES16.8,11X,A)') 'Time step size (Delta t) = ', dt, '|' + write(*,'(A69,A)') ' ','|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(A70)') '*********************************************************************|' + write(*,'(A70)') ' ' + + flush(6) + ! Print MPI division. + write(*,*) "Ny = ", Ny*num_procs, " divided among ", num_procs, " processors -> ", & + Ny, " rows per processor." +end if + +call MPI_BARRIER(MPI_COMM_WORLD, mpierror) + +write(*,*) "processor ", proc_id, "initialized with ", Ny, "rows." + +! Get solution with time integration +call imex_rk_MPI(proc_id_str, 1, .true., proc_id, num_procs) ! true causes writing of nusselt number. + +call MPI_Finalize(mpierror) + +end program time_loop + +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +subroutine init_fields(ex_Tptrb,Ra) + +use global + +implicit none + +logical, intent(out) :: ex_Tptrb +real(dp), intent(in) :: Ra +integer :: ii + +! Initialize fields. +if (Ra < 1710.0_dp) then + ex_Tptrb = .true. + do ii = 1,Nx + Tptrb(:,ii) = 0.5_dp*2.0_dp*cos(alpha*xp(ii))*cos(pi*yp/2.0_dp) + T(:,ii) = -yp + Tptrb(:,ii) + end do +else + ex_Tptrb = .false. 
+ do ii = 1,Nx + T(:,ii) = -yp + 0.5_dp*2.0_dp*cos(alpha*xp(ii))*cos(pi*yp/2.0_dp) + end do +end if + +end subroutine init_fields + +!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +subroutine init_to_fourier(ex_Tptrb) + +use global + +implicit none + +logical, intent(in) :: ex_Tptrb +integer :: ii, jj + +! Bring temperature and velocity to Fourier space. +do jj = 1,Ny + tT = T(jj,:) + tuy = uy(jj,:) + call fftw_execute_dft(planT, tT, tT) + call fftw_execute_dft(planuy, tuy, tuy) + ! Truncate modes + do ii = 1,Nx + if (abs(kx(ii))/alpha >= Nf/2) then + tT(ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + tuy(ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + end if + end do + T(jj,:) = tT + uy(jj,:) = tuy + ! If temperature perturbation needed. + if (ex_Tptrb) then + tT = Tptrb(jj,:) + call fftw_execute_dft(planT, tT, tT) + ! Truncate modes + do ii = 1,Nx + if (abs(kx(ii))/alpha >= Nf/2) then + tT(ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) + end if + end do + Tptrb(jj,:) = tT + end if +end do +T = T / real(Nx, kind=dp) +uy = uy / real(Nx, kind=dp) + +if (ex_Tptrb) then + Tptrb = Tptrb / real(Nx, kind=dp) +end if + +! Calculate phi and ux from uy +do ii = 1,Nx + if (kx(ii) /= 0.0_dp) then + tmp_uy = uy(:,ii) + !ux(:,ii) = -CI*d1y(tmp_uy)/kx(ii) + ux(:,ii) = CI*d1y(tmp_uy)/kx(ii) + else if (kx(ii) == 0.0_dp) then + ux(:,ii) = cmplx(0.0_dp, 0.0_dp, kind=C_DOUBLE_COMPLEX) ! Zero mean flow! + end if + phi(:,ii) = -kx(ii)**2.0_dp*uy(:,ii) + d2y(uy(:,ii)) +end do + +end subroutine init_to_fourier \ No newline at end of file diff --git a/write_pack.f90 b/write_pack.f90 index 08f53d4..dd1311d 100755 --- a/write_pack.f90 +++ b/write_pack.f90 @@ -88,7 +88,7 @@ end subroutine write_mat_cmplx !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine write_vtk_structured_grid(iter) +subroutine write_vtk_structured_grid(iter, process_id) use global @@ -99,13 +99,17 @@ subroutine write_vtk_structured_grid(iter) character(9) :: fiter integer :: n integer :: ii, jj, kk +character(3), intent(in), optional :: process_id write(citer, "(I10)") 1000000000 + iter fiter = citer(2:10) n = (Nx+1)*Ny*Nz - -open(unit=2, file=vtkloc//vtkname//fiter//gtype, action="write", status="replace") +if (PRESENT(process_id)) then + open(unit=2, file=vtkloc//vtkname//"_P"//process_id//"_"//fiter//gtype, action="write", status="replace") +else + open(unit=2, file=vtkloc//vtkname//fiter//gtype, action="write", status="replace") +endif write(2, "(a)") "# vtk DataFile Version 3.1" write(2, "(a)") "Thermal Convection Data" write(2, "(a)") "ASCII" @@ -180,18 +184,21 @@ subroutine write_vtk_structured_grid(iter) end subroutine write_vtk_structured_grid !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine write_to_vtk(step, physical) - -integer, intent(in) :: step -logical, intent(in) :: physical -integer :: j +subroutine write_to_vtk(step, physical, process_id) -if (physical) then +integer, intent(in) :: step +logical, intent(in) :: physical +character(3), intent(in), optional :: process_id +integer :: j - call write_vtk_structured_grid(step) +if (physical) then + if (PRESENT(process_id)) then + call write_vtk_structured_grid(step, process_id) + else + call write_vtk_structured_grid(step) + endif else - do j = 1,Ny ! 
Bring everything to physical space tT = T(j,:) @@ -204,7 +211,11 @@ subroutine write_to_vtk(step, physical) ux(j,:) = tux uy(j,:) = tuy end do - call write_vtk_structured_grid(step) + if (PRESENT(process_id)) then + call write_vtk_structured_grid(step, process_id) + else + call write_vtk_structured_grid(step) + endif do j = 1,Ny ! Bring everything to Fourier space tT = T(j,:) @@ -220,7 +231,6 @@ subroutine write_to_vtk(step, physical) T = T / real(Nx, kind=dp) ux = ux / real(Nx, kind=dp) uy = uy / real(Nx, kind=dp) - end if end subroutine write_to_vtk
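A closing note on the velocity reconstruction that appears in init_to_fourier above and in imex_rk_MPI (a worked step for the reader, not part of the diff; it assumes the velocity field is divergence-free, which the zero-mean-flow handling of the kx = 0 mode suggests). Continuity for a single Fourier mode kx gives

    i*kx*ux_hat + d/dy(uy_hat) = 0   =>   ux_hat = (i/kx) * d/dy(uy_hat)   for kx /= 0,

which is exactly ux(:,ii) = CI*d1y(tmp_uy)/kx(ii) in the serial path and uxi_MPI = CI*d1y_MPI(tmp_uy_MPI, h1_total, h2_total, h3_total)/kx(it) in the MPI path; the kx = 0 mode is set to zero explicitly to enforce zero mean horizontal flow.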