Skip to content
Draft

MPI #13

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
FC=gfortran
FC=mpif90
FFLAGS= -c -O3

LIBFLAGS2 = -L/usr/local/fftw/lib
LDFLAGS = -I/usr/local/fftw/include

#MAIN = Ra_loop
#MAIN = Ra_loop_no_opt
MAIN = time_loop
# MAIN = Ra_loop
# MAIN = Ra_loop_no_opt
# MAIN = time_loop
MAIN = time_loop_MPI

OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o
OBJECTS = fftw.o global.o allocate_vars.o precmod.o stringmod.o write_pack.o interpolation_pack.o mesh_pack.o imod.o bc_setup.o statistics.o time_integrators.o time_integrators_MPI.o jacobians.o gmres_pack.o nonlinear_solvers.o $(MAIN).o
PROGRAMS = $(MAIN).exe

all: $(PROGRAMS)

$(PROGRAMS) : $(OBJECTS)
$(FC) -fopenmp $(LDFLAGS) -o $(PROGRAMS) $(OBJECTS) $(LIBFLAGS1) -llapack -lblas $(LIBFLAGS2) -lfftw3 -lm
$(FC) -fopenmp -fallow-argument-mismatch $(LDFLAGS) -o $(PROGRAMS) $(OBJECTS) $(LIBFLAGS1) -llapack -lblas $(LIBFLAGS2) -lfftw3 -lm

fftw.o : fftw.f90
$(FC) $(FFLAGS) fftw.f90
Expand Down Expand Up @@ -52,6 +53,9 @@ statistics.o : statistics.f90
time_integrators.o : time_integrators.f90
$(FC) -fopenmp $(FFLAGS) time_integrators.f90

time_integrators_MPI.o : time_integrators_MPI.f90
$(FC) -fopenmp -fallow-argument-mismatch $(FFLAGS) time_integrators_MPI.f90

jacobians.o : jacobians.f90
$(FC) $(FFLAGS) jacobians.f90

Expand Down
218 changes: 218 additions & 0 deletions bc_setup.f90
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ module bc_setup

use global
use write_pack
use mesh_pack

implicit none
! include 'mpif.h'

contains

Expand Down Expand Up @@ -138,4 +140,220 @@ subroutine init_bc(aii)

end subroutine init_bc

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

subroutine init_bc_MPI(aii, proc_id, num_procs, proc_id_str)

implicit none

real(dp), intent(in) :: aii
integer, intent(in) :: proc_id, num_procs
character(3), optional, intent(in) :: proc_id_str
real(dp) :: pnu, qnu
real(dp) :: wavex

real(dp), allocatable, dimension(:) :: d, dl, du
real(dp), allocatable, dimension(:) :: phi1_b, phi1_t
real(dp), allocatable, dimension(:) :: g1_total, g2_total, g3_total
real(dp), allocatable, dimension(:) :: recv_col, recv_col2, last_row_recv
real(dp), allocatable, dimension(:) :: phi2_b, phi2_t
real(dp), allocatable, dimension(:,:) :: Fphi12, FV12


integer :: ii, jj, i, total_ny, otherproc, start
integer :: info, mpierror

! Only do allocation on main node.
if(proc_id == 0) then
total_ny = Ny * num_procs

! Allocate local variables
allocate(Fphi12(total_ny-2,2), FV12(total_ny-2,2), stat=alloc_err)
call check_alloc_err(alloc_err)
allocate(d(total_ny-2), stat=alloc_err)
call check_alloc_err(alloc_err)
allocate(dl(total_ny-3), stat=alloc_err)
call check_alloc_err(alloc_err)
allocate(du(total_ny-3), stat=alloc_err)
call check_alloc_err(alloc_err)
allocate(phi1_b(Nx), phi1_t(Nx), stat=alloc_err)
call check_alloc_err(alloc_err)
allocate(phi2_b(Nx), phi2_t(Nx), stat=alloc_err)
call check_alloc_err(alloc_err)

! Need to pull all of g1, g2, g3 onto main node for calculations.
allocate(g1_total(total_ny), g2_total(total_ny), g3_total(total_ny), stat=alloc_err)
call check_alloc_err(alloc_err)

! Receive g1,g2,g3 from all other nodes.
do otherproc = 1,num_procs-1
start = otherproc * Ny + 1
call MPI_RECV(g1_total(start), Ny, MPI_DOUBLE, otherproc, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
call MPI_RECV(g2_total(start), Ny, MPI_DOUBLE, otherproc, 43, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
call MPI_RECV(g3_total(start), Ny, MPI_DOUBLE, otherproc, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
end do
g1_total(1:Ny) = g1
g2_total(1:Ny) = g2
g3_total(1:Ny) = g3

Fphi12 = 0.0_dp
FV12 = 0.0_dp
d = 0.0_dp
dl = 0.0_dp
du = 0.0_dp
phi1_b = 1.0_dp
phi1_t = 0.0_dp
phi2_b = 0.0_dp
phi2_t = 1.0_dp

phi1_b(1) = 0.0_dp
phi2_t(1) = 0.0_dp

! Compute phi1 and phi2 from D2 phi_i = 0 where
! D2 = -(alpha*kx)^2 + dy^2 and phi1 satisfies the BCs
! phi1 = 1 at the bottom wall and phi1 = 0 at the top wall.
! phi2 satisfies phi2 = 0 at the bottom wall and phi2 = 1
! at the top wall.

! A comment on how these arrays are formed. Since we are only
! dealing with Dirichlet conditions at the walls, we only need arrays of size
! Ny-2, i.e. we don't need the values at the walls. Thus, in the calculations
! below, F(1) would correspond to point number 2 and F(Ny-2) would correspond
! to point Ny-1.

! Some parameters
pnu = nu0*dt*aii

do ii = 1,Nx

Fphi12 = 0.0_dp

if (abs(kx(ii)/alpha) > Nf/2) then
phi1_b(ii) = 0.0_dp
phi2_t(ii) = 0.0_dp
end if

wavex = kx(ii)

qnu = 1.0_dp + pnu*wavex**2.0_dp

do jj = 2,total_ny-1
d(jj-1) = qnu - pnu*g2_total(jj)
end do

do jj = 2,total_ny-2
du(jj-1) = -pnu*g3_total(jj)
end do

do jj = 3,total_ny-1
dl(jj-2) = -pnu*g1_total(jj)
end do

Fphi12(1,1) = pnu*g1_total(2)*phi1_b(ii)
Fphi12(total_ny-2,2) = pnu*g3_total(total_ny-1)*phi2_t(ii)

! Solve the system Aphi phi = Fphi
call dgtsv(total_ny-2, 2, dl, d, du, Fphi12, total_ny-2, info)

! Put phi1 and phi2 together in main node.
phi1(1,ii) = phi1_b(ii)
phi1(2:Ny,ii) = Fphi12(1:Ny-1,1)
phi2(1,ii) = 0.0_dp
phi2(2:Ny,ii) = Fphi12(1:Ny-1,2)

! Send phi1 and phi2 columns to each node.
do otherproc = 1,num_procs-1
start = otherproc * Ny
call MPI_SEND(Fphi12(start,1), Ny, MPI_DOUBLE, otherproc, 45, MPI_COMM_WORLD, mpierror)
call MPI_SEND(Fphi12(start,2), Ny, MPI_DOUBLE, otherproc, 46, MPI_COMM_WORLD, mpierror)
end do

! Getting V1 and V2 initialized.
FV12(:,1) = Fphi12(:,1)
FV12(:,2) = Fphi12(:,2)

do jj = 2,total_ny-1
d(jj-1) = -wavex**2.0_dp + g2_total(jj)
end do

do jj = 2,total_ny-2
du(jj-1) = g3_total(jj)
end do

do jj = 3,total_ny-1
dl(jj-2) = g1_total(jj)
end do

call dgtsv(total_ny-2, 2, dl, d, du, FV12, total_ny-2, info)

! Place V1, V2 in main node memory.
V1(2:Ny,ii) = FV12(1:Ny-1,1)
V2(2:Ny,ii) = FV12(1:Ny-1,2)

! Send V1 and V1 columns to each node.
do otherproc = 1,num_procs-1
start = otherproc * Ny
call MPI_SEND(FV12(start,1), Ny, MPI_DOUBLE, otherproc, 48, MPI_COMM_WORLD, mpierror)
call MPI_SEND(FV12(start,2), Ny, MPI_DOUBLE, otherproc, 49, MPI_COMM_WORLD, mpierror)
end do

end do

! Send last row to final proc.
call MPI_SEND(phi2_t(1), Nx, MPI_DOUBLE, num_procs-1, 47, MPI_COMM_WORLD, mpierror)

else
! Send g1, g2, g3 to first node.
call MPI_SEND(g1(1), Ny, MPI_DOUBLE, 0, 42, MPI_COMM_WORLD, mpierror)
call MPI_SEND(g2(1), Ny, MPI_DOUBLE, 0, 43, MPI_COMM_WORLD, mpierror)
call MPI_SEND(g3(1), Ny, MPI_DOUBLE, 0, 44, MPI_COMM_WORLD, mpierror)

allocate(recv_col(Ny), stat=alloc_err)
call check_alloc_err(alloc_err)
allocate(recv_col2(Ny), stat=alloc_err)
call check_alloc_err(alloc_err)

! Receive cols of phi1 and phi2.
do ii = 1,Nx
call MPI_RECV(recv_col, Ny, MPI_DOUBLE, 0, 45, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
call MPI_RECV(recv_col2, Ny, MPI_DOUBLE, 0, 46, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
phi1(1:Ny, ii) = recv_col
phi2(1:Ny, ii) = recv_col2
end do

! Receive cols of V1 and V2.
do ii = 1,Nx
call MPI_RECV(recv_col, Ny, MPI_DOUBLE, 0, 48, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
call MPI_RECV(recv_col2, Ny, MPI_DOUBLE, 0, 49, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
V1(1:Ny, ii) = recv_col
V2(1:Ny, ii) = recv_col2
end do

! Handle boundary of last processor.
if (proc_id == num_procs - 1) then
allocate(last_row_recv(Nx), stat=alloc_err)
call check_alloc_err(alloc_err)
call MPI_RECV(last_row_recv, Nx, MPI_DOUBLE, 0, 47, MPI_COMM_WORLD, MPI_STATUS_IGNORE, mpierror)
phi1(Ny,1:Nx) = 0.0_dp
phi2(Ny,1:Nx) = last_row_recv
V1(Ny, 1:Nx) = 0.0_dp
V2(Ny, 1:Nx) = 0.0_dp
end if
end if

! Calculate wall derivative on first and last processor.
if (proc_id == 0) then
do ii = 1,Nx
dyv1_B(ii) = h1(1)*V1(1,ii) + h2(1)*V1(2,ii) + h3(1)*V1(3,ii)
dyv2_B(ii) = h1(1)*V2(1,ii) + h2(1)*V2(2,ii) + h3(1)*V2(3,ii)
end do
else if (proc_id == num_procs - 1) then
do ii = 1,Nx
dyv1_T(ii) = h1(Ny)*V1(Ny-2,ii) + h2(Ny)*V1(Ny-1,ii) + h3(Ny)*V1(Ny,ii)
dyv2_T(ii) = h1(Ny)*V2(Ny-2,ii) + h2(Ny)*V2(Ny-1,ii) + h3(Ny)*V2(Ny,ii)
end do
end if

end subroutine init_bc_MPI

end module bc_setup
Loading