From f9867bafdafa346a30cb13c253984fda0605acd5 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Wed, 27 Dec 2023 19:17:53 -0500 Subject: [PATCH 01/15] feat(subdomain): make fully asynchronous --- example/time-paradigm.f90 | 18 ++++++++++++------ src/matcha/subdomain_m.f90 | 6 +++--- src/matcha/subdomain_s.f90 | 24 +++++++++--------------- test/subdomain_test_m.f90 | 17 +++++++++++------ 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/example/time-paradigm.f90 b/example/time-paradigm.f90 index 1f34b8f..494d213 100644 --- a/example/time-paradigm.f90 +++ b/example/time-paradigm.f90 @@ -12,7 +12,7 @@ program time_paradigm_m character(len=:), allocatable :: steps_string, resolution_string type(command_line_t) command_line integer(int64) counter_start, counter_end, clock_rate - integer :: steps=200, resolution=64 + integer :: steps=300, resolution=128 associate(me => this_image()) if (command_line%argument_present(["--help"])) then @@ -56,16 +56,18 @@ function functional_programming_time() result(system_time) call T%define(side=1., boundary_val=T_boundary, internal_val=T_internal_initial, n=resolution) - call system_clock(t_start_functional) - associate(dt => T%dx()*T%dy()/(4*alpha)) + call system_clock(t_start_functional) + functional_programming: & do step = 1, steps + sync all T = T + dt * alpha * .laplacian. T end do functional_programming + + call system_clock(t_end_functional, clock_rate) end associate - call system_clock(t_end_functional, clock_rate) system_time = real(t_end_functional - t_start_functional)/real(clock_rate) associate(L_infinity_norm => maxval(abs(T%values() - T_steady))) @@ -80,16 +82,20 @@ function procedural_programming_time() result(system_time) real system_time type(subdomain_t) T + call T%define(side=1., boundary_val=0., internal_val=1., n=resolution) + associate(dt => T%dx()*T%dy()/(4*alpha)) - call T%define(side=1., boundary_val=0., internal_val=1., n=resolution) call system_clock(t_start_procedural) + procedural_programming: & do step = 1, steps + sync all call T%step(alpha*dt) end do procedural_programming + + call system_clock(t_end_procedural, clock_rate) end associate - call system_clock(t_end_procedural, clock_rate) system_time = real(t_end_procedural - t_start_procedural)/real(clock_rate) associate(L_infinity_norm => maxval(abs(T%values() - T_steady))) diff --git a/src/matcha/subdomain_m.f90 b/src/matcha/subdomain_m.f90 index 72c9cbf..5524cae 100644 --- a/src/matcha/subdomain_m.f90 +++ b/src/matcha/subdomain_m.f90 @@ -14,14 +14,14 @@ module subdomain_m generic :: operator(.laplacian.) => laplacian generic :: operator(*) => multiply generic :: operator(+) => add - generic :: assignment(=) => assign_and_sync + generic :: assignment(=) => assign_ procedure dx procedure dy procedure dz procedure values procedure, private :: laplacian procedure, private :: add - procedure, private :: assign_and_sync + procedure, private :: assign_ end type interface @@ -83,7 +83,7 @@ pure module function add(lhs, rhs) result(total) type(subdomain_t) total end function - module subroutine assign_and_sync(lhs, rhs) + module subroutine assign_(lhs, rhs) implicit none class(subdomain_t), intent(out) :: lhs type(subdomain_t), intent(in) :: rhs diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index 2ca5eb9..467a693 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -55,7 +55,6 @@ allocate(halo_x(west:east, ny, nz)[*]) if (me>1) halo_x(east,:,:)[me-1] = self%s_(1,:,:) if (me1) halo_x(east,:,:)[me-1] = rhs%s_(1,:,:) - if (me1) halo_x(east,:,:)[me-1] = s(1,:,:) + if (me1) halo_x(east,:,:)[me-1] = s(1,:,:) - if (me T%dx()*T%dy()*T%dz()/(4*alpha)) do step = 1, steps + sync all T = T + dt * alpha * .laplacian. T end do end associate @@ -174,7 +177,7 @@ function correct_steady_state() result(test_passes) function functional_matches_procedural() result(test_passes) logical test_passes real, parameter :: tolerance = 1.E-06 - integer, parameter :: steps = 6000, n=21 + integer, parameter :: steps = 6000, n=32 real, parameter :: alpha = 1. real, parameter :: side=1., boundary_val=1., internal_val=2. @@ -195,6 +198,7 @@ function T_functional() associate(dt => T%dx()*T%dy()/(4*alpha)) do step = 1, steps + sync all T = T + dt * alpha * .laplacian. T end do end associate @@ -211,6 +215,7 @@ function T_procedural() associate(dt => T%dx()*T%dy()/(4*alpha)) do step = 1, steps + sync all call T%step(alpha*dt) end do end associate From 837309ff208d71d2e0c496c9c691df36de9c194e Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Sun, 31 Dec 2023 11:31:38 -0500 Subject: [PATCH 02/15] fix(subdomain/test/example): rm halo --- example/time-paradigm.f90 | 4 ++-- src/matcha/subdomain_m.f90 | 4 ++-- src/matcha/subdomain_s.f90 | 49 +++++++++++++++++++------------------- test/subdomain_test_m.f90 | 9 +++---- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/example/time-paradigm.f90 b/example/time-paradigm.f90 index 494d213..ce369d2 100644 --- a/example/time-paradigm.f90 +++ b/example/time-paradigm.f90 @@ -52,7 +52,7 @@ function functional_programming_time() result(system_time) integer(int64) t_start_functional, t_end_functional, clock_rate integer step real system_time - type(subdomain_t) T + type(subdomain_t), save :: T[*] call T%define(side=1., boundary_val=T_boundary, internal_val=T_internal_initial, n=resolution) @@ -80,7 +80,7 @@ function procedural_programming_time() result(system_time) integer(int64) t_start_procedural, t_end_procedural, clock_rate integer step real system_time - type(subdomain_t) T + type(subdomain_t), save :: T[*] call T%define(side=1., boundary_val=0., internal_val=1., n=resolution) diff --git a/src/matcha/subdomain_m.f90 b/src/matcha/subdomain_m.f90 index 5524cae..5faa51b 100644 --- a/src/matcha/subdomain_m.f90 +++ b/src/matcha/subdomain_m.f90 @@ -36,7 +36,7 @@ module subroutine define(side, boundary_val, internal_val, n, self) module subroutine step(alpha_dt, self) implicit none real, intent(in) :: alpha_dt - class(subdomain_t), intent(inout) :: self + class(subdomain_t), intent(inout) :: self[*] end subroutine pure module function values(self) result(my_values) @@ -65,7 +65,7 @@ pure module function dz(self) result(my_dz) pure module function laplacian(rhs) result(laplacian_rhs) implicit none - class(subdomain_t), intent(in) :: rhs + class(subdomain_t), intent(in) :: rhs[*] type(subdomain_t) laplacian_rhs end function diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index 467a693..a1dc8c8 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -4,9 +4,6 @@ use intrinsic_array_m, only : intrinsic_array_t implicit none - real, allocatable :: halo_x(:,:,:)[:] - integer, parameter :: west=1, east=2 - type(data_partition_t) data_partition real dx_, dy_, dz_ @@ -50,11 +47,6 @@ if (me == 1) self%s_(1 , :, :) = boundary_val ! minimum x boundary if (me == num_subdomains) self%s_(my_nx, :, :) = boundary_val ! maximum x boundary - - if (allocated(halo_x)) deallocate(halo_x) - allocate(halo_x(west:east, ny, nz)[*]) - if (me>1) halo_x(east,:,:)[me-1] = self%s_(1,:,:) - if (me0,"laplacian: easternmost subdomain too small") do concurrent(j=2:ny-1, k=2:nz-1) @@ -125,7 +124,6 @@ module procedure assign_ call assert(allocated(rhs%s_), "subdomain_t%assign_: allocated(rhs%s_)") lhs%s_ = rhs%s_ - call exchange_halo(rhs%s_) end procedure module procedure values @@ -133,26 +131,19 @@ my_values = self%s_ end procedure - subroutine exchange_halo(s) - real, intent(in) :: s(:,:,:) - if (me>1) halo_x(east,:,:)[me-1] = s(1,:,:) - if (me0,"laplacian: easternmost subdomain too small") if (.not. allocated(increment)) allocate(increment(my_nx,ny,nz)) + sync all call internal_points(increment) - call edge_points(increment) + call edge_points(self, increment) call apply_boundary_condition(increment) self%s_ = self%s_ + increment - call exchange_halo(self%s_) contains @@ -169,14 +160,17 @@ subroutine internal_points(ds) end do end subroutine - subroutine edge_points(ds) + subroutine edge_points(self, ds) + type(subdomain_t), intent(in) :: self[*] real, intent(inout) :: ds(:,:,:) real, allocatable :: halo_west(:,:), halo_east(:,:) integer i, j, k - halo_west = merge(halo_x(west,:,:), self%s_(1, :,:), me/=1) - halo_east = merge(halo_x(east,:,:), self%s_(my_nx,:,:), me/=num_subdomains) - + if (me==1) then + halo_west = self%s_(1,:,:) + else + halo_west = self[me-1]%s_(ubound(self[me-1]%s_,1),:,:) + end if i = my_internal_west do concurrent(j=2:ny-1,k=2:nz-1) ds(i,j,k) = alpha_dt*( & @@ -186,6 +180,11 @@ subroutine edge_points(ds) ) end do + if (me==1) then + halo_east = self%s_(my_nx,:,:) + else + halo_east = self[me+1]%s_(1,:,:) + end if i = my_internal_east do concurrent(j=2:ny-1, k=2:nz-1) ds(i,j,k) = alpha_dt*( & diff --git a/test/subdomain_test_m.f90 b/test/subdomain_test_m.f90 index 3f5140a..2c48222 100644 --- a/test/subdomain_test_m.f90 +++ b/test/subdomain_test_m.f90 @@ -63,7 +63,8 @@ subroutine output(v) function concave_laplacian() result(test_passes) logical test_passes - type(subdomain_t) f, laplacian_f + type(subdomain_t), save :: f[*] + type(subdomain_t) :: laplacian_f real, allocatable :: lap_f_vals(:,:,:) call f%define(side=1., boundary_val=1., internal_val=2., n=32) ! internally constant subdomain with a step down at all surfaces @@ -155,7 +156,7 @@ function concave_laplacian() result(test_passes) function correct_steady_state() result(test_passes) logical test_passes - type(subdomain_t) T + type(subdomain_t), save :: T[*] real, parameter :: T_boundary = 1., T_initial = 2., tolerance = 5.E-03, T_steady = T_boundary, alpha = 1. integer, parameter :: steps = 25000 integer step @@ -191,7 +192,7 @@ function functional_matches_procedural() result(test_passes) function T_functional() real, allocatable :: T_functional(:,:,:) - type(subdomain_t) T + type(subdomain_t), save :: T[*] integer step call T%define(side, boundary_val, internal_val, n) @@ -208,7 +209,7 @@ function T_functional() function T_procedural() real, allocatable :: T_procedural(:,:,:) - type(subdomain_t) T + type(subdomain_t), save :: T[*] integer step call T%define(side, boundary_val, internal_val, n) From 345f5b8072088cd180fe774ae854442207e34061 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 10:49:16 -0500 Subject: [PATCH 03/15] fix(subdomain): non-type-bound laplacian operator --- example/time-paradigm.f90 | 4 +- src/matcha/subdomain_m.f90 | 83 +++++++++++++++++++++++++++++++++----- src/matcha/subdomain_s.f90 | 63 ----------------------------- test/subdomain_test_m.f90 | 4 +- 4 files changed, 77 insertions(+), 77 deletions(-) diff --git a/example/time-paradigm.f90 b/example/time-paradigm.f90 index ce369d2..662be3b 100644 --- a/example/time-paradigm.f90 +++ b/example/time-paradigm.f90 @@ -2,7 +2,7 @@ ! Terms of use are as specified in LICENSE.txt program time_paradigm_m !! Time various alternative programming paradigms - use subdomain_m, only : subdomain_t + use subdomain_m, only : subdomain_t, operator(.laplacian.), step use assert_m, only : assert use sourcery_m, only : string_t, file_t, command_line_t, bin_t, csv use iso_fortran_env, only : int64 @@ -90,7 +90,7 @@ function procedural_programming_time() result(system_time) procedural_programming: & do step = 1, steps sync all - call T%step(alpha*dt) + call step(alpha*dt, T) end do procedural_programming call system_clock(t_end_procedural, clock_rate) diff --git a/src/matcha/subdomain_m.f90 b/src/matcha/subdomain_m.f90 index 5faa51b..605ead6 100644 --- a/src/matcha/subdomain_m.f90 +++ b/src/matcha/subdomain_m.f90 @@ -1,17 +1,18 @@ module subdomain_m + use assert_m, only : assert implicit none private public :: subdomain_t + public :: operator(.laplacian.) + public :: step type subdomain_t private real, allocatable :: s_(:,:,:) contains procedure, pass(self) :: define - procedure, pass(self) :: step procedure, pass(rhs) :: multiply - generic :: operator(.laplacian.) => laplacian generic :: operator(*) => multiply generic :: operator(+) => add generic :: assignment(=) => assign_ @@ -19,11 +20,21 @@ module subdomain_m procedure dy procedure dz procedure values - procedure, private :: laplacian procedure, private :: add procedure, private :: assign_ end type + interface operator(.laplacian.) + + module procedure laplacian + !pure module function laplacian(rhs) result(laplacian_rhs) + ! implicit none + ! type(subdomain_t), intent(in) :: rhs[*] + ! type(subdomain_t) laplacian_rhs + !end function + + end interface + interface module subroutine define(side, boundary_val, internal_val, n, self) @@ -36,7 +47,7 @@ module subroutine define(side, boundary_val, internal_val, n, self) module subroutine step(alpha_dt, self) implicit none real, intent(in) :: alpha_dt - class(subdomain_t), intent(inout) :: self[*] + type(subdomain_t), intent(inout) :: self[*] end subroutine pure module function values(self) result(my_values) @@ -63,12 +74,6 @@ pure module function dz(self) result(my_dz) real my_dz end function - pure module function laplacian(rhs) result(laplacian_rhs) - implicit none - class(subdomain_t), intent(in) :: rhs[*] - type(subdomain_t) laplacian_rhs - end function - pure module function multiply(lhs, rhs) result(product) implicit none class(subdomain_t), intent(in) :: rhs @@ -91,4 +96,62 @@ module subroutine assign_(lhs, rhs) end interface + real dx_, dy_, dz_ + integer my_nx, nx, ny, nz, me, num_subdomains, my_internal_west, my_internal_east + +contains + + pure module function laplacian(rhs) result(laplacian_rhs) + type(subdomain_t), intent(in) :: rhs[*] + type(subdomain_t) laplacian_rhs + + integer i, j, k + real, allocatable :: halo_west(:,:), halo_east(:,:) + + call assert(allocated(rhs%s_), "subdomain_t%laplacian: allocated(rhs%s_)") + + allocate(laplacian_rhs%s_, mold=rhs%s_) + + if (me==1) then + halo_west = rhs%s_(1,:,:) + else + halo_west = rhs[me-1]%s_(ubound(rhs[me-1]%s_,1),:,:) + end if + i = my_internal_west + call assert(i+1<=my_nx,"laplacian: westernmost subdomain too small") + do concurrent(j=2:ny-1, k=2:nz-1) + laplacian_rhs%s_(i,j,k) = ( halo_west(j,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & + (rhs%s_(i,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & + (rhs%s_(i,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 + end do + + do concurrent(i=my_internal_west+1:my_internal_east-1, j=2:ny-1, k=2:nz-1) + laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & + (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & + (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 + end do + + if (me==1) then + halo_east = rhs%s_(1,:,:) + else + halo_east = rhs[me+1]%s_(lbound(rhs[me+1]%s_,1),:,:) + end if + i = my_internal_east + call assert(i-1>0,"laplacian: easternmost subdomain too small") + do concurrent(j=2:ny-1, k=2:nz-1) + laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + halo_east(j ,k ))/dx_**2 + & + (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & + (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 + end do + + laplacian_rhs%s_(:, 1,:) = 0. + laplacian_rhs%s_(:,ny,:) = 0. + laplacian_rhs%s_(:,:, 1) = 0. + laplacian_rhs%s_(:,:,nz) = 0. + if (me==1) laplacian_rhs%s_(1,:,:) = 0. + if (me==num_subdomains) laplacian_rhs%s_(my_nx,:,:) = 0. + + end function + + end module diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index a1dc8c8..8b4e953 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -1,13 +1,10 @@ submodule(subdomain_m) subdomain_s use sourcery_m, only : data_partition_t - use assert_m, only : assert use intrinsic_array_m, only : intrinsic_array_t implicit none type(data_partition_t) data_partition - real dx_, dy_, dz_ - integer my_nx, nx, ny, nz, me, num_subdomains, my_internal_west, my_internal_east real, allocatable :: increment(:,:,:) contains @@ -61,66 +58,6 @@ my_dz = dz_ end procedure - module procedure laplacian - - integer i, j, k - real, allocatable :: halo_west(:,:), halo_east(:,:) - - call assert(allocated(rhs%s_), "subdomain_t%laplacian: allocated(rhs%s_)") - - allocate(laplacian_rhs%s_(my_nx, ny, nz)) - - if (me==1) then - halo_west = rhs%s_(1,:,:) - else - halo_west = rhs[me-1]%s_(ubound(rhs[me-1]%s_,1),:,:) - end if - i = my_internal_west - call assert(i+1<=my_nx,"laplacian: westernmost subdomain too small") - do concurrent(j=2:ny-1, k=2:nz-1) - laplacian_rhs%s_(i,j,k) = ( halo_west(j,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & - (rhs%s_(i,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & - (rhs%s_(i,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 - end do - - do concurrent(i=my_internal_west+1:my_internal_east-1, j=2:ny-1, k=2:nz-1) - laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & - (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & - (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 - end do - - if (me==1) then - halo_east = rhs%s_(1,:,:) - else - halo_east = rhs[me+1]%s_(lbound(rhs[me+1]%s_,1),:,:) - end if - i = my_internal_east - call assert(i-1>0,"laplacian: easternmost subdomain too small") - do concurrent(j=2:ny-1, k=2:nz-1) - laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + halo_east(j ,k ))/dx_**2 + & - (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & - (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 - end do - - laplacian_rhs%s_(:, 1,:) = 0. - laplacian_rhs%s_(:,ny,:) = 0. - laplacian_rhs%s_(:,:, 1) = 0. - laplacian_rhs%s_(:,:,nz) = 0. - if (me==1) laplacian_rhs%s_(1,:,:) = 0. - if (me==num_subdomains) laplacian_rhs%s_(my_nx,:,:) = 0. - - end procedure - - module procedure multiply - call assert(allocated(rhs%s_), "subdomain_t%multiply: allocated(rhs%s_)") - product%s_ = lhs * rhs%s_ - end procedure - - module procedure add - call assert(allocated(rhs%s_), "subdomain_t%add: allocated(rhs%s_)") - total%s_ = lhs%s_ + rhs%s_ - end procedure - module procedure assign_ call assert(allocated(rhs%s_), "subdomain_t%assign_: allocated(rhs%s_)") lhs%s_ = rhs%s_ diff --git a/test/subdomain_test_m.f90 b/test/subdomain_test_m.f90 index 2c48222..64c8ef9 100644 --- a/test/subdomain_test_m.f90 +++ b/test/subdomain_test_m.f90 @@ -3,7 +3,7 @@ module subdomain_test_m !! Define subdomain tests and procedures required for reporting results use sourcery_m, only : test_t, test_result_t - use subdomain_m, only : subdomain_t + use subdomain_m, only : subdomain_t, operator(.laplacian.), step use assert_m, only : assert implicit none @@ -217,7 +217,7 @@ function T_procedural() associate(dt => T%dx()*T%dy()/(4*alpha)) do step = 1, steps sync all - call T%step(alpha*dt) + call step(alpha*dt, T) end do end associate From 977d4c5b18fc3782d2e5d794ec2e9e62ebf66f23 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 10:56:11 -0500 Subject: [PATCH 04/15] chore(do_concurrent_m): rm unsued use association --- src/matcha/do_concurrent_m.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matcha/do_concurrent_m.f90 b/src/matcha/do_concurrent_m.f90 index 21869c1..b13b26a 100644 --- a/src/matcha/do_concurrent_m.f90 +++ b/src/matcha/do_concurrent_m.f90 @@ -1,6 +1,6 @@ module do_concurrent_m use iso_c_binding, only : c_double, c_int - use t_cell_collection_m, only : t_cell_collection_t, t_cell_collection_bind_C_t + use t_cell_collection_m, only : t_cell_collection_bind_C_t implicit none private public :: do_concurrent_sampled_speeds, do_concurrent_my_velocities, do_concurrent_k, do_concurrent_speeds From 472be32ed5222c250ef5fa464ed780f177a8b956 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 11:30:23 -0500 Subject: [PATCH 05/15] fix(bind(C)): pre-allocate arguments --- src/matcha/distribution_s.F90 | 10 ++++++++-- src/matcha/do_concurrent_m.f90 | 10 +++++----- src/matcha/do_concurrent_s.f90 | 14 +++++--------- src/matcha/output_s.f90 | 7 +++++++ 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/matcha/distribution_s.F90 b/src/matcha/distribution_s.F90 index fff76fb..d4d14f4 100644 --- a/src/matcha/distribution_s.F90 +++ b/src/matcha/distribution_s.F90 @@ -47,8 +47,11 @@ pure function monotonically_increasing(f) result(monotonic) "distribution_t%cumulative_distribution: allocated(cumulative_distribution_)") call assert(allocated(self%vel_), "distribution_t%cumulative_distribution: allocated(vel_)") - ! Sample from the distribution - call do_concurrent_sampled_speeds(speeds, self%vel_, self%cumulative_distribution(), sampled_speeds) + ! Sample from the distribution + associate(ncells => size(speeds,1), nsteps => size(speeds,2)) + allocate(sampled_speeds(ncells,nsteps)) + call do_concurrent_sampled_speeds(speeds, self%vel_, self%cumulative_distribution(), sampled_speeds) + end associate associate(nsteps => size(speeds,2)) @@ -63,6 +66,9 @@ pure function monotonically_increasing(f) result(monotonic) end associate end associate + if(allocated(my_velocities)) deallocate(my_velocities) + allocate(my_velocities, mold=dir) + call do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) end associate diff --git a/src/matcha/do_concurrent_m.f90 b/src/matcha/do_concurrent_m.f90 index b13b26a..db1b04f 100644 --- a/src/matcha/do_concurrent_m.f90 +++ b/src/matcha/do_concurrent_m.f90 @@ -11,20 +11,20 @@ module do_concurrent_m pure module subroutine do_concurrent_sampled_speeds(speeds, vel, cumulative_distribution, sampled_speeds) bind(C) implicit none real(c_double), intent(in) :: speeds(:,:), vel(:), cumulative_distribution(:) - real(c_double), intent(out), allocatable :: sampled_speeds(:,:) + real(c_double), intent(out) :: sampled_speeds(:,:) end subroutine pure module subroutine do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) bind(C) implicit none integer(c_int), intent(in) :: nsteps real(c_double), intent(in) :: dir(:,:,:), sampled_speeds(:,:) - real(c_double), intent(out), allocatable :: my_velocities(:,:,:) + real(c_double), intent(out) :: my_velocities(:,:,:) end subroutine pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) implicit none real(c_double), intent(in) :: speeds(:), vel(:) - integer(c_int), intent(out), allocatable :: k(:) + integer(c_int), intent(out) :: k(:) end subroutine pure module subroutine & @@ -32,13 +32,13 @@ pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) implicit none integer(c_int), intent(in) :: nintervals, speed, freq, k(:) real(c_double), intent(in) :: emp_distribution(:,:) - real(c_double), intent(out), allocatable :: output_distribution(:,:) + real(c_double), intent(out) :: output_distribution(:,:) end subroutine module subroutine do_concurrent_speeds(history, speeds) bind(C) implicit none type(t_cell_collection_bind_C_t), intent(in) :: history(:) - real(c_double), intent(out), allocatable :: speeds(:) + real(c_double), intent(out) :: speeds(:) end subroutine end interface diff --git a/src/matcha/do_concurrent_s.f90 b/src/matcha/do_concurrent_s.f90 index 80c37e7..33c0218 100644 --- a/src/matcha/do_concurrent_s.f90 +++ b/src/matcha/do_concurrent_s.f90 @@ -1,4 +1,5 @@ submodule(do_concurrent_m) do_concurrent_s + use assert_m, only : assert use iso_c_binding, only : c_f_pointer implicit none @@ -7,9 +8,10 @@ module procedure do_concurrent_sampled_speeds integer cell, step + + call assert(all(shape(sampled_speeds)==shape(speeds)), "do_concurrent_sampled_speeds: {sampled_,}speeds shape match") associate(ncells => size(speeds,1), nsteps => size(speeds,2)) - allocate(sampled_speeds(ncells,nsteps)) do concurrent(cell = 1:ncells, step = 1:nsteps) associate(k => findloc(speeds(cell,step) >= cumulative_distribution, value=.false., dim=1)-1) sampled_speeds(cell,step) = vel(k) @@ -23,9 +25,8 @@ integer step - if(allocated(my_velocities)) deallocate(my_velocities) - allocate(my_velocities, mold=dir) - + call assert(all(shape(sampled_speeds)==shape(my_velocities)), "do_concurrent_my_velocities: argument shape match") + do concurrent(step=1:nsteps) my_velocities(:,step,1) = sampled_speeds(:,step)*dir(:,step,1) my_velocities(:,step,2) = sampled_speeds(:,step)*dir(:,step,2) @@ -39,8 +40,6 @@ integer i associate(nspeeds => size(speeds)) - if(allocated(k)) deallocate(k) - allocate(k(nspeeds)) do concurrent(i = 1:nspeeds) k(i) = findloc(speeds(i) >= vel, value=.false., dim=1)-1 end do @@ -51,8 +50,6 @@ integer i - if(allocated(output_distribution)) deallocate(output_distribution) - allocate(output_distribution(nintervals,2)) output_distribution(:,freq) = 0.d0 output_distribution(:,speed) = emp_distribution(:,speed) do concurrent(i = 1:size(output_distribution,1)) @@ -79,7 +76,6 @@ end do associate(t => history%time) - allocate(speeds(ncells*(npositions-1))) do concurrent(i = 1:npositions-1, j = 1:ncells) associate( & u => (x(i+1,j,:) - x(i,j,:))/(t(i+1) - t(i)), & diff --git a/src/matcha/output_s.f90 b/src/matcha/output_s.f90 index 1bac989..57318b1 100644 --- a/src/matcha/output_s.f90 +++ b/src/matcha/output_s.f90 @@ -24,12 +24,19 @@ integer, parameter :: speed=1, freq=2 ! subscripts for speeds and frequencies + associate(npositions => size(history), ncells => history(1)%positions_shape(1)) + allocate(speeds(ncells*(npositions-1))) + end associate call do_concurrent_speeds(t_cell_collection_bind_C_t(self%history_), speeds) associate(emp_distribution => self%input_%sample_distribution()) associate(nintervals => size(emp_distribution(:,1)), dvel_half => (emp_distribution(2,speed)-emp_distribution(1,speed))/2.d0) vel = [emp_distribution(1,speed) - dvel_half, [(emp_distribution(i,speed) + dvel_half, i=1,nintervals)]] + if (allocated(k)) deallocate(k) + allocate(k(nspeeds)) call do_concurrent_k(speeds, vel, k) + if(allocated(output_distribution)) deallocate(output_distribution) + allocate(output_distribution(nintervals,2)) call do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) output_distribution(:,freq) = output_distribution(:,freq)/sum(output_distribution(:,freq)) end associate From 7d5d427d99a539ca1e12f432b55c4e798f3e7715 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 17:10:54 -0500 Subject: [PATCH 06/15] fix(do_concurrent): work around NAG compiler bug nagfor 7.1 Build 7143 requires a separate module procedure definition with the bind(C) attribute to repeat the full interface body. --- src/matcha/do_concurrent_m.f90 | 4 +-- src/matcha/do_concurrent_s.f90 | 49 ++++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/matcha/do_concurrent_m.f90 b/src/matcha/do_concurrent_m.f90 index db1b04f..21e1885 100644 --- a/src/matcha/do_concurrent_m.f90 +++ b/src/matcha/do_concurrent_m.f90 @@ -27,8 +27,8 @@ pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) integer(c_int), intent(out) :: k(:) end subroutine - pure module subroutine & - do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) bind(C) + pure module subroutine do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) & + bind(C) implicit none integer(c_int), intent(in) :: nintervals, speed, freq, k(:) real(c_double), intent(in) :: emp_distribution(:,:) diff --git a/src/matcha/do_concurrent_s.f90 b/src/matcha/do_concurrent_s.f90 index 33c0218..e8f1aac 100644 --- a/src/matcha/do_concurrent_s.f90 +++ b/src/matcha/do_concurrent_s.f90 @@ -5,8 +5,9 @@ contains - module procedure do_concurrent_sampled_speeds - + pure module subroutine do_concurrent_sampled_speeds(speeds, vel, cumulative_distribution, sampled_speeds) bind(C) + real(c_double), intent(in) :: speeds(:,:), vel(:), cumulative_distribution(:) + real(c_double), intent(out) :: sampled_speeds(:,:) integer cell, step call assert(all(shape(sampled_speeds)==shape(speeds)), "do_concurrent_sampled_speeds: {sampled_,}speeds shape match") @@ -19,34 +20,42 @@ end do end associate - end procedure - - module procedure do_concurrent_my_velocities - + end subroutine + + pure module subroutine do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) bind(C) + integer(c_int), intent(in) :: nsteps + real(c_double), intent(in) :: dir(:,:,:), sampled_speeds(:,:) + real(c_double), intent(out) :: my_velocities(:,:,:) integer step - call assert(all(shape(sampled_speeds)==shape(my_velocities)), "do_concurrent_my_velocities: argument shape match") + call assert(all([size(my_velocities,1),size(sampled_speeds,2)] == shape(sampled_speeds)), & + "do_concurrent_my_velocities: argument size match") + call assert(all(shape(my_velocities,1)==shape(dir)), "do_concurrent_my_velocities: argument shape match") do concurrent(step=1:nsteps) my_velocities(:,step,1) = sampled_speeds(:,step)*dir(:,step,1) my_velocities(:,step,2) = sampled_speeds(:,step)*dir(:,step,2) my_velocities(:,step,3) = sampled_speeds(:,step)*dir(:,step,3) end do - - end procedure + end subroutine - module procedure do_concurrent_k - - integer i + pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) + real(c_double), intent(in) :: speeds(:), vel(:) + integer(c_int), intent(out) :: k(:) + integer i associate(nspeeds => size(speeds)) do concurrent(i = 1:nspeeds) k(i) = findloc(speeds(i) >= vel, value=.false., dim=1)-1 end do end associate - end procedure + end subroutine - module procedure do_concurrent_output_distribution + pure module subroutine do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) & + bind(C) + integer(c_int), intent(in) :: nintervals, speed, freq, k(:) + real(c_double), intent(in) :: emp_distribution(:,:) + real(c_double), intent(out) :: output_distribution(:,:) integer i @@ -55,11 +64,11 @@ do concurrent(i = 1:size(output_distribution,1)) output_distribution(i,freq) = count(k==i) end do - - end procedure - - module procedure do_concurrent_speeds + end subroutine + module subroutine do_concurrent_speeds(history, speeds) bind(C) + type(t_cell_collection_bind_C_t), intent(in) :: history(:) + real(c_double), intent(out) :: speeds(:) integer i, j, k integer, parameter :: nspacedims=3 @@ -87,6 +96,6 @@ end associate end associate - end procedure + end subroutine -end submodule do_concurrent_s +end submodule do_concurrent_s \ No newline at end of file From 14f7c6d5cb4c458edee5b7cfe448fd556d38f177 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 17:38:24 -0500 Subject: [PATCH 07/15] chore(do_concurrent_m): rm unused dummy argument --- src/matcha/do_concurrent_m.f90 | 4 ++-- src/matcha/do_concurrent_s.f90 | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/matcha/do_concurrent_m.f90 b/src/matcha/do_concurrent_m.f90 index 21e1885..d28691c 100644 --- a/src/matcha/do_concurrent_m.f90 +++ b/src/matcha/do_concurrent_m.f90 @@ -27,10 +27,10 @@ pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) integer(c_int), intent(out) :: k(:) end subroutine - pure module subroutine do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) & + pure module subroutine do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) & bind(C) implicit none - integer(c_int), intent(in) :: nintervals, speed, freq, k(:) + integer(c_int), intent(in) :: speed, freq, k(:) real(c_double), intent(in) :: emp_distribution(:,:) real(c_double), intent(out) :: output_distribution(:,:) end subroutine diff --git a/src/matcha/do_concurrent_s.f90 b/src/matcha/do_concurrent_s.f90 index e8f1aac..a8d3a50 100644 --- a/src/matcha/do_concurrent_s.f90 +++ b/src/matcha/do_concurrent_s.f90 @@ -51,12 +51,10 @@ pure module subroutine do_concurrent_k(speeds, vel, k) bind(C) end associate end subroutine - pure module subroutine do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) & - bind(C) - integer(c_int), intent(in) :: nintervals, speed, freq, k(:) + pure module subroutine do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) bind(C) + integer(c_int), intent(in) :: speed, freq, k(:) real(c_double), intent(in) :: emp_distribution(:,:) real(c_double), intent(out) :: output_distribution(:,:) - integer i output_distribution(:,freq) = 0.d0 From 7e0190522f38d99c627b63cdb222e767cb770996 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 17:58:38 -0500 Subject: [PATCH 08/15] chore(output_s): rm unused c_loc use association --- src/matcha/output_s.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matcha/output_s.f90 b/src/matcha/output_s.f90 index 57318b1..6fda001 100644 --- a/src/matcha/output_s.f90 +++ b/src/matcha/output_s.f90 @@ -3,7 +3,7 @@ submodule(output_m) output_s use do_concurrent_m, only : do_concurrent_k, do_concurrent_output_distribution, do_concurrent_speeds use t_cell_collection_m, only : t_cell_collection_bind_C_t - use iso_c_binding, only : c_loc, c_double + use iso_c_binding, only : c_double implicit none contains From 70ab130a37b1e7879e3c50c5967d90eec3bdc090 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 20:30:34 -0500 Subject: [PATCH 09/15] chore: eliminate pre-processing (.F90 -> .f90) Changing file extensions from .f90 to .F90 eliminates unnecessary automatic C preprocessing with some compilers, including gfortran. --- src/matcha/{distribution_s.F90 => distribution_s.f90} | 0 src/matcha/{t_cell_collection_s.F90 => t_cell_collection_s.f90} | 0 src/{matcha_s.F90 => matcha_s.f90} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename src/matcha/{distribution_s.F90 => distribution_s.f90} (100%) rename src/matcha/{t_cell_collection_s.F90 => t_cell_collection_s.f90} (100%) rename src/{matcha_s.F90 => matcha_s.f90} (100%) diff --git a/src/matcha/distribution_s.F90 b/src/matcha/distribution_s.f90 similarity index 100% rename from src/matcha/distribution_s.F90 rename to src/matcha/distribution_s.f90 diff --git a/src/matcha/t_cell_collection_s.F90 b/src/matcha/t_cell_collection_s.f90 similarity index 100% rename from src/matcha/t_cell_collection_s.F90 rename to src/matcha/t_cell_collection_s.f90 diff --git a/src/matcha_s.F90 b/src/matcha_s.f90 similarity index 100% rename from src/matcha_s.F90 rename to src/matcha_s.f90 From 86593b29962176d15980a231ceb296bd85359607 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 20:51:42 -0500 Subject: [PATCH 10/15] chore(subdomain): rm unused variables --- src/matcha/subdomain_s.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index 8b4e953..eb30147 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -134,7 +134,6 @@ subroutine edge_points(self, ds) subroutine apply_boundary_condition(ds) real, intent(inout) :: ds(:,:,:) - integer i, j ds(:,1:ny:ny-1, : ) = 0. ds(:, : ,1:nz:nz-1) = 0. From 6526377c46f9c2e71aaea803e6804a3e8d221b05 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 1 Jan 2024 20:58:17 -0500 Subject: [PATCH 11/15] WIP: nagfor/gfortran internal compiler error (ICE) The following commands reproduce an internal compiler error (ICE) with each of the named compilers and compiler versions: `gfortran` (Homebrew-installed) --------------------------------- * Version 13.2.: `fpm test` * Version 12.3.0: `fpm test --compiler gfortran-12` `nagfor` 7.1 (Build 7143) ------------------------- * `fpm test --compiler nagfor --flag "-fpp -f2018"` --- src/matcha/do_concurrent_s.f90 | 20 +++---- src/matcha/output_s.f90 | 15 +++-- src/matcha/subdomain_m.f90 | 88 ++++-------------------------- src/matcha/subdomain_s.f90 | 59 +++++++++++++++++++- src/matcha/t_cell_collection_m.f90 | 4 +- src/{matcha_s.f90 => matcha_s.F90} | 2 + 6 files changed, 90 insertions(+), 98 deletions(-) rename src/{matcha_s.f90 => matcha_s.F90} (99%) diff --git a/src/matcha/do_concurrent_s.f90 b/src/matcha/do_concurrent_s.f90 index a8d3a50..c4798db 100644 --- a/src/matcha/do_concurrent_s.f90 +++ b/src/matcha/do_concurrent_s.f90 @@ -30,7 +30,7 @@ pure module subroutine do_concurrent_my_velocities(nsteps, dir, sampled_speeds, call assert(all([size(my_velocities,1),size(sampled_speeds,2)] == shape(sampled_speeds)), & "do_concurrent_my_velocities: argument size match") - call assert(all(shape(my_velocities,1)==shape(dir)), "do_concurrent_my_velocities: argument shape match") + call assert(all(size(my_velocities,1)==shape(dir)), "do_concurrent_my_velocities: argument shape match") do concurrent(step=1:nsteps) my_velocities(:,step,1) = sampled_speeds(:,step)*dir(:,step,1) @@ -82,16 +82,14 @@ module subroutine do_concurrent_speeds(history, speeds) bind(C) x(i,:,:) = positions end do - associate(t => history%time) - do concurrent(i = 1:npositions-1, j = 1:ncells) - associate( & - u => (x(i+1,j,:) - x(i,j,:))/(t(i+1) - t(i)), & - ij => i + (j-1)*(npositions-1) & - ) - speeds(ij) = sqrt(sum([(u(k)**2, k=1,nspacedims)])) - end associate - end do - end associate + do concurrent(i = 1:npositions-1, j = 1:ncells) + associate( & + u => (x(i+1,j,:) - x(i,j,:))/(history(i+1)%time - history(i)%time), & + ij => i + (j-1)*(npositions-1) & + ) + speeds(ij) = sqrt(sum([(u(k)**2, k=1,nspacedims)])) + end associate + end do end associate end subroutine diff --git a/src/matcha/output_s.f90 b/src/matcha/output_s.f90 index 6fda001..ec621bf 100644 --- a/src/matcha/output_s.f90 +++ b/src/matcha/output_s.f90 @@ -24,23 +24,26 @@ integer, parameter :: speed=1, freq=2 ! subscripts for speeds and frequencies - associate(npositions => size(history), ncells => history(1)%positions_shape(1)) - allocate(speeds(ncells*(npositions-1))) + associate(npositions => size(self%history_)) + allocate(speeds(self%my_num_cells()*(npositions-1))) end associate call do_concurrent_speeds(t_cell_collection_bind_C_t(self%history_), speeds) - associate(emp_distribution => self%input_%sample_distribution()) + block + real(c_double), allocatable :: emp_distribution(:,:) + + emp_distribution = self%input_%sample_distribution() associate(nintervals => size(emp_distribution(:,1)), dvel_half => (emp_distribution(2,speed)-emp_distribution(1,speed))/2.d0) vel = [emp_distribution(1,speed) - dvel_half, [(emp_distribution(i,speed) + dvel_half, i=1,nintervals)]] if (allocated(k)) deallocate(k) - allocate(k(nspeeds)) + allocate(k(size(speeds))) call do_concurrent_k(speeds, vel, k) if(allocated(output_distribution)) deallocate(output_distribution) allocate(output_distribution(nintervals,2)) - call do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) + call do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) output_distribution(:,freq) = output_distribution(:,freq)/sum(output_distribution(:,freq)) end associate - end associate + end block end procedure diff --git a/src/matcha/subdomain_m.f90 b/src/matcha/subdomain_m.f90 index 605ead6..bf64959 100644 --- a/src/matcha/subdomain_m.f90 +++ b/src/matcha/subdomain_m.f90 @@ -1,42 +1,36 @@ module subdomain_m - use assert_m, only : assert implicit none private public :: subdomain_t - public :: operator(.laplacian.) - public :: step type subdomain_t private real, allocatable :: s_(:,:,:) contains procedure, pass(self) :: define - procedure, pass(rhs) :: multiply - generic :: operator(*) => multiply - generic :: operator(+) => add - generic :: assignment(=) => assign_ procedure dx procedure dy procedure dz procedure values + generic :: operator(*) => multiply + generic :: operator(+) => add + generic :: operator(.laplacian.) => laplacian + generic :: assignment(=) => assign_ + procedure, private, pass(rhs) :: multiply + procedure, private :: laplacian procedure, private :: add procedure, private :: assign_ end type - interface operator(.laplacian.) - - module procedure laplacian - !pure module function laplacian(rhs) result(laplacian_rhs) - ! implicit none - ! type(subdomain_t), intent(in) :: rhs[*] - ! type(subdomain_t) laplacian_rhs - !end function - - end interface - interface + pure module function laplacian(rhs) result(laplacian_rhs) + implicit none + class(subdomain_t), intent(in) :: rhs[*] + type(subdomain_t) laplacian_rhs + end function + module subroutine define(side, boundary_val, internal_val, n, self) implicit none real, intent(in) :: side, boundary_val, internal_val @@ -96,62 +90,4 @@ module subroutine assign_(lhs, rhs) end interface - real dx_, dy_, dz_ - integer my_nx, nx, ny, nz, me, num_subdomains, my_internal_west, my_internal_east - -contains - - pure module function laplacian(rhs) result(laplacian_rhs) - type(subdomain_t), intent(in) :: rhs[*] - type(subdomain_t) laplacian_rhs - - integer i, j, k - real, allocatable :: halo_west(:,:), halo_east(:,:) - - call assert(allocated(rhs%s_), "subdomain_t%laplacian: allocated(rhs%s_)") - - allocate(laplacian_rhs%s_, mold=rhs%s_) - - if (me==1) then - halo_west = rhs%s_(1,:,:) - else - halo_west = rhs[me-1]%s_(ubound(rhs[me-1]%s_,1),:,:) - end if - i = my_internal_west - call assert(i+1<=my_nx,"laplacian: westernmost subdomain too small") - do concurrent(j=2:ny-1, k=2:nz-1) - laplacian_rhs%s_(i,j,k) = ( halo_west(j,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & - (rhs%s_(i,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & - (rhs%s_(i,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 - end do - - do concurrent(i=my_internal_west+1:my_internal_east-1, j=2:ny-1, k=2:nz-1) - laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & - (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & - (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 - end do - - if (me==1) then - halo_east = rhs%s_(1,:,:) - else - halo_east = rhs[me+1]%s_(lbound(rhs[me+1]%s_,1),:,:) - end if - i = my_internal_east - call assert(i-1>0,"laplacian: easternmost subdomain too small") - do concurrent(j=2:ny-1, k=2:nz-1) - laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + halo_east(j ,k ))/dx_**2 + & - (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & - (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 - end do - - laplacian_rhs%s_(:, 1,:) = 0. - laplacian_rhs%s_(:,ny,:) = 0. - laplacian_rhs%s_(:,:, 1) = 0. - laplacian_rhs%s_(:,:,nz) = 0. - if (me==1) laplacian_rhs%s_(1,:,:) = 0. - if (me==num_subdomains) laplacian_rhs%s_(my_nx,:,:) = 0. - - end function - - end module diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index eb30147..5cebb3d 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -1,10 +1,11 @@ submodule(subdomain_m) subdomain_s + use assert_m, only : assert, intrinsic_array_t use sourcery_m, only : data_partition_t - use intrinsic_array_m, only : intrinsic_array_t implicit none type(data_partition_t) data_partition - + real dx_, dy_, dz_ + integer my_nx, nx, ny, nz, me, num_subdomains, my_internal_west, my_internal_east real, allocatable :: increment(:,:,:) contains @@ -144,4 +145,56 @@ subroutine apply_boundary_condition(ds) end procedure -end submodule subdomain_s \ No newline at end of file + pure module function laplacian(rhs) result(laplacian_rhs) + class(subdomain_t), intent(in) :: rhs[*] + type(subdomain_t) laplacian_rhs + + integer i, j, k + real, allocatable :: halo_west(:,:), halo_east(:,:) + + call assert(allocated(rhs%s_), "subdomain_t%laplacian: allocated(rhs%s_)") + + allocate(laplacian_rhs%s_, mold=rhs%s_) + + if (me==1) then + halo_west = rhs%s_(1,:,:) + else + halo_west = rhs[me-1]%s_(ubound(rhs[me-1]%s_,1),:,:) + end if + i = my_internal_west + call assert(i+1<=my_nx,"laplacian: westernmost subdomain too small") + do concurrent(j=2:ny-1, k=2:nz-1) + laplacian_rhs%s_(i,j,k) = ( halo_west(j,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & + (rhs%s_(i,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & + (rhs%s_(i,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 + end do + + do concurrent(i=my_internal_west+1:my_internal_east-1, j=2:ny-1, k=2:nz-1) + laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i+1,j ,k ))/dx_**2 + & + (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & + (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 + end do + + if (me==1) then + halo_east = rhs%s_(1,:,:) + else + halo_east = rhs[me+1]%s_(lbound(rhs[me+1]%s_,1),:,:) + end if + i = my_internal_east + call assert(i-1>0,"laplacian: easternmost subdomain too small") + do concurrent(j=2:ny-1, k=2:nz-1) + laplacian_rhs%s_(i,j,k) = (rhs%s_(i-1,j ,k ) - 2*rhs%s_(i,j,k) + halo_east(j ,k ))/dx_**2 + & + (rhs%s_(i ,j-1,k ) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j+1,k ))/dy_**2 + & + (rhs%s_(i ,j ,k-1) - 2*rhs%s_(i,j,k) + rhs%s_(i ,j ,k+1))/dz_**2 + end do + + laplacian_rhs%s_(:, 1,:) = 0. + laplacian_rhs%s_(:,ny,:) = 0. + laplacian_rhs%s_(:,:, 1) = 0. + laplacian_rhs%s_(:,:,nz) = 0. + if (me==1) laplacian_rhs%s_(1,:,:) = 0. + if (me==num_subdomains) laplacian_rhs%s_(my_nx,:,:) = 0. + + end function + +end submodule subdomain_s diff --git a/src/matcha/t_cell_collection_m.f90 b/src/matcha/t_cell_collection_m.f90 index 323d47b..03789ac 100644 --- a/src/matcha/t_cell_collection_m.f90 +++ b/src/matcha/t_cell_collection_m.f90 @@ -42,8 +42,9 @@ pure module function construct(positions, time) result(t_cell_collection) interface t_cell_collection_bind_C_t - elemental module function construct_bind_C(t_cell_collection) result(t_cell_collection_bind_C) + impure elemental module function construct_bind_C(t_cell_collection) result(t_cell_collection_bind_C) !! Result is bind(C) representation of the data inside a t_cell_collection_t object + !! This function is impure because it invokes c_loc. Fortran 2023 compliance will allow this function to be pure. implicit none type(t_cell_collection_t), intent(in), target :: t_cell_collection type(t_cell_collection_bind_C_t) t_cell_collection_bind_C @@ -60,7 +61,6 @@ pure module function positions(self) result(my_positions) double precision, allocatable :: my_positions(:,:) end function - elemental module function time(self) result(my_time) !! Return the t_cell_collection_t object's time stamp implicit none diff --git a/src/matcha_s.f90 b/src/matcha_s.F90 similarity index 99% rename from src/matcha_s.f90 rename to src/matcha_s.F90 index 4735efc..0cc9081 100644 --- a/src/matcha_s.f90 +++ b/src/matcha_s.F90 @@ -32,7 +32,9 @@ associate(me => this_image()) associate(my_num_cells => data_partition%last(me) - data_partition%first(me) + 1) +#ifndef NAGFOR call random_init(repeatable=.true., image_distinct=.true.) +#endif allocate(random_positions(my_num_cells,ndim)) call random_number(random_positions) From d19482d76dc144776cbeb2db30401da297a28330 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Sat, 3 Feb 2024 14:02:55 -0800 Subject: [PATCH 12/15] test: work around nagfor 7.1 Build 7145 bug --- app/main.f90 | 6 +++--- test/matcha_test_m.f90 | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/app/main.f90 b/app/main.f90 index 99ca6ce..67fb9b9 100644 --- a/app/main.f90 +++ b/app/main.f90 @@ -9,14 +9,14 @@ program main associate(input => input_t()) output = output_t(input, matcha(input)) block - double precision, allocatable :: simulated_distribution(:,:) + double precision, allocatable :: simulated_distribution(:,:), frequency_distribution(:) integer, parameter :: freq=2 integer num_cells num_cells = output%my_num_cells() simulated_distribution = output%simulated_distribution() - simulated_distribution(:,freq) = num_cells*simulated_distribution(:,freq) - call co_sum(simulated_distribution(:,freq), result_image=1) + frequency_distribution = num_cells*simulated_distribution(:,freq) ! copy to work around nagfor bug + call co_sum(frequency_distribution, result_image=1) call co_sum(num_cells, result_image=1) if (this_image()==1) simulated_distribution(:,freq) = simulated_distribution(:,freq)/dble(num_cells) end block diff --git a/test/matcha_test_m.f90 b/test/matcha_test_m.f90 index f75e164..8a2c64e 100644 --- a/test/matcha_test_m.f90 +++ b/test/matcha_test_m.f90 @@ -67,7 +67,7 @@ function compare_image_distributions() result(test_passes) function compare_global_distributions() result(test_passes) logical test_passes type(output_t) output - double precision, allocatable :: simulated_distribution(:,:) + double precision, allocatable :: simulated_distribution(:,:), frequency(:) integer num_cells integer, parameter :: speed=1, freq=2 ! subscripts for speeds and frequencies real, parameter :: tolerance = 1.D-02 @@ -77,16 +77,16 @@ function compare_global_distributions() result(test_passes) associate(empirical_distribution => input%sample_distribution()) simulated_distribution = output%simulated_distribution() num_cells = output%my_num_cells() - simulated_distribution(:,freq) = num_cells*simulated_distribution(:,freq) - call co_sum(simulated_distribution(:,freq), result_image=1) + frequency_distribution = num_cells*simulated_distribution(:,freq) ! copy to work around nagfor 7.1 Build 7145 compiler bug + call co_sum(frequency_distribution, result_image=1) call co_sum(num_cells, result_image=1) if (this_image()/=1) then test_passes = .true. else - simulated_distribution(:,freq) = simulated_distribution(:,freq)/dble(num_cells) + frequency_distribution = frequency_distribution/dble(num_cells) associate( & diffmax_speeds=> maxval(abs(empirical_distribution(:,speed)-simulated_distribution(:,speed))), & - diffmax_freqs => maxval(abs(empirical_distribution(:,freq)-simulated_distribution(:,freq))) & + diffmax_freqs => maxval(abs(empirical_distribution(:,freq)-frequency_distribution)) & ) test_passes = (diffmax_freqs < tolerance) .and. (diffmax_speeds < tolerance) end associate From e561f4228d50b11072589071c586d76634b57a2b Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Sat, 3 Feb 2024 14:03:57 -0800 Subject: [PATCH 13/15] fix(subdomain): eliminate ambigous var/proc names --- example/time-paradigm.f90 | 4 ++-- src/matcha/subdomain_m.f90 | 3 ++- src/matcha/subdomain_s.f90 | 2 +- test/subdomain_test_m.f90 | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/example/time-paradigm.f90 b/example/time-paradigm.f90 index 662be3b..a77008e 100644 --- a/example/time-paradigm.f90 +++ b/example/time-paradigm.f90 @@ -2,7 +2,7 @@ ! Terms of use are as specified in LICENSE.txt program time_paradigm_m !! Time various alternative programming paradigms - use subdomain_m, only : subdomain_t, operator(.laplacian.), step + use subdomain_m, only : subdomain_t, march use assert_m, only : assert use sourcery_m, only : string_t, file_t, command_line_t, bin_t, csv use iso_fortran_env, only : int64 @@ -90,7 +90,7 @@ function procedural_programming_time() result(system_time) procedural_programming: & do step = 1, steps sync all - call step(alpha*dt, T) + call march(alpha*dt, T) end do procedural_programming call system_clock(t_end_procedural, clock_rate) diff --git a/src/matcha/subdomain_m.f90 b/src/matcha/subdomain_m.f90 index bf64959..f2a22d0 100644 --- a/src/matcha/subdomain_m.f90 +++ b/src/matcha/subdomain_m.f90 @@ -3,6 +3,7 @@ module subdomain_m private public :: subdomain_t + public :: march type subdomain_t private @@ -38,7 +39,7 @@ module subroutine define(side, boundary_val, internal_val, n, self) class(subdomain_t), intent(out) :: self end subroutine - module subroutine step(alpha_dt, self) + module subroutine march(alpha_dt, self) implicit none real, intent(in) :: alpha_dt type(subdomain_t), intent(inout) :: self[*] diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index 5cebb3d..ac68f62 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -69,7 +69,7 @@ my_values = self%s_ end procedure - module procedure step + module procedure march call assert(allocated(self%s_), "subdomain_t%laplacian: allocated(rhs%s_)") call assert(my_internal_west+1<=my_nx,"laplacian: westernmost subdomain too small") diff --git a/test/subdomain_test_m.f90 b/test/subdomain_test_m.f90 index 64c8ef9..5b506e3 100644 --- a/test/subdomain_test_m.f90 +++ b/test/subdomain_test_m.f90 @@ -3,7 +3,7 @@ module subdomain_test_m !! Define subdomain tests and procedures required for reporting results use sourcery_m, only : test_t, test_result_t - use subdomain_m, only : subdomain_t, operator(.laplacian.), step + use subdomain_m, only : subdomain_t, march use assert_m, only : assert implicit none @@ -217,7 +217,7 @@ function T_procedural() associate(dt => T%dx()*T%dy()/(4*alpha)) do step = 1, steps sync all - call step(alpha*dt, T) + call march(alpha*dt, T) end do end associate From 6ad5e1435dd3d7e9219f0a30bed492d05651bfdd Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Sat, 3 Feb 2024 14:36:42 -0800 Subject: [PATCH 14/15] test(matcha): fix test declaration --- example/heat-equation.f90 | 12 +++++++----- test/matcha_test_m.f90 | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/example/heat-equation.f90 b/example/heat-equation.f90 index 3c3cb5c..2e7624c 100644 --- a/example/heat-equation.f90 +++ b/example/heat-equation.f90 @@ -76,13 +76,14 @@ module subroutine exchange_halo(self) end interface + real, allocatable :: halo_x(:,:)[:] + end module submodule(subdomain_2D_m) subdomain_2D_s use assertions_m, only : assert implicit none - real, allocatable :: halo_x(:,:)[:] real dx_, dy_ integer, parameter :: west=1, east=2 integer my_nx, nx, ny, me, num_subdomains, my_internal_west, my_internal_east @@ -114,7 +115,8 @@ module subroutine exchange_halo(self) self%s_(my_nx, 2:ny-1) = merge(boundary_val, internal_val, me==num_subdomains) ! east subdomain boundary if (allocated(halo_x)) deallocate(halo_x) - allocate(halo_x(west:east, ny)[*]) + !allocate(halo_x(west:east, ny)[*]) + allocate(halo_x(2, ny)[*]) call self%exchange_halo end procedure @@ -131,7 +133,7 @@ module subroutine exchange_halo(self) integer i, j real, allocatable :: halo_west(:), halo_east(:) - call assert(allocated(rhs%s_), "subdomain_2D_t%laplacian: allocated(rhs%s_)") + !call assert(allocated(rhs%s_), "subdomain_2D_t%laplacian: allocated(rhs%s_)") call assert(allocated(halo_x), "subdomain_2D_t%laplacian: allocated(halo_x)") allocate(laplacian_rhs(my_nx, ny)) @@ -168,8 +170,8 @@ module subroutine exchange_halo(self) end procedure module procedure exchange_halo - if (me>1) halo_x(east,:)[me-1] = self%s_(1,:) - if (me1) halo_x(east,:)[me-1] = self%s_(1,:) + !if (me Date: Sat, 3 Feb 2024 20:56:05 -0800 Subject: [PATCH 15/15] fix(subdomain): add missing type-bound operators --- src/matcha/subdomain_s.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/matcha/subdomain_s.f90 b/src/matcha/subdomain_s.f90 index ac68f62..f5bfa0c 100644 --- a/src/matcha/subdomain_s.f90 +++ b/src/matcha/subdomain_s.f90 @@ -59,6 +59,16 @@ my_dz = dz_ end procedure + module procedure add + call assert(allocated(lhs%s_) .and. allocated(rhs%s_), "subdomain_t%add: allocated(rhs%s_)") + total%s_ = lhs%s_ + rhs%s_ + end procedure + + module procedure multiply + call assert(allocated(rhs%s_), "subdomain_t%multiply: allocated(rhs%s_)") + product%s_ = lhs + rhs%s_ + end procedure + module procedure assign_ call assert(allocated(rhs%s_), "subdomain_t%assign_: allocated(rhs%s_)") lhs%s_ = rhs%s_ @@ -142,7 +152,6 @@ subroutine apply_boundary_condition(ds) if (me==num_subdomains) ds(my_nx,:,:) = 0. end subroutine - end procedure pure module function laplacian(rhs) result(laplacian_rhs)