From 603a6b27fca6fd277926fbdf51fc872f637c272c Mon Sep 17 00:00:00 2001
From: Damian Rouson <rouson@lbl.gov>
Date: Mon, 28 Jul 2025 00:28:48 -0700
Subject: [PATCH] chore: merge main into vibe-coding branch

---
 README.md                                     |  3 +-
 doc/ai4dev/README.md                          |  4 +-
 .../idiomatic-vibe-response.f90               | 46 +++++++++----------
 doc/ai4dev/vibe-coding/vibe-response.f90      | 18 ++++----
 example/time-paradigm.F90                     |  8 ++--
 src/matcha/distribution_s.F90                 | 20 ++++----
 6 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/README.md b/README.md
index 390253e1..2a12b6fe 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,8 @@ Change '2' above to the number of images that you would like to launch in parall
 #### Multi-image (parallel) execution
 The unit test suite uses small grids to minimize runtime, which limits the scalability of the test suite only.
 Hence, the test suite is _not_ designed for execution in a large number of images.
-All tests pass when running the test suite in 1-4 images: 
+
+All tests pass when running the test suite in 1-4 images:
 ```
 export FOR_COARRAY_NUM_IMAGES=4
 fpm test --compiler ifx --profile release --flag "-heap-arrays -coarray"
diff --git a/doc/ai4dev/README.md b/doc/ai4dev/README.md
index 7adf07e7..76cb06ac 100644
--- a/doc/ai4dev/README.md
+++ b/doc/ai4dev/README.md
@@ -20,7 +20,7 @@ Vibe Coding
    ```
 4. Use the contents of [vibe-coding/README.md](./vibe-coding/README.md) as your
    prompt to a large language model (LLM).
-5. Use an editor to edit the test-suite.F90 lines beginning and ending with 
+5. Use an editor to edit the test-suite.F90 lines beginning and ending with
    `module procedure laplacian` and `end procedure laplacian`, respectively,
    replacing those lines with the LLM's response.
 6. Compile test-suite.F90 as in step 2 again.
@@ -36,7 +36,7 @@ Vibe Coding
      but tests failed replace "compiling" with "running" and replace "errors"
      with "test failures".
    * Append LLM-generated code from the previous iteration.
-   
+
 Idiomatic Vibe Coding
 ---------------------
 Follow the [vibe coding](#vibe-coding) steps except as described below:
diff --git a/doc/ai4dev/idiomatic-vibe-coding/idiomatic-vibe-response.f90 b/doc/ai4dev/idiomatic-vibe-coding/idiomatic-vibe-response.f90
index b9a72a8c..a5f94a58 100644
--- a/doc/ai4dev/idiomatic-vibe-coding/idiomatic-vibe-response.f90
+++ b/doc/ai4dev/idiomatic-vibe-coding/idiomatic-vibe-response.f90
@@ -2,23 +2,23 @@ pure module function laplacian(rhs) result(laplacian_rhs)
   implicit none
   class(subdomain_t), intent(in) :: rhs
   type(subdomain_t) :: laplacian_rhs
-  
+
   ! Local variables for array bounds and grid spacing
   integer :: my_nx, ny, nz, me, num_subdomains
   real :: dx_val, dy_val, dz_val, dx2_inv, dy2_inv, dz2_inv
-  
+
   ! Get array dimensions and parallel image information
   my_nx = size(rhs%s_, 1)
-  ny = size(rhs%s_, 2) 
+  ny = size(rhs%s_, 2)
   nz = size(rhs%s_, 3)
   me = this_image()
   num_subdomains = num_images()
-  
+
   ! Get grid spacing values
   dx_val = rhs%dx()
-  dy_val = rhs%dy() 
+  dy_val = rhs%dy()
   dz_val = rhs%dz()
-  
+
   ! Compute inverse squared grid spacings for efficiency
   dx2_inv = 1.0 / (dx_val * dx_val)
   dy2_inv = 1.0 / (dy_val * dy_val)
@@ -26,63 +26,63 @@ pure module function laplacian(rhs) result(laplacian_rhs)
   
   ! Allocate result array with same shape as input
   allocate(laplacian_rhs%s_(my_nx, ny, nz))
-  
+
   ! Initialize result to zero
   laplacian_rhs%s_ = 0.0
-  
+
   ! Compute 2nd-order central difference Laplacian in interior points
-  ! Laplacian = d²/dx² + d²/dy² + d²/dz²
+  ! Laplacian = d2/dx2 + d2/dy2 + d2/dz2
   do concurrent (i = 2:my_nx-1, j = 2:ny-1, k = 2:nz-1) default(none) &
     shared(laplacian_rhs, rhs, dx2_inv, dy2_inv, dz2_inv, my_nx, ny, nz, me, num_subdomains)
-    
-    ! Second derivative in x-direction: (f(i+1) - 2*f(i) + f(i-1))/dx²
-    ! Second derivative in y-direction: (f(j+1) - 2*f(j) + f(j-1))/dy²  
-    ! Second derivative in z-direction: (f(k+1) - 2*f(k) + f(k-1))/dz²
+
+    ! Second derivative in x-direction: (f(i+1) - 2*f(i) + f(i-1))/dx2
+    ! Second derivative in y-direction: (f(j+1) - 2*f(j) + f(j-1))/dy2
+    ! Second derivative in z-direction: (f(k+1) - 2*f(k) + f(k-1))/dz2
     laplacian_rhs%s_(i,j,k) = &
       dx2_inv * (rhs%s_(i+1,j,k) - 2.0*rhs%s_(i,j,k) + rhs%s_(i-1,j,k)) + &
       dy2_inv * (rhs%s_(i,j+1,k) - 2.0*rhs%s_(i,j,k) + rhs%s_(i,j-1,k)) + &
       dz2_inv * (rhs%s_(i,j,k+1) - 2.0*rhs%s_(i,j,k) + rhs%s_(i,j,k-1))
   end do
-  
+
   ! Handle boundary conditions at subdomain interfaces for x-direction
   ! Left boundary (i=1) - only compute if not the first subdomain
   if (me > 1) then
     do concurrent (j = 2:ny-1, k = 2:nz-1) default(none) &
       shared(laplacian_rhs, rhs, dx2_inv, dy2_inv, dz2_inv, ny, nz, halo_x)
-      
+
       laplacian_rhs%s_(1,j,k) = &
         dx2_inv * (rhs%s_(2,j,k) - 2.0*rhs%s_(1,j,k) + halo_x(west,j,k)) + &
         dy2_inv * (rhs%s_(1,j+1,k) - 2.0*rhs%s_(1,j,k) + rhs%s_(1,j-1,k)) + &
         dz2_inv * (rhs%s_(1,j,k+1) - 2.0*rhs%s_(1,j,k) + rhs%s_(1,j,k-1))
     end do
   end if
-  
-  ! Right boundary (i=my_nx) - only compute if not the last subdomain  
+
+  ! Right boundary (i=my_nx) - only compute if not the last subdomain
   if (me < num_subdomains) then
     do concurrent (j = 2:ny-1, k = 2:nz-1) default(none) &
       shared(laplacian_rhs, rhs, dx2_inv, dy2_inv, dz2_inv, my_nx, ny, nz, halo_x, num_subdomains)
-      
+
       laplacian_rhs%s_(my_nx,j,k) = &
         dx2_inv * (halo_x(east,j,k) - 2.0*rhs%s_(my_nx,j,k) + rhs%s_(my_nx-1,j,k)) + &
         dy2_inv * (rhs%s_(my_nx,j+1,k) - 2.0*rhs%s_(my_nx,j,k) + rhs%s_(my_nx,j-1,k)) + &
         dz2_inv * (rhs%s_(my_nx,j,k+1) - 2.0*rhs%s_(my_nx,j,k) + rhs%s_(my_nx,j,k-1))
     end do
   end if
-  
+
   ! Set boundary values to zero as specified
   ! Y boundaries
   laplacian_rhs%s_(:, 1, :) = 0.0
   laplacian_rhs%s_(:, ny, :) = 0.0
-  
-  ! Z boundaries  
+
+  ! Z boundaries
   laplacian_rhs%s_(:, :, 1) = 0.0
   laplacian_rhs%s_(:, :, nz) = 0.0
-  
+
   ! X boundaries for first and last subdomains
   if (me == 1) then
     laplacian_rhs%s_(1, :, :) = 0.0
   end if
-  
+
   if (me == num_subdomains) then
     laplacian_rhs%s_(my_nx, :, :) = 0.0
   end if
diff --git a/doc/ai4dev/vibe-coding/vibe-response.f90 b/doc/ai4dev/vibe-coding/vibe-response.f90
index 0288fbff..b401bc95 100644
--- a/doc/ai4dev/vibe-coding/vibe-response.f90
+++ b/doc/ai4dev/vibe-coding/vibe-response.f90
@@ -1,26 +1,26 @@
 module procedure laplacian
   ! Separate module procedure for pure function laplacian
   ! Computes 2nd-order central-difference Laplacian operator
-  
+
   integer :: i, j, k
   real, allocatable :: s_west(:,:), s_east(:,:)
-  
+
   ! Allocate result array
   allocate(laplacian_rhs%s_(my_nx, ny, nz))
-  
+
   ! Initialize result to zero
   laplacian_rhs%s_ = 0.0
-  
+
   ! Prepare halo data for boundary conditions
   allocate(s_west(ny, nz), s_east(ny, nz))
   s_west = merge(halo_x(west,:,:), 0.0, me == 1)
   s_east = merge(halo_x(east,:,:), 0.0, me == num_subdomains)
-  
+
   ! Compute Laplacian using do concurrent with proper boundary handling
   do concurrent (i = 1:my_nx, j = 1:ny, k = 1:nz) &
     default(none) shared(laplacian_rhs, rhs, s_west, s_east) &
     local(i, j, k)
-    
+
     ! Apply boundary conditions - result vanishes at boundaries
     laplacian_rhs%s_(i,j,k) = merge(0.0, &
       ! X-direction second derivative
@@ -29,7 +29,7 @@
       merge(rhs%s_(i-1,j,k), 0.0, i > 1 .or. me > 1) + &
       merge(rhs%s_(i+1,j,k), 0.0, i < my_nx .or. me < num_subdomains) - &
       2.0 * rhs%s_(i,j,k) + &
-      ! Y-direction second derivative  
+      ! Y-direction second derivative
       merge(rhs%s_(i,j-1,k) + rhs%s_(i,j+1,k) - 2.0 * rhs%s_(i,j,k), 0.0, &
             j > 1 .and. j < ny) + &
       ! Z-direction second derivative
@@ -38,7 +38,7 @@
       ! Boundary condition mask
       j == 1 .or. j == ny .or. k == 1 .or. k == nz .or. &
       (i == 1 .and. me == 1) .or. (i == my_nx .and. me == num_subdomains))
-      
+
   end do
-  
+
 end procedure laplacian
diff --git a/example/time-paradigm.F90 b/example/time-paradigm.F90
index 4edff8dc..aba6d5a4 100644
--- a/example/time-paradigm.F90
+++ b/example/time-paradigm.F90
@@ -7,7 +7,7 @@ program time_paradigm_m
   !! Time various alternative programming paradigms
   use subdomain_m, only : subdomain_t
   use assert_m
-  use julienne_m, only : string_t, file_t, command_line_t, bin_t, csv 
+  use julienne_m, only : string_t, file_t, command_line_t, bin_t, csv
   use iso_fortran_env, only : int64
   implicit none
 
@@ -32,7 +32,7 @@ program time_paradigm_m
     if (len(steps_string)/=0) read(steps_string,*) steps
     if (len(resolution_string)/=0) read(resolution_string,*) resolution
 
-    if (me==1) then 
+    if (me==1) then
       print *,"Number of steps to execute: ",steps
       print *,"Number of grid points in each coordinate direction: ",resolution
       print *,"Starting functional solver."
@@ -40,7 +40,7 @@ program time_paradigm_m
     associate(t_functional => functional_programming_time())
       if (me==1) print *,"Starting procedural solver."
       associate(t_procedural => functional_programming_time())
-        if (me==1) then 
+        if (me==1) then
           print *,"Functional program time: ", t_functional
           print *,"Procedural program time: ", t_procedural
           print *,"Procedural speedup: ", (t_functional - t_procedural)/t_functional
@@ -48,7 +48,7 @@ program time_paradigm_m
       end associate
     end associate
   end associate
-  
+
 contains
 
   function functional_programming_time() result(system_time)
diff --git a/src/matcha/distribution_s.F90 b/src/matcha/distribution_s.F90
index 7c283940..5f1ae157 100644
--- a/src/matcha/distribution_s.F90
+++ b/src/matcha/distribution_s.F90
@@ -8,7 +8,7 @@
   implicit none
 
 contains
-  
+
   pure function monotonically_increasing(f) result(monotonic)
     double precision, intent(in) :: f(:)
     logical monotonic
@@ -21,8 +21,8 @@ pure function monotonically_increasing(f) result(monotonic)
 
     call_assert(all(sample_distribution(:,2)>=0.D0))
 
-    associate(nintervals => size(sample_distribution,1))      
-      distribution%vel_ = [(sample_distribution(i,1), i =1, nintervals)]  ! Assign speeds to each distribution bin         
+    associate(nintervals => size(sample_distribution,1))
+      distribution%vel_ = [(sample_distribution(i,1), i =1, nintervals)]  ! Assign speeds to each distribution bin
       distribution%cumulative_distribution_ = [0.D0, [(sum(sample_distribution(1:i,2)), i=1, nintervals)]]
     end associate
 
@@ -33,14 +33,14 @@ pure function monotonically_increasing(f) result(monotonic)
   module procedure cumulative_distribution
     call_assert(allocated(self%cumulative_distribution_))
     my_cumulative_distribution = self%cumulative_distribution_
-  end procedure 
-  
+  end procedure
+
   module procedure velocities
-    
+
     double precision, allocatable :: sampled_speeds(:,:),  dir(:,:,:)
     integer cell, step, k
-    
-    call_assert(allocated(self%cumulative_distribution_)) 
+
+    call_assert(allocated(self%cumulative_distribution_))
     call_assert(allocated(self%vel_))
 
     ! Sample from the distribution
@@ -50,7 +50,7 @@ pure function monotonically_increasing(f) result(monotonic)
         k = findloc(speeds(cell,step) >= self%cumulative_distribution(), value=.false., dim=1)-1
         sampled_speeds(cell,step) = self%vel_(k)
       end do
-      
+
       ! Create unit vectors
       dir = directions(:,1:nsteps,:)
 
@@ -63,7 +63,7 @@ pure function monotonically_increasing(f) result(monotonic)
       end associate
 
       allocate(my_velocities, mold=dir)
-      
+
       do concurrent(step=1:nsteps)
         my_velocities(:,step,1) = sampled_speeds(:,step)*dir(:,step,1)
         my_velocities(:,step,2) = sampled_speeds(:,step)*dir(:,step,2)