MFlowCode · wilfonba · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/src/common/m_compute_levelset.fpp b/src/common/m_compute_levelset.fpp
@@ -552,9 +552,8 @@ contains
                         end if
                     else
                         levelset%sf(i, j, k, ib_patch_id) = dist_surface
-
                         xyz_local = xyz_local*dist_surface_vec
-                        xyz_local = xyz_local/norm2(xyz_local)
+                        xyz_local = xyz_local/max(norm2(xyz_local), sgm_eps)
                         levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, xyz_local)
                     end if
                 end do

diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp
@@ -678,7 +678,8 @@ contains
         do i = 1, boundary_edge_count
             boundary_edge(1) = boundary_v(i, 2, 1) - boundary_v(i, 1, 1)
             boundary_edge(2) = boundary_v(i, 2, 2) - boundary_v(i, 1, 2)
-            edgetan = boundary_edge(1)/boundary_edge(2)
+            ! Guard the division: clamp |boundary_edge(2)| to sgm_eps but keep its sign, since the sign of edgetan is tested below
+            edgetan = boundary_edge(1)/sign(max(sgm_eps, abs(boundary_edge(2))), boundary_edge(2))
 
             if (abs(boundary_edge(2)) < threshold_vector_zero) then
                 if (edgetan > 0._wp) then

diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp
@@ -1377,8 +1377,7 @@ contains
 
 #ifndef MFC_PRE_PROCESS
     subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c_c, c, qv)
-        $:GPU_ROUTINE(function_name='s_compute_speed_of_sound', &
-            & parallelism='[seq]', cray_inline=True)
+        $:GPU_ROUTINE(parallelism='[seq]')
 
         real(wp), intent(in) :: pres
         real(wp), intent(in) :: rho, gamma, pi_inf, qv

diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp
@@ -1233,7 +1233,7 @@ contains
                             q_cons_vf(mom_idx%beg)%sf(j - 2, k, l), G_local)
                     else
                         call s_compute_pressure( &
-                            q_cons_vf(1)%sf(j - 2, k, l), &
+                            q_cons_vf(E_idx)%sf(j - 2, k, l), &
                             q_cons_vf(alf_idx)%sf(j - 2, k, l), &
                             dyn_p, pi_inf, gamma, rho, qv, rhoYks(:), pres, T)
                     end if

diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp
@@ -19,8 +19,6 @@ module m_derived_variables
 
     use m_data_output           !< Data output module
 
-    use m_time_steppers         !< Time-stepping algorithms
-
     use m_compile_specific
 
     use m_helper
@@ -120,9 +118,11 @@ contains
 
     !> Writes coherent body information, communication files, and probes.
         !!  @param t_step Current time-step
-    subroutine s_compute_derived_variables(t_step)
+    subroutine s_compute_derived_variables(t_step, q_cons_vf, q_prim_ts1, q_prim_ts2)
 
         integer, intent(in) :: t_step
+        type(scalar_field), dimension(:), intent(inout) :: q_cons_vf
+        type(vector_field), dimension(:), intent(inout) :: q_prim_ts1, q_prim_ts2
         integer :: i, j, k !< Generic loop iterators
 
         if (probe_wrt) then
@@ -169,7 +169,7 @@ contains
 
             call s_derive_center_of_mass(q_prim_ts2(2)%vf, c_mass)
 
-            call s_write_probe_files(t_step, q_cons_ts(1)%vf, accel_mag)
+            call s_write_probe_files(t_step, q_cons_vf, accel_mag)
 
             call s_write_com_files(t_step, c_mass)
         end if

diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
@@ -1123,11 +1123,6 @@ contains
             end do
         end if
 
-        call s_compute_derived_variables(t_step)
-
-#ifdef DEBUG
-        print *, 'Computed derived vars'
-#endif
         mytime = mytime + dt
 
         ! Total-variation-diminishing (TVD) Runge-Kutta (RK) time-steppers
@@ -1138,7 +1133,6 @@ contains
         if (relax) call s_infinite_relaxation_k(q_cons_ts(1)%vf)
 
         ! Time-stepping loop controls
-
         t_step = t_step + 1
 
     end subroutine s_perform_time_step

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
@@ -46,6 +46,8 @@ module m_time_steppers
 
     use m_body_forces
 
+    use m_derived_variables
+
     implicit none
 
     type(vector_field), allocatable, dimension(:) :: q_cons_ts !<
@@ -533,6 +535,7 @@ contains
 
                 if (probe_wrt) then
                     call s_time_step_cycling(t_step)
+                    call s_compute_derived_variables(t_step, q_cons_ts(1)%vf, q_prim_ts1, q_prim_ts2)
                 end if
 
                 if (cfl_dt) then

diff --git a/toolchain/bootstrap/modules.sh b/toolchain/bootstrap/modules.sh
@@ -39,13 +39,14 @@ done
 if [ -v $u_c ]; then
     log   "Select a system:"
     log   "$G""ORNL$W:    Ascent     (a) | Frontier (f) | Summit (s) | Wombat (w)"
+    log   "$B""LLNL $W:   Tuolumne   (tuo)"
     log   "$C""ACCESS$W:  Bridges2   (b) | Expanse (e) | Delta  (d) | DeltaAI (dai)"
     log   "$Y""Gatech$W:  Phoenix    (p)"
     log   "$R""Caltech$W: Richardson (r)"
-    log   "$BR""Brown$W: Oscar (o)"
+    log   "$BR""Brown$W:   Oscar (o)"
     log   "$B""DoD$W:     Carpenter Cray (cc) | Carpenter GNU (c) |  Nautilus (n)"
     log   "$OR""Florida$W: HiPerGator (h)"
-    log_n "($G""a$W/$G""f$W/$G""s$W/$G""w$W/$C""b$W/$C""e$CR/$C""d/$C""dai$CR/$Y""p$CR/$R""r$CR/$B""cc$CR/$B""c$CR/$B""n$CR/$BR""o"$CR"/$OR""h"$CR"): "
+    log_n "($G""a$W/$G""f$W/$G""s$W/$G""w$W/$B""tuo$W/$C""b$W/$C""e$CR/$C""d/$C""dai$CR/$Y""p$CR/$R""r$CR/$B""cc$CR/$B""c$CR/$B""n$CR/$BR""o"$CR"/$OR""h"$CR"): "
     read u_c
     log
 fi

diff --git a/toolchain/dependencies/CMakeLists.txt b/toolchain/dependencies/CMakeLists.txt
@@ -130,7 +130,7 @@ if (MFC_HIPFORT)
     if (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
         ExternalProject_Add(hipfort
             GIT_REPOSITORY "https://github.com/ROCmSoftwarePlatform/hipfort"
-            GIT_TAG        rocm-6.0.2
+            GIT_TAG        rocm-6.3.1
             GIT_SHALLOW    ON
             GIT_PROGRESS   ON
             CMAKE_ARGS     "-DHIPFORT_COMPILER=${CMAKE_Fortran_COMPILER}"

diff --git a/toolchain/modules b/toolchain/modules
@@ -45,6 +45,12 @@ f-all cpe/25.03 rocm/6.3.1
 f-all cray-fftw cray-hdf5 python cmake
 f-gpu python craype-accel-amd-gfx90a rocprofiler-compute/3.0.0
 
+tuo     LLNL Tuolumne
+tuo-all cpe/25.03 rocm/6.3.1
+tuo-all cray-fftw/3.3.10.9 cray-hdf5 python/3.12.2 cmake
+tuo-gpu craype-accel-amd-gfx942
+tuo-gpu HSA_XNACK=0
+
 d     NCSA Delta
 d-all python/3.11.6
 d-cpu gcc/11.4.0 openmpi

diff --git a/toolchain/templates/tuo.mako b/toolchain/templates/tuo.mako
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+<%namespace name="helpers" file="helpers.mako"/>
+
+% if engine == 'batch':
+# flux: -N ${nodes}
+# flux: -n ${tasks_per_node*nodes}
+# flux: --job-name="${name}"
+# flux: --output="${name}.out"
+# flux: --error="${name}.err"
+# flux: --time=${walltime}
+# flux: --exclusive
+# flux: --setattr=thp=always
+# flux: --coral2-hugepages=512GB
+% if account:
+# flux: --bank=${account}
+% endif
+% if partition:
+# flux: --queue=${partition}
+% endif
+% endif
+
+${helpers.template_prologue()}
+
+ok ":) Loading modules:\n"
+cd "${MFC_ROOT_DIR}"
+% if engine == 'batch':
+# "tuo" is the system key for Tuolumne in toolchain/modules and the bootstrap prompt
+. ./mfc.sh load -c tuo -m ${'g' if gpu else 'c'}
+% endif
+cd - > /dev/null
+echo
+
+% if gpu:
+    export MPICH_GPU_SUPPORT_ENABLED=1
+% else:
+    export MPICH_GPU_SUPPORT_ENABLED=0
+% endif
+
+export HSA_XNACK=0
+
+% for target in targets:
+    ${helpers.run_prologue(target)}
+
+    % if not mpi:
+        (set -x; ${profiler} "${target.get_install_binpath(case)}")
+    % else:
+        (set -x; flux run \
+            --nodes=${nodes} --ntasks=${tasks_per_node * nodes} \
+            -o spindle.level=off --exclusive \
+            % if gpu:
+                --gpus-per-task 1 \
+            % endif
+            ${profiler} "${target.get_install_binpath(case)}")
+    % endif
+
+    ${helpers.run_epilogue(target)}
+
+    echo
+% endfor
+
+${helpers.template_epilogue()}