From d5134fae002edb177930502fc073c133e3439721 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 5 Dec 2025 11:16:29 -0800 Subject: [PATCH 1/3] add tuo modules and template --- toolchain/modules | 8 ++++- toolchain/templates/tuo.mako | 63 ++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 toolchain/templates/tuo.mako diff --git a/toolchain/modules b/toolchain/modules index 67655a41df..d733d13d9c 100644 --- a/toolchain/modules +++ b/toolchain/modules @@ -52,6 +52,12 @@ f-all cpe/25.03 rocm/6.3.1 f-all cray-fftw cray-hdf5 cray-python cmake f-gpu craype-accel-amd-gfx90a rocprofiler-compute/3.0.0 +t OLCF Tuolumne +t-all cpe/25.03 rocm/6.3.1 +t-all cray-fftw cray-hdf5 cray-python cmake +t-gpu craype-accel-amd-gfx942 +t-gpu HSA_XNACK=1 + d NCSA Delta d-all python/3.11.6 d-cpu gcc/11.4.0 openmpi @@ -101,4 +107,4 @@ h-all HPC_OMPI_BIN="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin" h-all OMPI_MCA_pml=ob1 OMPI_MCA_coll_hcoll_enable=0 h-gpu PATH="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin:${PATH}" h-all LD_LIBRARY_PATH=/apps/compilers/cuda/12.8.1/lib64:$LD_LIBRARY_PATH -h-gpu MFC_CUDA_CC=100 NVHPC_CUDA_HOME="/apps/compilers/cuda/12.8.1" \ No newline at end of file +h-gpu MFC_CUDA_CC=100 NVHPC_CUDA_HOME="/apps/compilers/cuda/12.8.1" diff --git a/toolchain/templates/tuo.mako b/toolchain/templates/tuo.mako new file mode 100644 index 0000000000..6821b3af1a --- /dev/null +++ b/toolchain/templates/tuo.mako @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +<%namespace name="helpers" file="helpers.mako"/> + +% if engine == 'batch': +# flux: -N ${nodes} +# flux: -n ${tasks_per_node*nodes} +# flux: --job-name="${name}" +# flux: --output="${name}.out" +# flux: --error="${name}.err" +# flux: --time=${walltime} +# flux: --exclusive +# flux:--setattr=thp=always +# flux: --coral2-hugepages=512GB +% if account: +# flux: --bank=${account} +% endif +% if partition: +# flux: --queue=${partition} +% endif +% if unified: +# 
flux:--setattr=thp=always +# flux: --coral2-hugepages=512GB +% endif +% endif + +${helpers.template_prologue()} + +ok ":) Loading modules:\n" +cd "${MFC_ROOT_DIR}" +% if engine == 'batch': +. ./mfc.sh load -c t -m ${'g' if gpu else 'c'} +% endif +cd - > /dev/null +echo + +% if gpu: + export MPICH_GPU_SUPPORT_ENABLED=1 +% else: + export MPICH_GPU_SUPPORT_ENABLED=0 +% endif + +% for target in targets: + ${helpers.run_prologue(target)} + + % if not mpi: + (set -x; ${profiler} "${target.get_install_binpath(case)}") + % else: + (set -x; flux run \ + --nodes=${nodes} --ntasks=${tasks_per_node * nodes} \ + --exclusive \ + % if gpu: + --gpus-per-task 1 \ + % endif + ${profiler} "${target.get_install_binpath(case)}") + % endif + + ${helpers.run_epilogue(target)} + + echo +% endfor + +${helpers.template_epilogue()} From e23561256c0f1878fa9c9a97403b64d35948119c Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:30:37 -0500 Subject: [PATCH 2/3] Add tuo to ./mfc.sh load --- toolchain/bootstrap/modules.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/toolchain/bootstrap/modules.sh b/toolchain/bootstrap/modules.sh index 2a0245fc80..fbe28a9598 100644 --- a/toolchain/bootstrap/modules.sh +++ b/toolchain/bootstrap/modules.sh @@ -39,12 +39,13 @@ done if [ -v $u_c ]; then log "Select a system:" log "$G""ORNL$W: Ascent (a) | Frontier (f) | Summit (s) | Wombat (w)" + log "$B""LLNL $W: Tuolumne (t)" log "$C""ACCESS$W: Bridges2 (b) | Expanse (e) | Delta (d) | DeltaAI (dai)" log "$Y""Gatech$W: Phoenix (p)" log "$R""Caltech$W: Richardson (r)" - log "$BR""Brown$W: Oscar (o)" + log "$BR""Brown$W: Oscar (o)" log "$B""DoD$W: Carpenter Cray (cc) | Carpenter GNU (c) | Nautilus (n)" - log_n "($G""a$W/$G""f$W/$G""s$W/$G""w$W/$C""b$W/$C""e$CR/$C""d/$C""dai$CR/$Y""p$CR/$R""r$CR/$B""cc$CR/$B""c$CR/$B""n$CR/$BR""o"$CR"): " + log_n 
"($G""a$W/$G""f$W/$G""s$W/$G""w$W/$B""t$W/$C""b$W/$C""e$CR/$C""d/$C""dai$CR/$Y""p$CR/$R""r$CR/$B""cc$CR/$B""c$CR/$B""n$CR/$BR""o"$CR"): " read u_c log fi From 0c827ab92e577f35b6a517e9a169286fcc59e430 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 19 Dec 2025 09:35:53 -0800 Subject: [PATCH 3/3] fixes for --debug build --- src/common/m_compute_levelset.fpp | 3 +-- src/common/m_model.fpp | 2 +- src/common/m_variables_conversion.fpp | 3 +-- src/simulation/m_data_output.fpp | 2 +- src/simulation/m_derived_variables.fpp | 8 ++++---- src/simulation/m_ibm.fpp | 2 +- src/simulation/m_start_up.fpp | 6 ------ src/simulation/m_time_steppers.fpp | 3 +++ toolchain/bootstrap/modules.sh | 4 ++-- toolchain/dependencies/CMakeLists.txt | 2 +- toolchain/modules | 10 +++++----- toolchain/templates/tuo.mako | 8 +++----- 12 files changed, 23 insertions(+), 30 deletions(-) diff --git a/src/common/m_compute_levelset.fpp b/src/common/m_compute_levelset.fpp index f343c0c47c..951c00a641 100644 --- a/src/common/m_compute_levelset.fpp +++ b/src/common/m_compute_levelset.fpp @@ -552,9 +552,8 @@ contains end if else levelset%sf(i, j, k, ib_patch_id) = dist_surface - xyz_local = xyz_local*dist_surface_vec - xyz_local = xyz_local/norm2(xyz_local) + xyz_local = xyz_local/max(norm2(xyz_local), sgm_eps) levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, xyz_local) end if end do diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 0ae508a172..ec95b8c608 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -678,7 +678,7 @@ contains do i = 1, boundary_edge_count boundary_edge(1) = boundary_v(i, 2, 1) - boundary_v(i, 1, 1) boundary_edge(2) = boundary_v(i, 2, 2) - boundary_v(i, 1, 2) - edgetan = boundary_edge(1)/boundary_edge(2) + edgetan = boundary_edge(1)/max(sgm_eps, boundary_edge(2)) if (abs(boundary_edge(2)) < threshold_vector_zero) then if (edgetan > 0._wp) then diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp 
index 0f5654f3e5..6de73c0186 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1377,8 +1377,7 @@ contains #ifndef MFC_PRE_PROCESS subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c_c, c, qv) - $:GPU_ROUTINE(function_name='s_compute_speed_of_sound', & - & parallelism='[seq]', cray_inline=True) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pres real(wp), intent(in) :: rho, gamma, pi_inf, qv diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 2ddae9bc30..7c13fd7774 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -1233,7 +1233,7 @@ contains q_cons_vf(mom_idx%beg)%sf(j - 2, k, l), G_local) else call s_compute_pressure( & - q_cons_vf(1)%sf(j - 2, k, l), & + q_cons_vf(E_idx)%sf(j - 2, k, l), & q_cons_vf(alf_idx)%sf(j - 2, k, l), & dyn_p, pi_inf, gamma, rho, qv, rhoYks(:), pres, T) end if diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index 0b9cf5cbc1..81cd185c4c 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -19,8 +19,6 @@ module m_derived_variables use m_data_output !< Data output module - use m_time_steppers !< Time-stepping algorithms - use m_compile_specific use m_helper @@ -120,9 +118,11 @@ contains !> Writes coherent body information, communication files, and probes. !! 
@param t_step Current time-step - subroutine s_compute_derived_variables(t_step) + subroutine s_compute_derived_variables(t_step, q_cons_vf, q_prim_ts1, q_prim_ts2) integer, intent(in) :: t_step + type(scalar_field), dimension(:), intent(inout) :: q_cons_vf + type(vector_field), dimension(:), intent(inout) :: q_prim_ts1, q_prim_ts2 integer :: i, j, k !< Generic loop iterators if (probe_wrt) then @@ -169,7 +169,7 @@ contains call s_derive_center_of_mass(q_prim_ts2(2)%vf, c_mass) - call s_write_probe_files(t_step, q_cons_ts(1)%vf, accel_mag) + call s_write_probe_files(t_step, q_cons_vf, accel_mag) call s_write_com_files(t_step, c_mass) end if diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index aff2a1882a..267b3be665 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -979,7 +979,7 @@ contains end subroutine s_finalize_ibm_module function cross_product(a, b) result(c) - implicit none + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: a(3), b(3) real(wp) :: c(3) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index eb5fd3179e..3f750a009e 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1121,11 +1121,6 @@ contains end do end if - call s_compute_derived_variables(t_step) - -#ifdef DEBUG - print *, 'Computed derived vars' -#endif mytime = mytime + dt ! Total-variation-diminishing (TVD) Runge-Kutta (RK) time-steppers @@ -1136,7 +1131,6 @@ contains if (relax) call s_infinite_relaxation_k(q_cons_ts(1)%vf) ! 
Time-stepping loop controls - t_step = t_step + 1 end subroutine s_perform_time_step diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index f77666f81f..957759530c 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -46,6 +46,8 @@ module m_time_steppers use m_body_forces + use m_derived_variables + implicit none type(vector_field), allocatable, dimension(:) :: q_cons_ts !< @@ -533,6 +535,7 @@ contains if (probe_wrt) then call s_time_step_cycling(t_step) + call s_compute_derived_variables(t_step, q_cons_ts(1)%vf, q_prim_ts1, q_prim_ts2) end if if (cfl_dt) then diff --git a/toolchain/bootstrap/modules.sh b/toolchain/bootstrap/modules.sh index fbe28a9598..304845de85 100644 --- a/toolchain/bootstrap/modules.sh +++ b/toolchain/bootstrap/modules.sh @@ -39,13 +39,13 @@ done if [ -v $u_c ]; then log "Select a system:" log "$G""ORNL$W: Ascent (a) | Frontier (f) | Summit (s) | Wombat (w)" - log "$B""LLNL $W: Tuolumne (t)" + log "$B""LLNL $W: Tuolumne (tuo)" log "$C""ACCESS$W: Bridges2 (b) | Expanse (e) | Delta (d) | DeltaAI (dai)" log "$Y""Gatech$W: Phoenix (p)" log "$R""Caltech$W: Richardson (r)" log "$BR""Brown$W: Oscar (o)" log "$B""DoD$W: Carpenter Cray (cc) | Carpenter GNU (c) | Nautilus (n)" - log_n "($G""a$W/$G""f$W/$G""s$W/$G""w$W/$B""t$W/$C""b$W/$C""e$CR/$C""d/$C""dai$CR/$Y""p$CR/$R""r$CR/$B""cc$CR/$B""c$CR/$B""n$CR/$BR""o"$CR"): " + log_n "($G""a$W/$G""f$W/$G""s$W/$G""w$W/$B""tuo$W/$C""b$W/$C""e$CR/$C""d/$C""dai$CR/$Y""p$CR/$R""r$CR/$B""cc$CR/$B""c$CR/$B""n$CR/$BR""o"$CR"): " read u_c log fi diff --git a/toolchain/dependencies/CMakeLists.txt b/toolchain/dependencies/CMakeLists.txt index aae2a3cad5..cf4f58fa50 100644 --- a/toolchain/dependencies/CMakeLists.txt +++ b/toolchain/dependencies/CMakeLists.txt @@ -130,7 +130,7 @@ if (MFC_HIPFORT) if (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") ExternalProject_Add(hipfort GIT_REPOSITORY "https://github.com/ROCmSoftwarePlatform/hipfort" - GIT_TAG 
rocm-6.0.2 + GIT_TAG rocm-6.3.1 GIT_SHALLOW ON GIT_PROGRESS ON CMAKE_ARGS "-DHIPFORT_COMPILER=${CMAKE_Fortran_COMPILER}" diff --git a/toolchain/modules b/toolchain/modules index 365a7dfde3..2f99f66c61 100644 --- a/toolchain/modules +++ b/toolchain/modules @@ -45,11 +45,11 @@ f-all cpe/25.03 rocm/6.3.1 f-all cray-fftw cray-hdf5 python cmake f-gpu python craype-accel-amd-gfx90a rocprofiler-compute/3.0.0 -t OLCF Tuolumne -t-all cpe/25.03 rocm/6.3.1 -t-all cray-fftw cray-hdf5 cray-python cmake -t-gpu craype-accel-amd-gfx942 -t-gpu HSA_XNACK=1 +tuo LLNL Tuolumne +tuo-all cpe/25.03 rocm/6.3.1 +tuo-all cray-fftw/3.3.10.9 cray-hdf5 python/3.12.2 cmake +tuo-gpu craype-accel-amd-gfx942 +tuo-gpu HSA_XNACK=0 d NCSA Delta d-all python/3.11.6 diff --git a/toolchain/templates/tuo.mako b/toolchain/templates/tuo.mako index 6821b3af1a..bb09a8d906 100644 --- a/toolchain/templates/tuo.mako +++ b/toolchain/templates/tuo.mako @@ -18,10 +18,6 @@ % if partition: # flux: --queue=${partition} % endif -% if unified: -# flux:--setattr=thp=always -# flux: --coral2-hugepages=512GB -% endif % endif ${helpers.template_prologue()} @@ -40,6 +36,8 @@ echo export MPICH_GPU_SUPPORT_ENABLED=0 % endif +export HSA_XNACK=0 + % for target in targets: ${helpers.run_prologue(target)} @@ -48,7 +46,7 @@ echo % else: (set -x; flux run \ --nodes=${nodes} --ntasks=${tasks_per_node * nodes} \ - --exclusive \ + -o spindle.level=off --exclusive \ % if gpu: --gpus-per-task 1 \ % endif